Refactor to single integrated package - Remove scripts folder

Major refactoring to create a unified, self-contained Python package:

### Architecture Changes:
- Removed scripts/ directory completely
- All functionality now in src/seo/ package
- Single entry point: ./seo (imports from src/seo/cli)
- No external dependencies on scripts folder

### New Package Structure:
src/seo/
├── __init__.py          - Package exports (SEOApp, PostExporter, etc.)
├── cli.py               - Command-line interface
├── app.py               - Main application class
├── config.py            - Configuration management
├── exporter.py          - Post export functionality (self-contained)
├── analyzer.py          - Enhanced analyzer with selective fields
├── category_proposer.py - AI category proposals (self-contained)
├── seo_checker.py       - Placeholder for future implementation
├── categories.py        - Placeholder for future implementation
├── approval.py          - Placeholder for future implementation
└── recategorizer.py     - Placeholder for future implementation

### Features:
- All modules are self-contained (no scripts dependencies)
- EnhancedPostAnalyzer with selective field analysis
- CategoryProposer for AI-powered category suggestions
- Support for in-place CSV updates with backups
- Clean, integrated codebase

### CLI Commands:
- seo export - Export posts from WordPress
- seo analyze - Analyze with AI (supports -f fields, -u update)
- seo category_propose - Propose categories
- seo status - Show output files
- seo help - Show help

### Usage Examples:
./seo export
./seo analyze -f title categories
./seo analyze -u -f meta_description
./seo category_propose
./seo status

### Benefits:
- Single source of truth
- Easier to maintain and extend
- Proper Python package structure
- Can be installed with pip install -e .
- Clean imports throughout
- No path resolution issues

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
Kevin Bataille
2026-02-16 15:20:11 +01:00
parent 95092a591f
commit c8fb141cdd
27 changed files with 468 additions and 6342 deletions

View File

View File

@@ -1,453 +0,0 @@
#!/usr/bin/env python3
"""
AI-Powered Post Analysis and Recommendation Script
Analyzes exported posts CSV using Claude via OpenRouter and provides
clear, automation-friendly recommendations for:
- Which site to move posts to
- Categories to set
- Posts to consolidate
- Posts to delete
- Posts to optimize
"""
import csv
import json
import logging
import sys
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import requests
from datetime import datetime
from config import Config
# Setup logging
# Module-wide config: INFO level, timestamped "time - LEVEL - message" lines.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
class PostAnalyzer:
    """Analyze posts CSV using Claude AI via OpenRouter."""

    def __init__(self, csv_file: str):
        """Initialize analyzer with CSV file.

        Args:
            csv_file: Path to the exported posts CSV to analyze.
        """
        self.csv_file = Path(csv_file)
        # API key from project config; may be unset — checked before each call.
        self.openrouter_api_key = Config.OPENROUTER_API_KEY
        self.posts: List[Dict] = []           # raw rows loaded from the CSV
        self.analyzed_posts: List[Dict] = []  # rows enriched with AI decision fields
        self.api_calls: int = 0               # number of OpenRouter requests made
        self.ai_cost: float = 0.0             # running USD cost estimate
def load_csv(self) -> bool:
    """Read the exported posts CSV into ``self.posts``.

    Returns:
        True when the file was read successfully, False when it is missing
        or unreadable.
    """
    logger.info(f"Loading CSV: {self.csv_file}")
    if not self.csv_file.exists():
        logger.error(f"CSV file not found: {self.csv_file}")
        return False
    try:
        with open(self.csv_file, 'r', encoding='utf-8') as handle:
            self.posts = list(csv.DictReader(handle))
    except Exception as exc:
        logger.error(f"Error loading CSV: {exc}")
        return False
    logger.info(f"✓ Loaded {len(self.posts)} posts from CSV")
    # Per-site row counts, logged as a quick sanity check of the export.
    site_counts: Dict[str, int] = {}
    for row in self.posts:
        key = row.get('site', '')
        site_counts[key] = site_counts.get(key, 0) + 1
    for key, total in site_counts.items():
        logger.info(f" {key}: {total} posts")
    return True
def batch_posts_for_analysis(self, batch_size: int = 10) -> List[List[Dict]]:
    """Split ``self.posts`` into consecutive chunks of at most ``batch_size`` rows."""
    return [
        self.posts[start:start + batch_size]
        for start in range(0, len(self.posts), batch_size)
    ]
def format_batch_for_ai(self, batch: List[Dict]) -> str:
    """Render a batch of post rows as the plain-text listing sent to the model."""
    pieces = ["POSTS TO ANALYZE:\n\n"]
    for index, post in enumerate(batch, 1):
        pieces.append(f"{index}. POST ID: {post['post_id']}\n")
        pieces.append(f" Site: {post['site']}\n")
        pieces.append(f" Title: {post['title']}\n")
        pieces.append(f" Status: {post['status']}\n")
        pieces.append(f" Word Count: {post['word_count']}\n")
        pieces.append(f" Content: {post['content_preview']}\n")
        pieces.append(f" Current Categories: {post['categories']}\n")
        pieces.append(f" Meta Description: {post['meta_description']}\n")
        pieces.append("\n")
    return "".join(pieces)
def get_ai_recommendations(self, batch: List[Dict]) -> Optional[str]:
    """Get AI recommendations for a batch of posts.

    Sends the formatted batch to OpenRouter (Claude 3.5 Sonnet) and returns
    the raw model response text, or None when the API key is missing or the
    request fails. Side effects: increments ``self.api_calls`` and adds to
    ``self.ai_cost``.
    """
    if not self.openrouter_api_key:
        logger.error("OPENROUTER_API_KEY not set")
        return None
    batch_text = self.format_batch_for_ai(batch)
    prompt = f"""Analyze these blog posts and provide clear, actionable recommendations.
Website Strategy:
- mistergeek.net: High-value topics (VPN, Software, Gaming, General Tech, SEO, Content Marketing)
- webscroll.fr: Torrenting, File-Sharing, Tracker guides (niche audience)
- hellogeek.net: Low-traffic, experimental, off-brand, or niche content
{batch_text}
For EACH post, provide a JSON object with:
{{
"post_id": <id>,
"decision": "<ACTION>" where ACTION is ONE of:
- "Keep on mistergeek.net" (high-value, high-traffic)
- "Move to webscroll.fr" (torrenting/file-sharing content)
- "Move to hellogeek.net" (low-traffic or off-brand)
- "Delete" (spam, extremely low quality, zero traffic)
- "Consolidate with post_id:<id>" (similar content, duplicate)
"category": "<CATEGORY>" where category is ONE of:
- "VPN"
- "Software/Tools"
- "Gaming"
- "Streaming"
- "Torrenting"
- "File-Sharing"
- "SEO"
- "Content Marketing"
- "Other"
"reason": "<Brief reason for decision>",
"priority": "<High|Medium|Low>",
"notes": "<Any additional notes>"
}}
Return ONLY a JSON array. Example:
[
{{"post_id": 2845, "decision": "Keep on mistergeek.net", "category": "VPN", "reason": "High traffic, core topic", "priority": "High", "notes": "Already optimized"}},
{{"post_id": 1234, "decision": "Move to webscroll.fr", "category": "Torrenting", "reason": "Torrent tracker content", "priority": "Medium", "notes": "Good SEO potential on target site"}}
]
Analyze all posts and provide recommendations for EVERY post in the batch."""
    try:
        logger.info(f" Sending batch to Claude for analysis...")
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {self.openrouter_api_key}",
                "Content-Type": "application/json",
            },
            json={
                "model": "anthropic/claude-3.5-sonnet",
                "messages": [
                    {"role": "user", "content": prompt}
                ],
                "temperature": 0.3,  # Lower temp for more consistent recommendations
            },
            timeout=60
        )
        response.raise_for_status()
        result = response.json()
        self.api_calls += 1
        # Track cost
        usage = result.get('usage', {})
        input_tokens = usage.get('prompt_tokens', 0)
        output_tokens = usage.get('completion_tokens', 0)
        # Cost estimate: $3 per 1M input tokens, $15 per 1M output tokens.
        self.ai_cost += (input_tokens * 3 + output_tokens * 15) / 1_000_000
        recommendations_text = result['choices'][0]['message']['content'].strip()
        logger.info(f" ✓ Got recommendations (tokens: {input_tokens}+{output_tokens})")
        return recommendations_text
    except Exception as e:
        # Any network/HTTP/shape failure is logged; caller treats None as
        # "skip this batch".
        logger.error(f"Error getting AI recommendations: {e}")
        return None
def parse_recommendations(self, recommendations_json: str) -> List[Dict]:
    """Extract and decode the JSON array embedded in the model's response text."""
    # The model may wrap the array in prose; take the outermost [...] span.
    start_idx = recommendations_json.find('[')
    end_idx = recommendations_json.rfind(']') + 1
    if start_idx == -1 or end_idx == 0:
        logger.error("Could not find JSON array in response")
        return []
    try:
        return json.loads(recommendations_json[start_idx:end_idx])
    except json.JSONDecodeError as e:
        logger.error(f"Error parsing JSON recommendations: {e}")
        logger.debug(f"Response was: {recommendations_json[:500]}")
        return []
def analyze_all_posts(self) -> bool:
    """Run AI analysis over every loaded post, batch by batch.

    Populates ``self.analyzed_posts`` with each post dict augmented by the
    AI decision fields. Returns True when at least one post was processed.
    """
    logger.info("\n" + "="*70)
    logger.info("ANALYZING POSTS WITH AI")
    logger.info("="*70 + "\n")
    batches = self.batch_posts_for_analysis(batch_size=10)
    logger.info(f"Processing {len(self.posts)} posts in {len(batches)} batches of 10...\n")
    all_recommendations = {}
    for batch_num, batch in enumerate(batches, 1):
        logger.info(f"Batch {batch_num}/{len(batches)}: Analyzing {len(batch)} posts...")
        recommendations_json = self.get_ai_recommendations(batch)
        if not recommendations_json:
            # Failed batches are skipped; their posts fall back to 'Pending' below.
            logger.error(f" Failed to get recommendations for batch {batch_num}")
            continue
        recommendations = self.parse_recommendations(recommendations_json)
        # Index by stringified post_id so CSV-sourced string ids match later.
        for rec in recommendations:
            all_recommendations[str(rec.get('post_id', ''))] = rec
        logger.info(f" ✓ Got {len(recommendations)} recommendations")
    logger.info(f"\n✓ Analysis complete!")
    logger.info(f" Total recommendations: {len(all_recommendations)}")
    logger.info(f" API calls: {self.api_calls}")
    logger.info(f" Estimated cost: ${self.ai_cost:.4f}")
    # Map recommendations to posts
    for post in self.posts:
        post_id = str(post['post_id'])
        if post_id in all_recommendations:
            rec = all_recommendations[post_id]
            post['decision'] = rec.get('decision', 'No decision')
            post['recommended_category'] = rec.get('category', 'Other')
            post['reason'] = rec.get('reason', '')
            post['priority'] = rec.get('priority', 'Medium')
            post['ai_notes'] = rec.get('notes', '')
        else:
            # No AI answer for this post (failed batch or id mismatch).
            post['decision'] = 'Pending'
            post['recommended_category'] = 'Other'
            post['reason'] = 'No recommendation'
            post['priority'] = 'Medium'
            post['ai_notes'] = ''
        self.analyzed_posts.append(post)
    return len(self.analyzed_posts) > 0
def _write_csv(self, path: Path, fieldnames: List[str], rows: List[Dict]) -> None:
    """Write *rows* (list of dicts) to *path* as UTF-8 CSV with the given header."""
    with open(path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)

def export_with_recommendations(self) -> Tuple[str, Optional[str], Optional[str], Optional[str]]:
    """Export CSV with recommendations and create action-specific files.

    Returns:
        Tuple of (main_file, moves_file, consolidate_file, delete_file) path
        strings; the action-specific entries are None when no post matched
        that action. (Annotation fixed: the old ``Tuple[str, str, str, str]``
        did not reflect the possible Nones.)
    """
    output_dir = Path(__file__).parent.parent / 'output'
    output_dir.mkdir(parents=True, exist_ok=True)
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    # Main file with all recommendations
    main_file = output_dir / f'posts_with_ai_recommendations_{timestamp}.csv'
    # Action-specific files
    moves_file = output_dir / f'posts_to_move_{timestamp}.csv'
    consolidate_file = output_dir / f'posts_to_consolidate_{timestamp}.csv'
    delete_file = output_dir / f'posts_to_delete_{timestamp}.csv'
    # Build the header once. analyze_all_posts() already injected the AI
    # fields into every post dict, so unconditionally appending them (the
    # previous behavior) produced duplicate CSV columns — dedupe instead.
    fieldnames = list(self.analyzed_posts[0].keys())
    for extra in ('decision', 'recommended_category', 'reason', 'priority', 'ai_notes'):
        if extra not in fieldnames:
            fieldnames.append(extra)
    logger.info(f"\nExporting recommendations to CSV...")
    self._write_csv(main_file, fieldnames, self.analyzed_posts)
    logger.info(f"✓ Main file: {main_file}")
    # Partition posts by recommended action (substring match on the decision).
    posts_to_move = [p for p in self.analyzed_posts if 'Move to' in p.get('decision', '')]
    posts_to_consolidate = [p for p in self.analyzed_posts if 'Consolidate' in p.get('decision', '')]
    posts_to_delete = [p for p in self.analyzed_posts if p.get('decision') == 'Delete']
    # Moves file
    if posts_to_move:
        self._write_csv(moves_file, fieldnames, posts_to_move)
        logger.info(f"✓ Moves file ({len(posts_to_move)} posts): {moves_file}")
    # Consolidate file
    if posts_to_consolidate:
        self._write_csv(consolidate_file, fieldnames, posts_to_consolidate)
        logger.info(f"✓ Consolidate file ({len(posts_to_consolidate)} posts): {consolidate_file}")
    # Delete file
    if posts_to_delete:
        self._write_csv(delete_file, fieldnames, posts_to_delete)
        logger.info(f"✓ Delete file ({len(posts_to_delete)} posts): {delete_file}")
    return (
        str(main_file),
        str(moves_file) if posts_to_move else None,
        str(consolidate_file) if posts_to_consolidate else None,
        str(delete_file) if posts_to_delete else None
    )
def print_summary(self):
    """Log a human-readable summary of decisions, recommended categories,
    priorities, and a per-site decision breakdown."""
    logger.info("\n" + "="*70)
    logger.info("ANALYSIS SUMMARY")
    logger.info("="*70 + "\n")
    # Count decisions
    decisions = {}
    for post in self.analyzed_posts:
        decision = post.get('decision', 'Unknown')
        decisions[decision] = decisions.get(decision, 0) + 1
    logger.info("DECISIONS:")
    # Most frequent decisions first.
    for decision, count in sorted(decisions.items(), key=lambda x: x[1], reverse=True):
        logger.info(f" {decision}: {count} posts")
    # Count categories
    categories = {}
    for post in self.analyzed_posts:
        cat = post.get('recommended_category', 'Other')
        categories[cat] = categories.get(cat, 0) + 1
    logger.info("\nRECOMMENDED CATEGORIES:")
    for cat, count in sorted(categories.items(), key=lambda x: x[1], reverse=True):
        logger.info(f" {cat}: {count} posts")
    # Count priorities
    priorities = {}
    for post in self.analyzed_posts:
        priority = post.get('priority', 'Unknown')
        priorities[priority] = priorities.get(priority, 0) + 1
    logger.info("\nPRIORITY BREAKDOWN:")
    # Fixed order; any priority value outside these three is not listed.
    for priority in ['High', 'Medium', 'Low']:
        count = priorities.get(priority, 0)
        logger.info(f" {priority}: {count} posts")
    # By site
    logger.info("\nBY SITE:")
    by_site = {}
    for post in self.analyzed_posts:
        site = post.get('site', 'Unknown')
        if site not in by_site:
            by_site[site] = []
        by_site[site].append(post.get('decision', 'Unknown'))
    for site in sorted(by_site.keys()):
        logger.info(f"\n {site}:")
        decisions_for_site = {}
        for decision in by_site[site]:
            decisions_for_site[decision] = decisions_for_site.get(decision, 0) + 1
        for decision, count in sorted(decisions_for_site.items()):
            logger.info(f" {decision}: {count}")
def run(self):
    """Run complete analysis end to end.

    Loads the CSV, runs AI analysis, prints a summary, exports result files,
    and logs follow-up instructions. Exits the process with status 1 when
    loading or analysis fails.
    """
    logger.info("="*70)
    logger.info("AI-POWERED POST ANALYSIS AND RECOMMENDATIONS")
    logger.info("="*70)
    # Load CSV
    if not self.load_csv():
        sys.exit(1)
    # Analyze posts
    if not self.analyze_all_posts():
        logger.error("Failed to analyze posts")
        sys.exit(1)
    # Print summary
    self.print_summary()
    # Export results
    logger.info("\n" + "="*70)
    logger.info("EXPORTING RESULTS")
    logger.info("="*70)
    main_file, moves_file, consol_file, delete_file = self.export_with_recommendations()
    logger.info("\n" + "="*70)
    logger.info("NEXT STEPS")
    logger.info("="*70)
    logger.info("\n1. Review main file with all recommendations:")
    logger.info(f" {main_file}")
    logger.info("\n2. Execute moves (automate with script):")
    if moves_file:
        logger.info(f" {moves_file}")
    else:
        logger.info(" No posts to move")
    logger.info("\n3. Consolidate duplicates:")
    if consol_file:
        logger.info(f" {consol_file}")
    else:
        logger.info(" No posts to consolidate")
    logger.info("\n4. Delete low-quality posts:")
    if delete_file:
        logger.info(f" {delete_file}")
    else:
        logger.info(" No posts to delete")
    logger.info("\n✓ Analysis complete!")
def main():
    """Main entry point: parse the CSV path argument and run the analyzer."""
    # Imported here rather than at module level; only the CLI path needs it.
    import argparse
    parser = argparse.ArgumentParser(
        description='Analyze exported posts CSV using Claude AI and provide recommendations'
    )
    parser.add_argument(
        'csv_file',
        help='Path to exported posts CSV file'
    )
    args = parser.parse_args()
    analyzer = PostAnalyzer(args.csv_file)
    analyzer.run()


if __name__ == '__main__':
    main()

View File

@@ -1,382 +0,0 @@
#!/usr/bin/env python3
"""
AI-Powered Post Re-categorization
Analyzes exported posts using Claude AI via OpenRouter and provides
category recommendations for better content organization.
"""
import csv
import json
import logging
import sys
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import requests
from datetime import datetime
from config import Config
# Setup logging
# Module-wide config: INFO level, timestamped "time - LEVEL - message" lines.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
class PostRecategorizer:
    """Re-categorize posts using Claude AI via OpenRouter."""

    def __init__(self, csv_file: str):
        """Initialize recategorizer with CSV file.

        Args:
            csv_file: Path to the exported posts CSV to recategorize.
        """
        self.csv_file = Path(csv_file)
        # API key from project config; may be unset — checked before each call.
        self.openrouter_api_key = Config.OPENROUTER_API_KEY
        self.posts: List[Dict] = []                # raw rows loaded from the CSV
        self.recategorized_posts: List[Dict] = []  # rows with AI category fields
        self.api_calls: int = 0                    # number of OpenRouter requests made
        self.ai_cost: float = 0.0                  # running USD cost estimate
def load_csv(self) -> bool:
    """Load posts from CSV file.

    Returns:
        True when the file was read successfully, False when it is missing
        or unreadable.
    """
    logger.info(f"Loading CSV: {self.csv_file}")
    if not self.csv_file.exists():
        logger.error(f"CSV file not found: {self.csv_file}")
        return False
    try:
        with open(self.csv_file, 'r', encoding='utf-8') as f:
            reader = csv.DictReader(f)
            self.posts = list(reader)
        logger.info(f"✓ Loaded {len(self.posts)} posts from CSV")
        # Group by site for stats
        by_site = {}
        for post in self.posts:
            site = post.get('site', '')
            if site not in by_site:
                by_site[site] = 0
            by_site[site] += 1
        for site, count in by_site.items():
            logger.info(f" {site}: {count} posts")
        return True
    except Exception as e:
        logger.error(f"Error loading CSV: {e}")
        return False
def batch_posts_for_analysis(self, batch_size: int = 10) -> List[List[Dict]]:
    """Chunk the loaded posts into consecutive lists of at most ``batch_size`` rows."""
    total = len(self.posts)
    return [self.posts[offset:offset + batch_size] for offset in range(0, total, batch_size)]
def format_batch_for_ai(self, batch: List[Dict]) -> str:
    """Render a batch of post rows as the plain-text listing sent to the model."""
    pieces = ["POSTS TO RECATEGORIZE:\n\n"]
    for index, post in enumerate(batch, 1):
        pieces.append(f"{index}. POST ID: {post['post_id']}\n")
        pieces.append(f" Site: {post['site']}\n")
        pieces.append(f" Title: {post['title']}\n")
        # Optional columns fall back to placeholders rather than raising.
        pieces.append(f" Current Categories: {post.get('categories', 'None')}\n")
        pieces.append(f" Content: {post.get('content_preview', '')}...\n")
        pieces.append(f" Word Count: {post.get('word_count', '0')}\n")
        pieces.append("\n")
    return "".join(pieces)
def get_ai_recommendations(self, batch: List[Dict]) -> Optional[str]:
    """Get AI category recommendations for a batch of posts.

    Sends the formatted batch to OpenRouter (Claude 3.5 Sonnet) and returns
    the raw model response text, or None when the API key is missing or the
    request fails. Side effects: increments ``self.api_calls`` and adds to
    ``self.ai_cost``.
    """
    if not self.openrouter_api_key:
        logger.error("OPENROUTER_API_KEY not set")
        return None
    batch_text = self.format_batch_for_ai(batch)
    prompt = f"""Analyze these blog posts and recommend optimal categories.
Website Strategy:
- mistergeek.net: VPN, Software/Tools, Gaming, General Tech, SEO, Content Marketing
- webscroll.fr: Torrenting, File-Sharing, Tracker Guides
- hellogeek.net: Experimental, Low-traffic, Off-brand content
{batch_text}
For EACH post, provide a JSON object with:
{{
"post_id": <id>,
"current_categories": "<current>",
"recommended_categories": "<comma-separated categories>",
"reason": "<Brief reason for recommendation>",
"confidence": "High|Medium|Low"
}}
Return ONLY a JSON array. Example:
[
{{"post_id": 2845, "current_categories": "VPN", "recommended_categories": "VPN, Security", "reason": "Add security angle", "confidence": "High"}},
{{"post_id": 1234, "current_categories": "Other", "recommended_categories": "Torrenting, Guides", "reason": "Torrent-specific content", "confidence": "Medium"}}
]
Analyze all posts and provide recommendations for EVERY post in the batch."""
    try:
        logger.info(f" Sending batch to Claude for recategorization...")
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {self.openrouter_api_key}",
                "Content-Type": "application/json",
            },
            json={
                "model": "anthropic/claude-3.5-sonnet",
                "messages": [
                    {"role": "user", "content": prompt}
                ],
                # Low temperature for consistent, deterministic-ish output.
                "temperature": 0.3,
            },
            timeout=60
        )
        response.raise_for_status()
        result = response.json()
        self.api_calls += 1
        # Track cost
        usage = result.get('usage', {})
        input_tokens = usage.get('prompt_tokens', 0)
        output_tokens = usage.get('completion_tokens', 0)
        # Cost estimate: $3 per 1M input tokens, $15 per 1M output tokens.
        self.ai_cost += (input_tokens * 3 + output_tokens * 15) / 1_000_000
        recommendations_text = result['choices'][0]['message']['content'].strip()
        logger.info(f" ✓ Got recommendations (tokens: {input_tokens}+{output_tokens})")
        return recommendations_text
    except Exception as e:
        # Any network/HTTP/shape failure is logged; caller treats None as
        # "skip this batch".
        logger.error(f"Error getting AI recommendations: {e}")
        return None
def parse_recommendations(self, recommendations_json: str) -> List[Dict]:
    """Extract and decode the JSON array embedded in the model's response text."""
    # The model may wrap the array in prose; take the outermost [...] span.
    first = recommendations_json.find('[')
    last = recommendations_json.rfind(']') + 1
    if first == -1 or last == 0:
        logger.error("Could not find JSON array in response")
        return []
    try:
        return json.loads(recommendations_json[first:last])
    except json.JSONDecodeError as e:
        logger.error(f"Error parsing JSON recommendations: {e}")
        logger.debug(f"Response was: {recommendations_json[:500]}")
        return []
def analyze_all_posts(self) -> bool:
    """Run AI recategorization over every loaded post, batch by batch.

    Populates ``self.recategorized_posts`` with each post dict augmented by
    the recategorization fields. Returns True when at least one post was
    processed.
    """
    logger.info("\n" + "="*70)
    logger.info("RECATEGORIZING POSTS WITH AI")
    logger.info("="*70 + "\n")
    batches = self.batch_posts_for_analysis(batch_size=10)
    logger.info(f"Processing {len(self.posts)} posts in {len(batches)} batches of 10...\n")
    all_recommendations = {}
    for batch_num, batch in enumerate(batches, 1):
        logger.info(f"Batch {batch_num}/{len(batches)}: Analyzing {len(batch)} posts...")
        recommendations_json = self.get_ai_recommendations(batch)
        if not recommendations_json:
            # Failed batches are skipped; their posts keep current categories.
            logger.error(f" Failed to get recommendations for batch {batch_num}")
            continue
        recommendations = self.parse_recommendations(recommendations_json)
        # Index by stringified post_id so CSV-sourced string ids match later.
        for rec in recommendations:
            all_recommendations[str(rec.get('post_id', ''))] = rec
        logger.info(f" ✓ Got {len(recommendations)} recommendations")
    logger.info(f"\n✓ Analysis complete!")
    logger.info(f" Total recommendations: {len(all_recommendations)}")
    logger.info(f" API calls: {self.api_calls}")
    logger.info(f" Estimated cost: ${self.ai_cost:.4f}")
    # Map recommendations to posts
    for post in self.posts:
        post_id = str(post['post_id'])
        if post_id in all_recommendations:
            rec = all_recommendations[post_id]
            # Fall back to the current categories if the model omitted them.
            post['recommended_categories'] = rec.get('recommended_categories', post.get('categories', ''))
            post['recategorization_reason'] = rec.get('reason', '')
            post['recategorization_confidence'] = rec.get('confidence', 'Medium')
        else:
            # No AI answer for this post (failed batch or id mismatch).
            post['recommended_categories'] = post.get('categories', '')
            post['recategorization_reason'] = 'No recommendation'
            post['recategorization_confidence'] = 'Unknown'
        self.recategorized_posts.append(post)
    return len(self.recategorized_posts) > 0
def export_with_recommendations(self) -> Tuple[str, Optional[str]]:
    """Export CSV with recategorization recommendations.

    Returns:
        Tuple of (main_file, changes_file) path strings; changes_file is
        None when no category changes were recommended. (Annotation fixed:
        the old ``Tuple[str, str]`` did not reflect the possible None.)
    """
    output_dir = Path(__file__).parent.parent / 'output'
    output_dir.mkdir(parents=True, exist_ok=True)
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    # Main file with all recommendations
    main_file = output_dir / f'posts_with_recategorization_{timestamp}.csv'
    # Differences file (only posts with different recommendations)
    changes_file = output_dir / f'category_changes_only_{timestamp}.csv'
    # Build the header once. analyze_all_posts() already injected the
    # recommendation fields into every post dict, so unconditionally
    # appending them (the previous behavior) produced duplicate CSV
    # columns — only append what is genuinely missing.
    fieldnames = list(self.recategorized_posts[0].keys())
    for extra in ('recommended_categories', 'recategorization_reason', 'recategorization_confidence'):
        if extra not in fieldnames:
            fieldnames.append(extra)
    logger.info(f"\nExporting recategorization recommendations to CSV...")
    # Export main file with all posts
    with open(main_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(self.recategorized_posts)
    logger.info(f"✓ Main file: {main_file}")
    # Export changes file (only posts where category changed)
    posts_with_changes = [
        p for p in self.recategorized_posts
        if p.get('categories', '') != p.get('recommended_categories', '')
    ]
    if posts_with_changes:
        with open(changes_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(posts_with_changes)
        logger.info(f"✓ Changes file ({len(posts_with_changes)} posts): {changes_file}")
    else:
        logger.info(f" No category changes recommended")
    return (str(main_file), str(changes_file) if posts_with_changes else None)
def print_summary(self):
    """Log a summary of category changes per site and recommendation confidence.

    NOTE(review): the percentage math divides by len(self.recategorized_posts);
    callers must ensure at least one post was analyzed (run() guards this).
    """
    logger.info("\n" + "="*70)
    logger.info("RECATEGORIZATION SUMMARY")
    logger.info("="*70 + "\n")
    # Count changes by site
    by_site = {}
    total_changes = 0
    for post in self.recategorized_posts:
        site = post.get('site', 'Unknown')
        if site not in by_site:
            by_site[site] = {'total': 0, 'changed': 0}
        by_site[site]['total'] += 1
        # A "change" is any difference between current and recommended sets.
        if post.get('categories', '') != post.get('recommended_categories', ''):
            by_site[site]['changed'] += 1
            total_changes += 1
    logger.info("CHANGES BY SITE:")
    for site in sorted(by_site.keys()):
        stats = by_site[site]
        logger.info(f" {site}: {stats['changed']} changes out of {stats['total']} posts")
    logger.info(f"\nTOTAL CHANGES: {total_changes} out of {len(self.recategorized_posts)} posts")
    logger.info(f" ({(total_changes/len(self.recategorized_posts)*100):.1f}% of posts)")
    # Confidence breakdown
    logger.info("\nRECOMMENDATION CONFIDENCE:")
    confidence_counts = {}
    for post in self.recategorized_posts:
        conf = post.get('recategorization_confidence', 'Unknown')
        confidence_counts[conf] = confidence_counts.get(conf, 0) + 1
    for conf in ['High', 'Medium', 'Low', 'Unknown']:
        count = confidence_counts.get(conf, 0)
        if count > 0:
            logger.info(f" {conf}: {count} posts ({(count/len(self.recategorized_posts)*100):.1f}%)")
def run(self):
    """Run complete recategorization analysis end to end.

    Loads the CSV, runs AI recategorization, prints a summary, exports
    result files, and logs follow-up instructions. Exits the process with
    status 1 when loading or analysis fails.
    """
    logger.info("="*70)
    logger.info("AI-POWERED POST RECATEGORIZATION")
    logger.info("="*70)
    # Load CSV
    if not self.load_csv():
        sys.exit(1)
    # Analyze posts
    if not self.analyze_all_posts():
        logger.error("Failed to analyze posts")
        sys.exit(1)
    # Print summary
    self.print_summary()
    # Export results
    logger.info("\n" + "="*70)
    logger.info("EXPORTING RESULTS")
    logger.info("="*70)
    main_file, changes_file = self.export_with_recommendations()
    logger.info("\n" + "="*70)
    logger.info("NEXT STEPS")
    logger.info("="*70)
    logger.info("\n1. Review recategorization recommendations:")
    logger.info(f" {main_file}")
    logger.info("\n2. Review only posts with category changes:")
    if changes_file:
        logger.info(f" {changes_file}")
    else:
        logger.info(" No changes recommended")
    logger.info("\n3. Apply recommendations:")
    logger.info(" Use categorization automation script (coming soon)")
    logger.info(" Or manually update categories in WordPress")
    logger.info("\n✓ Recategorization analysis complete!")
def main():
    """Main entry point: parse the CSV path argument and run the recategorizer."""
    # Imported here rather than at module level; only the CLI path needs it.
    import argparse
    parser = argparse.ArgumentParser(
        description='Re-categorize posts using Claude AI for better organization'
    )
    parser.add_argument(
        'csv_file',
        help='Path to exported posts CSV file'
    )
    args = parser.parse_args()
    recategorizer = PostRecategorizer(args.csv_file)
    recategorizer.run()


if __name__ == '__main__':
    main()

View File

@@ -1,427 +0,0 @@
"""
Analytics data importer for SEO analysis.
Merges Google Analytics and Search Console data with WordPress posts.
"""
import csv
import json
import argparse
from pathlib import Path
from urllib.parse import urlparse, parse_qs
from collections import defaultdict
from config import Config
class AnalyticsImporter:
    """Import and consolidate analytics data with WordPress posts."""

    def __init__(self):
        """Initialize importer."""
        self.config = Config                  # project-level settings object
        self.output_dir = self.config.OUTPUT_DIR
        self.logs: list = []                  # messages recorded via self.log()
        # presumably analytics URLs that fail to match any post — populated
        # elsewhere; confirm against the matching code.
        self.unmatched_urls: list = []
def log(self, message: str) -> None:
    """Record *message* in the in-memory log and echo it to stdout."""
    self.logs += [message]
    print(message)
def normalize_url(self, url):
    """Reduce *url* to a lowercase, slash-stripped path for cross-source matching."""
    if not url:
        return ""
    # Remove trailing slash, protocol, www
    cleaned = url.rstrip('/')
    if cleaned.startswith('http'):
        # Full URLs collapse to their path component (host and scheme drop out).
        cleaned = urlparse(cleaned).path
    # NOTE(review): this removes 'www.' anywhere in the remaining string, not
    # just a leading host prefix — presumably harmless once only the path
    # remains; confirm.
    return cleaned.replace('www.', '').lower()
def extract_post_slug_from_url(self, url):
    """Return the final path segment of *url* (the slug), or None for an empty path."""
    segments = [seg for seg in urlparse(url).path.rstrip('/').split('/') if seg]
    return segments[-1] if segments else None
def load_ga4_data(self, ga4_csv):
    """Load Google Analytics 4 data.

    Args:
        ga4_csv: Path to a GA4 CSV export; French and English headers are
            both supported.

    Returns:
        Dict mapping normalized page path -> metrics dict with keys
        'traffic', 'users', 'bounce_rate', 'avg_session_duration', 'ga_url'.
        Empty dict when the file is missing or unreadable.
    """
    ga_data = {}
    if not ga4_csv.exists():
        self.log(f"⚠️ GA4 file not found: {ga4_csv}")
        return ga_data
    try:
        with open(ga4_csv, 'r', encoding='utf-8') as f:
            # Skip comment lines at the top (lines starting with #)
            lines = [line for line in f if not line.startswith('#')]
            reader = csv.DictReader(lines)
            for row in reader:
                if not row:
                    continue
                # Handle French and English column names
                url = (row.get('Page path and screen class') or
                       row.get('Chemin de la page et classe de l\'écran') or
                       row.get('Page path') or
                       row.get('Page') or '')
                if not url:
                    continue
                # Normalize URL
                normalized = self.normalize_url(url)
                # Extract metrics (handle French and English column names)
                try:
                    # NOTE(review): 'Screened Views' looks like a typo for GA4's
                    # 'Screen page views' header — confirm against a real export.
                    traffic = int(float(row.get('Screened Views', row.get('Views', row.get('Vues', '0'))) or 0))
                    users = int(float(row.get('Users', row.get('Utilisateurs actifs', '0')) or 0))
                    bounce_rate = float(row.get('Bounce rate', row.get('Taux de rebond', '0')) or 0)
                    avg_duration_str = (row.get('Average session duration',
                                        row.get('Durée d\'engagement moyenne par utilisateur actif', '0')) or '0')
                    # French exports use ',' as the decimal separator.
                    avg_duration = float(avg_duration_str.replace(',', '.'))
                except (ValueError, TypeError):
                    # Unparseable metrics fall back to zero rather than aborting.
                    traffic = users = 0
                    bounce_rate = avg_duration = 0
                ga_data[normalized] = {
                    'traffic': traffic,
                    'users': users,
                    'bounce_rate': bounce_rate,
                    'avg_session_duration': avg_duration,
                    'ga_url': url
                }
        self.log(f"✓ Loaded {len(ga_data)} GA4 entries")
    except Exception as e:
        self.log(f"❌ Error reading GA4 file: {e}")
    return ga_data
def load_gsc_data(self, gsc_csv):
    """Load Google Search Console data (Page-level or Query-level).

    Args:
        gsc_csv: Path to a GSC CSV export; French and English headers are
            both supported.

    Returns:
        Dict mapping normalized URL -> aggregated metrics dict with keys
        'impressions', 'clicks', 'avg_position', 'ctr', 'keywords',
        'keywords_count', 'gsc_url'. Empty dict when the file is missing
        or unreadable.
    """
    gsc_data = {}
    if not gsc_csv.exists():
        self.log(f"⚠️ GSC file not found: {gsc_csv}")
        return gsc_data
    try:
        with open(gsc_csv, 'r', encoding='utf-8') as f:
            reader = csv.DictReader(f)
            for row in reader:
                if not row:
                    continue
                # Determine if this is page-level or query-level data
                # Pages.csv has: "Pages les plus populaires", Queries.csv has: "Requêtes les plus fréquentes"
                url = (row.get('Page') or
                       row.get('Pages les plus populaires') or
                       row.get('URL') or '')
                # NOTE(review): due to operator precedence, .strip() applies
                # only to the French column; an English 'Query' value is used
                # unstripped — confirm whether that is intended.
                query = row.get('Query') or row.get('Requêtes les plus fréquentes', '').strip()
                # Skip rows without URLs (query-only data)
                if not url:
                    continue
                # Try to parse metrics with flexible column names
                try:
                    # Handle different number formats (decimal separator, percentage signs)
                    clicks_str = row.get('Clics', row.get('Clicks', '0')) or '0'
                    impressions_str = row.get('Impressions', '0') or '0'
                    ctr_str = row.get('CTR', '0') or '0'
                    position_str = row.get('Position', '0') or '0'
                    clicks = int(float(clicks_str.replace(',', '.').rstrip('%')))
                    impressions = int(float(impressions_str.replace(',', '.')))
                    # CTR is exported as a percentage; store as a 0..1 fraction.
                    ctr = float(ctr_str.replace(',', '.').rstrip('%')) / 100
                    position = float(position_str.replace(',', '.'))
                except (ValueError, TypeError, AttributeError):
                    # Unparseable metrics default to zero instead of aborting the row.
                    clicks = impressions = 0
                    ctr = position = 0
                normalized = self.normalize_url(url)
                if normalized not in gsc_data:
                    gsc_data[normalized] = {
                        'impressions': 0,
                        'clicks': 0,
                        'avg_position': 0,
                        'ctr': 0,
                        'keywords': [],
                        'gsc_url': url
                    }
                # Accumulate data (in case of multiple rows per URL)
                gsc_data[normalized]['impressions'] += impressions
                gsc_data[normalized]['clicks'] += clicks
                # Store position
                if position > 0:
                    gsc_data[normalized]['positions'] = gsc_data[normalized].get('positions', [])
                    gsc_data[normalized]['positions'].append(position)
                if query and query not in gsc_data[normalized]['keywords']:
                    gsc_data[normalized]['keywords'].append(query)
        # Calculate average positions and finalize
        for data in gsc_data.values():
            if data.get('positions'):
                data['avg_position'] = sum(data['positions']) / len(data['positions'])
                del data['positions']
            # Recalculate CTR from totals
            if data['impressions'] > 0:
                data['ctr'] = data['clicks'] / data['impressions']
            data['keywords_count'] = len(data.get('keywords', []))
        self.log(f"✓ Loaded {len(gsc_data)} GSC entries")
    except Exception as e:
        self.log(f"❌ Error reading GSC file: {e}")
    return gsc_data
def load_posts_csv(self, posts_csv):
"""Load existing WordPress posts CSV."""
posts = {}
if not posts_csv.exists():
self.log(f"⚠️ Posts file not found: {posts_csv}")
return posts
try:
with open(posts_csv, 'r', encoding='utf-8') as f:
reader = csv.DictReader(f)
for row in reader:
# Handle different column name variations
post_id = row.get('ID') or row.get('post_id')
post_url = row.get('URL') or row.get('Post URL') or row.get('post_url')
post_slug = row.get('Post Slug') or row.get('Slug') or row.get('post_slug')
post_title = row.get('Title') or row.get('post_title')
if not post_id:
continue
normalized = self.normalize_url(post_url) if post_url else ""
# Handle different SEO column names
seo_title = (row.get('SEO Title') or
row.get('proposed_seo_title') or
row.get('current_seo_title') or '')
meta_desc = (row.get('Meta Description') or
row.get('proposed_meta_description') or
row.get('current_meta_description') or '')
posts[post_id] = {
'title': post_title or '',
'url': post_url,
'slug': post_slug,
'normalized_url': normalized,
'seo_title': seo_title,
'meta_description': meta_desc,
**{k: v for k, v in row.items()
if k not in ['ID', 'post_id', 'Title', 'post_title', 'URL', 'Post URL', 'post_url',
'Post Slug', 'Slug', 'post_slug', 'SEO Title', 'proposed_seo_title',
'current_seo_title', 'Meta Description', 'proposed_meta_description',
'current_meta_description']}
}
self.log(f"✓ Loaded {len(posts)} posts from CSV")
except Exception as e:
self.log(f"❌ Error reading posts CSV: {e}")
return posts
def match_analytics_to_posts(self, posts, ga_data, gsc_data):
"""Match analytics data to posts with fuzzy matching."""
self.log("\n📊 Matching analytics data to posts...")
matched_count = 0
for post_id, post_info in posts.items():
slug = post_info.get('slug') or self.extract_post_slug_from_url(post_info.get('url', ''))
normalized_url = post_info.get('normalized_url', '')
# Try direct URL match first
if normalized_url in ga_data:
post_info['ga_data'] = ga_data[normalized_url]
matched_count += 1
else:
post_info['ga_data'] = {}
if normalized_url in gsc_data:
post_info['gsc_data'] = gsc_data[normalized_url]
matched_count += 1
else:
post_info['gsc_data'] = {}
# Try slug-based matching if URL didn't match
if not post_info.get('gsc_data') and slug:
for gsc_url, gsc_info in gsc_data.items():
if slug in gsc_url:
post_info['gsc_data'] = gsc_info
break
# Track unmatched GSC URLs
matched_gsc_urls = set()
for post in posts.values():
if post.get('gsc_data'):
matched_gsc_urls.add(id(post['gsc_data']))
for normalized_url, gsc_info in gsc_data.items():
if id(gsc_info) not in matched_gsc_urls and gsc_info.get('impressions', 0) > 0:
self.unmatched_urls.append({
'url': gsc_info.get('gsc_url', normalized_url),
'impressions': gsc_info.get('impressions', 0),
'clicks': gsc_info.get('clicks', 0),
'avg_position': gsc_info.get('avg_position', 0)
})
self.log(f"✓ Matched data to posts")
return posts
def enrich_posts_data(self, posts):
"""Enrich posts with calculated metrics."""
for post_info in posts.values():
ga = post_info.get('ga_data', {})
gsc = post_info.get('gsc_data', {})
# GA metrics
post_info['traffic'] = ga.get('traffic', 0)
post_info['users'] = ga.get('users', 0)
post_info['bounce_rate'] = ga.get('bounce_rate', 0)
post_info['avg_session_duration'] = ga.get('avg_session_duration', 0)
# GSC metrics
post_info['impressions'] = gsc.get('impressions', 0)
post_info['clicks'] = gsc.get('clicks', 0)
post_info['avg_position'] = gsc.get('avg_position', 0)
post_info['ctr'] = gsc.get('ctr', 0)
post_info['keywords_count'] = gsc.get('keywords_count', 0)
post_info['top_keywords'] = ','.join(gsc.get('keywords', [])[:5])
return posts
def export_enriched_csv(self, posts, output_csv):
"""Export enriched posts data to CSV."""
if not posts:
self.log("❌ No posts to export")
return
try:
fieldnames = [
'ID', 'Title', 'URL', 'SEO Title', 'Meta Description',
'traffic', 'users', 'bounce_rate', 'avg_session_duration',
'impressions', 'clicks', 'avg_position', 'ctr', 'keywords_count', 'top_keywords'
]
# Add any extra fields from original posts
all_keys = set()
for post in posts.values():
all_keys.update(post.keys())
extra_fields = [k for k in sorted(all_keys)
if k not in fieldnames and k not in ['ga_data', 'gsc_data', 'normalized_url', 'slug']]
fieldnames.extend(extra_fields)
with open(output_csv, 'w', newline='', encoding='utf-8') as f:
writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='ignore')
writer.writeheader()
for post_id, post_info in sorted(posts.items()):
row = {'ID': post_id}
row.update(post_info)
# Clean up nested dicts
for key in ['ga_data', 'gsc_data']:
row.pop(key, None)
writer.writerow(row)
self.log(f"✓ Exported {len(posts)} posts to {output_csv}")
except Exception as e:
self.log(f"❌ Error exporting CSV: {e}")
def export_log(self, log_file):
"""Export analysis log and unmatched URLs."""
try:
with open(log_file, 'w', encoding='utf-8') as f:
f.write("SEO Analytics Import Report\n")
f.write("=" * 60 + "\n\n")
f.write("Import Log:\n")
f.write("-" * 60 + "\n")
for log_msg in self.logs:
f.write(log_msg + "\n")
f.write("\n" + "=" * 60 + "\n")
f.write(f"Unmatched URLs ({len(self.unmatched_urls)} total):\n")
f.write("-" * 60 + "\n")
if self.unmatched_urls:
# Sort by impressions descending
for url_data in sorted(self.unmatched_urls,
key=lambda x: x['impressions'],
reverse=True):
f.write(f"\nURL: {url_data['url']}\n")
f.write(f" Impressions: {url_data['impressions']}\n")
f.write(f" Clicks: {url_data['clicks']}\n")
f.write(f" Avg Position: {url_data['avg_position']:.1f}\n")
else:
f.write("✓ All URLs matched successfully!\n")
self.log(f"✓ Exported log to {log_file}")
except Exception as e:
self.log(f"❌ Error exporting log: {e}")
    def run(self, ga_csv, gsc_csv, posts_csv, output_csv):
        """Run complete import workflow.

        Loads the GA4 export, the GSC export and the WordPress posts CSV,
        matches analytics rows onto posts, flattens the metrics onto each
        post, then writes the enriched CSV and a text report to
        ``<output_dir>/logs/import_log.txt``.
        """
        self.log("Starting analytics import...")
        self.log(f"GA4 CSV: {ga_csv}")
        self.log(f"GSC CSV: {gsc_csv}")
        self.log(f"Posts CSV: {posts_csv}\n")
        # Load data
        ga_data = self.load_ga4_data(ga_csv)
        gsc_data = self.load_gsc_data(gsc_csv)
        posts = self.load_posts_csv(posts_csv)
        if not posts:
            self.log("❌ No posts found. Cannot proceed.")
            return
        # Match and merge
        posts = self.match_analytics_to_posts(posts, ga_data, gsc_data)
        posts = self.enrich_posts_data(posts)
        # Export
        self.export_enriched_csv(posts, output_csv)
        # Export log
        log_dir = self.output_dir / 'logs'
        # NOTE(review): no parents=True — assumes output_dir already exists; confirm.
        log_dir.mkdir(exist_ok=True)
        log_file = log_dir / 'import_log.txt'
        self.export_log(log_file)
        self.log("\n✓ Analytics import complete!")
def main():
    """CLI entry point: parse paths and run the analytics import."""
    parser = argparse.ArgumentParser(description='Import and merge analytics data')
    # (flag, default path, help text) for each CLI option.
    cli_options = (
        ('--ga-export', Path('input/analytics/ga4_export.csv'),
         'GA4 export CSV path'),
        ('--gsc-export', Path('input/analytics/gsc/Pages.csv'),
         'Search Console export CSV path (Pages data)'),
        ('--posts-csv', Path('input/new-propositions.csv'),
         'Posts CSV path'),
        ('--output', Path('output/results/posts_with_analytics.csv'),
         'Output CSV path'),
    )
    for flag, default, help_text in cli_options:
        parser.add_argument(flag, type=Path, default=default, help=help_text)
    args = parser.parse_args()
    AnalyticsImporter().run(args.ga_export, args.gsc_export, args.posts_csv, args.output)
# Allow running this module directly as a script.
if __name__ == '__main__':
    main()

View File

@@ -1,614 +0,0 @@
#!/usr/bin/env python3
"""
WordPress Category Management Script
Fetches all categories from WordPress sites, proposes new categories,
and allows assigning posts to categories or websites using AI recommendations.
"""
import csv
import json
import logging
import sys
from pathlib import Path
from typing import Dict, List, Optional
import requests
from requests.auth import HTTPBasicAuth
import time
from datetime import datetime
from config import Config
# Setup logging: timestamped INFO-level messages via the root handler.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
# Module-level logger shared by all classes in this script.
logger = logging.getLogger(__name__)
class AICategoryAdvisor:
    """AI-powered advisor for category and site recommendations.

    Wraps the OpenRouter chat-completions API and keeps running totals of
    API calls made and an estimated cost in USD.
    """
    def __init__(self):
        # Key and model name come from Config (env vars / config.yaml).
        self.openrouter_api_key = Config.OPENROUTER_API_KEY
        self.ai_model = Config.AI_MODEL
        self.api_calls = 0  # number of completed API requests
        self.ai_cost = 0.0  # rough running cost estimate in USD
    def get_ai_category_recommendations(self, posts_batch: List[Dict]) -> Optional[List[Dict]]:
        """
        Get AI recommendations for category assignments.
        Args:
            posts_batch: List of posts to analyze
        Returns:
            List of recommendations for each post, or None when the API key
            is missing or the request fails.
        """
        if not self.openrouter_api_key:
            logger.error("OPENROUTER_API_KEY not set")
            return None
        # Format posts for AI analysis
        formatted_posts = []
        for i, post in enumerate(posts_batch, 1):
            title = post.get('title', {}).get('rendered', 'Untitled')
            content = post.get('content', {}).get('rendered', '')[:500] # First 500 chars
            current_categories = post.get('categories', [])
            formatted_posts.append(
                f"{i}. POST ID: {post['id']}\n"
                f" Title: {title}\n"
                f" Content Preview: {content}...\n"
                f" Current Categories: {current_categories}\n"
            )
        posts_text = "\n".join(formatted_posts)
        prompt = f"""Analyze these blog posts and provide category recommendations.
Website Strategy:
- mistergeek.net: High-value topics (VPN, Software, Gaming, General Tech, SEO, Content Marketing)
- webscroll.fr: Torrenting, File-Sharing, Tracker guides (niche audience)
- hellogeek.net: Low-traffic, experimental, off-brand, or niche content
{posts_text}
For EACH post, provide a JSON object with:
{{
"post_id": <id>,
"recommended_category": "<SUGGESTED_CATEGORY>",
"recommended_site": "<SITE_NAME>",
"reason": "<Brief reason for recommendation>",
"confidence": "<High|Medium|Low>"
}}
Return ONLY a JSON array. Example:
[
{{"post_id": 2845, "recommended_category": "VPN", "recommended_site": "mistergeek.net", "reason": "Core VPN topic", "confidence": "High"}},
{{"post_id": 1234, "recommended_category": "Torrenting", "recommended_site": "webscroll.fr", "reason": "Torrent tracker content", "confidence": "High"}}
]
Analyze all posts and provide recommendations for EVERY post in the batch."""
        try:
            logger.info(f" Sending batch to AI for category recommendations...")
            response = requests.post(
                "https://openrouter.ai/api/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {self.openrouter_api_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": self.ai_model,
                    "messages": [
                        {"role": "user", "content": prompt}
                    ],
                    "temperature": 0.3, # Lower temp for more consistent recommendations
                },
                timeout=60
            )
            response.raise_for_status()
            result = response.json()
            self.api_calls += 1
            # Track cost
            usage = result.get('usage', {})
            input_tokens = usage.get('prompt_tokens', 0)
            output_tokens = usage.get('completion_tokens', 0)
            # Using Claude 3.5 Sonnet pricing: $3/$15 per 1M tokens
            # NOTE(review): pricing is hard-coded although AI_MODEL is
            # configurable — the estimate is wrong for other models; confirm.
            self.ai_cost += (input_tokens * 3 + output_tokens * 15) / 1_000_000
            recommendations_text = result['choices'][0]['message']['content'].strip()
            logger.info(f" ✓ Got recommendations (tokens: {input_tokens}+{output_tokens})")
            # Parse the recommendations
            return self._parse_recommendations(recommendations_text)
        except Exception as e:
            logger.error(f"Error getting AI recommendations: {e}")
            return None
    def _parse_recommendations(self, recommendations_json: str) -> List[Dict]:
        """Parse JSON recommendations from AI.

        The model may wrap the array in prose, so the text is sliced from
        the first '[' to the last ']' before parsing. Returns [] on failure.
        """
        try:
            # Try to extract JSON from response
            start_idx = recommendations_json.find('[')
            end_idx = recommendations_json.rfind(']') + 1
            if start_idx == -1 or end_idx == 0:
                logger.error("Could not find JSON array in response")
                return []
            json_str = recommendations_json[start_idx:end_idx]
            recommendations = json.loads(json_str)
            return recommendations
        except json.JSONDecodeError as e:
            logger.error(f"Error parsing JSON recommendations: {e}")
            logger.debug(f"Response was: {recommendations_json[:500]}")
            return []
class CategoryManager:
    """Manage WordPress categories across multiple sites.

    Fetches current categories and posts per site, proposes new categories,
    builds per-post assignment proposals (AI-backed with a keyword
    fallback), and exports everything to timestamped CSV files.
    """
    def __init__(self):
        """Initialize the category manager with sites from Config."""
        self.sites = Config.WORDPRESS_SITES
        self.categories_by_site = {}
        self.posts_by_site = {}
        self.proposed_categories = {}
        self.category_assignments = []
        self.ai_advisor = AICategoryAdvisor()
    def fetch_categories_from_site(self, site_name: str, site_config: Dict) -> List[Dict]:
        """
        Fetch all categories from a WordPress site.
        Args:
            site_name: Website name
            site_config: Site configuration dict
        Returns:
            List of categories with metadata (empty list on request error)
        """
        logger.info(f"Fetching categories from {site_name}...")
        categories = []
        base_url = site_config['url'].rstrip('/')
        api_url = f"{base_url}/wp-json/wp/v2/categories"
        auth = HTTPBasicAuth(site_config['username'], site_config['password'])
        try:
            # Fetch all categories (pagination if needed)
            page = 1
            while True:
                params = {
                    'page': page,
                    'per_page': 100,
                }
                response = requests.get(api_url, params=params, auth=auth, timeout=10)
                if response.status_code == 401:
                    logger.error(f"Unauthorized access to {site_name}. Check credentials.")
                    break
                elif response.status_code == 403:
                    logger.error(f"Forbidden access to {site_name}. Check permissions.")
                    break
                response.raise_for_status()
                page_categories = response.json()
                if not page_categories:
                    break
                categories.extend(page_categories)
                logger.info(f" Page {page}: Got {len(page_categories)} categories")
                # Check if there are more pages
                link_header = response.headers.get('Link', '')
                if 'rel="next"' not in link_header:
                    break
                page += 1
                time.sleep(0.5)
            logger.info(f"✓ Total categories from {site_name}: {len(categories)}")
        except requests.exceptions.RequestException as e:
            logger.error(f"Error fetching categories from {site_name}: {e}")
            return []
        return categories
    def fetch_posts_from_site(self, site_name: str, site_config: Dict) -> List[Dict]:
        """
        Fetch posts from a WordPress site to see current category assignments.
        Args:
            site_name: Website name
            site_config: Site configuration dict
        Returns:
            List of posts with category information (empty list on error)
        """
        logger.info(f"Fetching posts from {site_name} to analyze category assignments...")
        posts = []
        base_url = site_config['url'].rstrip('/')
        api_url = f"{base_url}/wp-json/wp/v2/posts"
        auth = HTTPBasicAuth(site_config['username'], site_config['password'])
        try:
            page = 1
            while True:
                params = {
                    'page': page,
                    'per_page': 100,
                    'status': 'publish',
                }
                response = requests.get(api_url, params=params, auth=auth, timeout=10)
                if response.status_code == 401:
                    logger.error(f"Unauthorized access to {site_name}. Check credentials.")
                    break
                elif response.status_code == 403:
                    logger.error(f"Forbidden access to {site_name}. Check permissions.")
                    break
                response.raise_for_status()
                page_posts = response.json()
                if not page_posts:
                    break
                posts.extend(page_posts)
                logger.info(f" Page {page}: Got {len(page_posts)} posts")
                # Check if there are more pages
                link_header = response.headers.get('Link', '')
                if 'rel="next"' not in link_header:
                    break
                page += 1
                time.sleep(0.5)
            logger.info(f"✓ Total posts from {site_name}: {len(posts)}")
        except requests.exceptions.RequestException as e:
            logger.error(f"Error fetching posts from {site_name}: {e}")
            return []
        return posts
    def analyze_categories(self):
        """Analyze current categories and propose new ones."""
        logger.info("\n" + "="*70)
        logger.info("ANALYZING CURRENT CATEGORIES")
        logger.info("="*70)
        for site_name, config in self.sites.items():
            categories = self.fetch_categories_from_site(site_name, config)
            posts = self.fetch_posts_from_site(site_name, config)
            self.categories_by_site[site_name] = categories
            self.posts_by_site[site_name] = posts
            logger.info(f"\n{site_name}:")
            logger.info(f" Categories: {len(categories)}")
            logger.info(f" Posts: {len(posts)}")
            # Show top categories by post count
            if categories:
                logger.info(" Top 10 categories by post count:")
                # Sort categories by count (most posts first)
                sorted_cats = sorted(categories, key=lambda x: x.get('count', 0), reverse=True)
                for i, cat in enumerate(sorted_cats[:10]):
                    logger.info(f" {i+1}. {cat['name']} ({cat['count']} posts)")
    def propose_new_categories(self):
        """Propose new categories based on content analysis."""
        logger.info("\n" + "="*70)
        logger.info("PROPOSING NEW CATEGORIES")
        logger.info("="*70)
        # Define category proposals based on content analysis
        category_proposals = {
            'mistergeek.net': [
                {'name': 'VPN Reviews', 'description': 'Reviews of VPN services', 'parent': 0},
                {'name': 'Software Tutorials', 'description': 'Step-by-step software guides', 'parent': 0},
                {'name': 'Tech News', 'description': 'Latest technology news', 'parent': 0},
                {'name': 'Cybersecurity', 'description': 'Security tips and tools', 'parent': 0},
            ],
            'webscroll.fr': [
                {'name': 'Torrent Clients', 'description': 'Reviews of torrent clients', 'parent': 0},
                {'name': 'Privacy Tools', 'description': 'Privacy-focused tools and services', 'parent': 0},
                {'name': 'File Sharing Guide', 'description': 'Guides on file sharing methods', 'parent': 0},
            ],
            'hellogeek.net': [
                {'name': 'Experimental Tech', 'description': 'New and experimental tech', 'parent': 0},
                {'name': 'Random Thoughts', 'description': 'Opinion and commentary posts', 'parent': 0},
                {'name': 'Testing Zone', 'description': 'Posts for testing purposes', 'parent': 0},
            ]
        }
        for site_name in self.sites.keys():
            if site_name in category_proposals:
                self.proposed_categories[site_name] = category_proposals[site_name]
                logger.info(f"\n{site_name} - Proposed categories:")
                for cat in category_proposals[site_name]:
                    logger.info(f" - {cat['name']}: {cat['description']}")
    def create_category_assignment_proposals(self):
        """Create proposals for assigning posts to categories or websites."""
        logger.info("\n" + "="*70)
        logger.info("CREATING CATEGORY ASSIGNMENT PROPOSALS")
        logger.info("="*70)
        # Analyze posts and propose category assignments
        for site_name, posts in self.posts_by_site.items():
            logger.info(f"\nAnalyzing posts from {site_name} for category assignments...")
            # Process posts in batches for AI analysis
            batch_size = 10
            for i in range(0, len(posts), batch_size):
                batch = posts[i:i + batch_size]
                # Get AI recommendations for this batch
                ai_recommendations = self.ai_advisor.get_ai_category_recommendations(batch)
                if ai_recommendations:
                    # Map AI recommendations to our assignment format
                    for post in batch:
                        title = post.get('title', {}).get('rendered', 'Untitled')
                        content = post.get('content', {}).get('rendered', '')[:200] # First 200 chars
                        current_categories = post.get('categories', [])
                        # Find the AI recommendation for this post
                        ai_rec = None
                        for rec in ai_recommendations:
                            if rec.get('post_id') == post['id']:
                                ai_rec = rec
                                break
                        if ai_rec:
                            assignment = {
                                'site': site_name,
                                'post_id': post['id'],
                                'post_title': title[:50] + "..." if len(title) > 50 else title,
                                'current_categories': current_categories,
                                'proposed_category': ai_rec.get('recommended_category', 'Uncategorized'),
                                'proposed_site': ai_rec.get('recommended_site', site_name),
                                'reason': ai_rec.get('reason', ''),
                                'confidence': ai_rec.get('confidence', 'Low'),
                                'content_preview': content[:100] + "..." if len(content) > 100 else content,
                                'status': 'pending_approval'
                            }
                        else:
                            # Fallback to keyword-based suggestion if no AI recommendation
                            proposed_category = self._suggest_category_by_content(title + " " + content, site_name)
                            assignment = {
                                'site': site_name,
                                'post_id': post['id'],
                                'post_title': title[:50] + "..." if len(title) > 50 else title,
                                'current_categories': current_categories,
                                'proposed_category': proposed_category,
                                'proposed_site': site_name,
                                'reason': 'Keyword-based suggestion',
                                'confidence': 'Low',
                                'content_preview': content[:100] + "..." if len(content) > 100 else content,
                                'status': 'pending_approval'
                            }
                        self.category_assignments.append(assignment)
                else:
                    # If AI is not available, use keyword-based suggestions
                    for post in batch:
                        title = post.get('title', {}).get('rendered', 'Untitled')
                        content = post.get('content', {}).get('rendered', '')[:200] # First 200 chars
                        current_categories = post.get('categories', [])
                        proposed_category = self._suggest_category_by_content(title + " " + content, site_name)
                        assignment = {
                            'site': site_name,
                            'post_id': post['id'],
                            'post_title': title[:50] + "..." if len(title) > 50 else title,
                            'current_categories': current_categories,
                            'proposed_category': proposed_category,
                            'proposed_site': site_name,
                            'reason': 'Keyword-based suggestion',
                            'confidence': 'Low',
                            'content_preview': content[:100] + "..." if len(content) > 100 else content,
                            'status': 'pending_approval'
                        }
                        self.category_assignments.append(assignment)
        logger.info(f"Created {len(self.category_assignments)} category assignment proposals")
    def _suggest_category_by_content(self, content: str, site_name: str) -> str:
        """Suggest a category based on content keywords.

        First keyword hit wins; 'Uncategorized' when nothing matches.
        """
        content_lower = content.lower()
        # Site-specific category mappings
        category_keywords = {
            'mistergeek.net': {
                'VPN': ['vpn', 'proxy', 'privacy', 'secure', 'encryption'],
                'Software': ['software', 'app', 'tool', 'download', 'install'],
                'Gaming': ['game', 'gaming', 'console', 'steam', 'playstation'],
                'Tech News': ['news', 'update', 'release', 'announced'],
                'Cybersecurity': ['security', 'malware', 'antivirus', 'hacking', 'breach']
            },
            'webscroll.fr': {
                'Torrent': ['torrent', 'download', 'upload', 'client', 'tracker'],
                'Privacy': ['privacy', 'anonymous', 'tor', 'vpn'],
                'File Sharing': ['share', 'sharing', 'ddl', 'upload']
            },
            'hellogeek.net': {
                'Opinion': ['think', 'believe', 'opinion', 'view', 'perspective'],
                'Tutorial': ['how to', 'guide', 'tutorial', 'steps', 'instructions'],
                'Review': ['review', 'rating', 'comparison', 'test']
            }
        }
        site_categories = category_keywords.get(site_name, {})
        for category, keywords in site_categories.items():
            for keyword in keywords:
                if keyword in content_lower:
                    return category
        return 'Uncategorized'
    def export_categories_csv(self) -> str:
        """Export current categories to CSV. Returns the file path."""
        output_dir = Path(__file__).parent.parent / 'output'
        output_dir.mkdir(parents=True, exist_ok=True)
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        csv_file = output_dir / f'current_categories_{timestamp}.csv'
        fieldnames = ['site', 'category_id', 'name', 'slug', 'description', 'post_count', 'parent_id']
        with open(csv_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            for site_name, categories in self.categories_by_site.items():
                for cat in categories:
                    writer.writerow({
                        'site': site_name,
                        'category_id': cat.get('id', ''),
                        'name': cat.get('name', ''),
                        'slug': cat.get('slug', ''),
                        'description': cat.get('description', ''),
                        'post_count': cat.get('count', 0),
                        'parent_id': cat.get('parent', 0)
                    })
        logger.info(f"✓ Current categories exported to: {csv_file}")
        return str(csv_file)
    def export_proposed_categories_csv(self) -> str:
        """Export proposed new categories to CSV. Returns the file path."""
        output_dir = Path(__file__).parent.parent / 'output'
        output_dir.mkdir(parents=True, exist_ok=True)
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        csv_file = output_dir / f'proposed_categories_{timestamp}.csv'
        fieldnames = ['site', 'proposed_category', 'description', 'parent_category', 'reason']
        with open(csv_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            for site_name, categories in self.proposed_categories.items():
                for cat in categories:
                    writer.writerow({
                        'site': site_name,
                        'proposed_category': cat.get('name', ''),
                        'description': cat.get('description', ''),
                        'parent_category': cat.get('parent', 0),
                        'reason': 'Content analysis and organization improvement'
                    })
        logger.info(f"✓ Proposed categories exported to: {csv_file}")
        return str(csv_file)
    def export_category_assignments_csv(self) -> str:
        """Export category assignment proposals to CSV. Returns the file path."""
        output_dir = Path(__file__).parent.parent / 'output'
        output_dir.mkdir(parents=True, exist_ok=True)
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        csv_file = output_dir / f'category_assignments_{timestamp}.csv'
        fieldnames = ['site', 'post_id', 'post_title', 'current_categories', 'proposed_category', 'proposed_site', 'reason', 'confidence', 'content_preview', 'status']
        with open(csv_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            for assignment in self.category_assignments:
                writer.writerow(assignment)
        logger.info(f"✓ Category assignments exported to: {csv_file}")
        return str(csv_file)
    def run(self):
        """Run complete category management process."""
        logger.info("="*70)
        logger.info("WORDPRESS CATEGORY MANAGEMENT")
        logger.info("="*70)
        logger.info("Sites configured: " + ", ".join(self.sites.keys()))
        logger.info("")
        # Analyze current categories
        self.analyze_categories()
        # Propose new categories
        self.propose_new_categories()
        # Create category assignment proposals
        self.create_category_assignment_proposals()
        # Export all data
        logger.info("\n" + "="*70)
        logger.info("EXPORTING RESULTS")
        logger.info("="*70)
        categories_csv = self.export_categories_csv()
        proposed_csv = self.export_proposed_categories_csv()
        assignments_csv = self.export_category_assignments_csv()
        # Print summary
        logger.info("\n" + "="*70)
        logger.info("CATEGORY MANAGEMENT SUMMARY")
        logger.info("="*70)
        total_categories = sum(len(cats) for cats in self.categories_by_site.values())
        logger.info(f"Total current categories: {total_categories}")
        total_proposed = sum(len(props) for props in self.proposed_categories.values())
        logger.info(f"Total proposed categories: {total_proposed}")
        logger.info(f"Category assignment proposals: {len(self.category_assignments)}")
        # AI Advisor stats
        logger.info(f"AI API calls made: {self.ai_advisor.api_calls}")
        logger.info(f"AI cost: ${self.ai_advisor.ai_cost:.4f}")
        # Fix: the separator previously used ''*70 (an empty string), which
        # printed nothing; '='*70 renders the intended horizontal rule.
        logger.info(f"\n{'='*70}")
        logger.info("Exported files:")
        logger.info(f" • Current categories: {categories_csv}")
        logger.info(f" • Proposed categories: {proposed_csv}")
        logger.info(f" • Category assignments: {assignments_csv}")
        logger.info(f"{'='*70}")
        logger.info(f"\n✓ Category management complete!")
        logger.info(f"\nNext steps:")
        logger.info(f" 1. Review proposed_categories.csv for new categories to add")
        logger.info(f" 2. Review category_assignments.csv for posts that need re-categorization")
        logger.info(f" 3. Manually approve or modify proposals before applying changes")
def main():
    """Command-line entry point: parse arguments and run the manager."""
    import argparse
    # No options are defined yet; parsing still provides --help and
    # rejects unexpected arguments.
    argparse.ArgumentParser(
        description='Manage WordPress categories across multiple sites'
    ).parse_args()
    CategoryManager().run()
# Allow running this module directly as a script.
if __name__ == '__main__':
    main()

View File

@@ -1,110 +0,0 @@
"""
Configuration module for WordPress SEO automation.
Loads and validates environment variables and YAML configuration.
"""
import os
import yaml
from dotenv import load_dotenv
from pathlib import Path
# Load environment variables from .env file
load_dotenv()
class Config:
    """Configuration class for WordPress SEO automation.

    Each setting resolves with the precedence: environment variable, then
    config.yaml (one directory above this file), then a built-in default.
    All values are evaluated once, at import time, as class attributes.
    """
    # Load configuration from YAML file
    CONFIG_FILE = Path(__file__).parent.parent / 'config.yaml'
    if CONFIG_FILE.exists():
        with open(CONFIG_FILE, 'r', encoding='utf-8') as f:
            # `or {}` guards against an empty YAML file: safe_load returns
            # None there, and the .get() chains below would crash on it.
            YAML_CONFIG = yaml.safe_load(f) or {}
    else:
        YAML_CONFIG = {}
    # WordPress Settings (Primary site)
    WORDPRESS_URL = os.getenv('WORDPRESS_URL', YAML_CONFIG.get('primary_site', {}).get('url', '')).rstrip('/')
    WORDPRESS_USERNAME = os.getenv('WORDPRESS_USERNAME', YAML_CONFIG.get('primary_site', {}).get('username', ''))
    WORDPRESS_APP_PASSWORD = os.getenv('WORDPRESS_APP_PASSWORD', YAML_CONFIG.get('primary_site', {}).get('password', ''))
    # Multi-site WordPress Configuration
    # Per-site credentials fall back to the shared WORDPRESS_* env vars.
    WORDPRESS_SITES = {
        'mistergeek.net': {
            'url': os.getenv('WORDPRESS_MISTERGEEK_URL', YAML_CONFIG.get('wordpress_sites', {}).get('mistergeek.net', {}).get('url', 'https://www.mistergeek.net')),
            'username': os.getenv('WORDPRESS_MISTERGEEK_USERNAME', os.getenv('WORDPRESS_USERNAME', YAML_CONFIG.get('wordpress_sites', {}).get('mistergeek.net', {}).get('username', ''))),
            'password': os.getenv('WORDPRESS_MISTERGEEK_PASSWORD', os.getenv('WORDPRESS_APP_PASSWORD', YAML_CONFIG.get('wordpress_sites', {}).get('mistergeek.net', {}).get('password', ''))),
        },
        'webscroll.fr': {
            'url': os.getenv('WORDPRESS_WEBSCROLL_URL', YAML_CONFIG.get('wordpress_sites', {}).get('webscroll.fr', {}).get('url', 'https://www.webscroll.fr')),
            'username': os.getenv('WORDPRESS_WEBSCROLL_USERNAME', os.getenv('WORDPRESS_USERNAME', YAML_CONFIG.get('wordpress_sites', {}).get('webscroll.fr', {}).get('username', ''))),
            'password': os.getenv('WORDPRESS_WEBSCROLL_PASSWORD', os.getenv('WORDPRESS_APP_PASSWORD', YAML_CONFIG.get('wordpress_sites', {}).get('webscroll.fr', {}).get('password', ''))),
        },
        'hellogeek.net': {
            'url': os.getenv('WORDPRESS_HELLOGEEK_URL', YAML_CONFIG.get('wordpress_sites', {}).get('hellogeek.net', {}).get('url', 'https://www.hellogeek.net')),
            'username': os.getenv('WORDPRESS_HELLOGEEK_USERNAME', os.getenv('WORDPRESS_USERNAME', YAML_CONFIG.get('wordpress_sites', {}).get('hellogeek.net', {}).get('username', ''))),
            'password': os.getenv('WORDPRESS_HELLOGEEK_PASSWORD', os.getenv('WORDPRESS_APP_PASSWORD', YAML_CONFIG.get('wordpress_sites', {}).get('hellogeek.net', {}).get('password', ''))),
        }
    }
    # OpenRouter API Settings
    OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY', YAML_CONFIG.get('ai_model', {}).get('api_key', ''))
    AI_MODEL = os.getenv('AI_MODEL', YAML_CONFIG.get('ai_model', {}).get('name', 'anthropic/claude-3.5-sonnet'))
    # Script Settings
    BATCH_SIZE = int(os.getenv('BATCH_SIZE', str(YAML_CONFIG.get('script_settings', {}).get('batch_size', 100))))
    API_DELAY_SECONDS = float(os.getenv('API_DELAY_SECONDS', str(YAML_CONFIG.get('script_settings', {}).get('api_delay_seconds', 0.5))))
    # Analysis Settings
    ANALYSIS_MIN_POSITION = int(os.getenv('ANALYSIS_MIN_POSITION', str(YAML_CONFIG.get('analysis_settings', {}).get('min_position', 11))))
    ANALYSIS_MAX_POSITION = int(os.getenv('ANALYSIS_MAX_POSITION', str(YAML_CONFIG.get('analysis_settings', {}).get('max_position', 30))))
    ANALYSIS_MIN_IMPRESSIONS = int(os.getenv('ANALYSIS_MIN_IMPRESSIONS', str(YAML_CONFIG.get('analysis_settings', {}).get('min_impressions', 50))))
    ANALYSIS_TOP_N_POSTS = int(os.getenv('ANALYSIS_TOP_N_POSTS', str(YAML_CONFIG.get('analysis_settings', {}).get('top_n_posts', 20))))
    # Output directory
    OUTPUT_DIR = Path(os.getenv('OUTPUT_DIR', YAML_CONFIG.get('output_settings', {}).get('output_dir', './output')))
    @classmethod
    def validate(cls):
        """Validate that all required configuration is present.

        Raises:
            ValueError: with one line per missing required setting.
        Returns:
            True once validation passes and the output directory exists.
        """
        errors = []
        if not cls.WORDPRESS_URL:
            errors.append("WORDPRESS_URL is required")
        if not cls.WORDPRESS_USERNAME:
            errors.append("WORDPRESS_USERNAME is required")
        if not cls.WORDPRESS_APP_PASSWORD:
            errors.append("WORDPRESS_APP_PASSWORD is required")
        if not cls.OPENROUTER_API_KEY:
            errors.append("OPENROUTER_API_KEY is required (get one from https://openrouter.ai/)")
        if errors:
            raise ValueError("Configuration errors:\n" + "\n".join(f" - {e}" for e in errors))
        # Create output directory if it doesn't exist.
        # parents=True so a nested OUTPUT_DIR (e.g. ./out/results) also works.
        cls.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
        return True
    @classmethod
    def get_wordpress_auth(cls):
        """Get WordPress authentication tuple (username, app password)."""
        return (cls.WORDPRESS_USERNAME, cls.WORDPRESS_APP_PASSWORD)
    @classmethod
    def get_api_base_url(cls):
        """Get WordPress REST API base URL."""
        return f"{cls.WORDPRESS_URL}/wp-json/wp/v2"
    @classmethod
    def get_site_config(cls, site_name):
        """Get configuration for a specific site (empty dict if unknown)."""
        return cls.WORDPRESS_SITES.get(site_name, {})
    @classmethod
    def get_all_sites(cls):
        """Get all configured WordPress site names (dict keys view)."""
        return cls.WORDPRESS_SITES.keys()

View File

@@ -1,348 +0,0 @@
"""
Content gap analyzer for SEO strategy.
Identifies missing topics and content opportunities using AI analysis.
"""
import csv
import json
import argparse
import time
from pathlib import Path
from collections import defaultdict
from openai import OpenAI
from config import Config
class ContentGapAnalyzer:
"""Identify content gaps and opportunities."""
def __init__(self):
"""Initialize analyzer."""
self.config = Config
self.output_dir = self.config.OUTPUT_DIR
self.logs = []
self.client = None
if self.config.OPENROUTER_API_KEY:
self.client = OpenAI(
base_url="https://openrouter.ai/api/v1",
api_key=self.config.OPENROUTER_API_KEY,
)
def log(self, message):
"""Add message to log."""
self.logs.append(message)
print(message)
def load_posts(self, posts_csv):
"""Load post titles and data."""
posts = []
if not posts_csv.exists():
self.log(f"❌ File not found: {posts_csv}")
return posts
try:
with open(posts_csv, 'r', encoding='utf-8') as f:
reader = csv.DictReader(f)
for row in reader:
posts.append({
'id': row.get('ID', ''),
'title': row.get('Title', ''),
'url': row.get('URL', ''),
'traffic': int(row.get('traffic', 0) or 0),
'impressions': int(row.get('impressions', 0) or 0),
'top_keywords': row.get('top_keywords', '')
})
self.log(f"✓ Loaded {len(posts)} posts")
except Exception as e:
self.log(f"❌ Error reading posts: {e}")
return posts
def load_gsc_data(self, gsc_csv):
"""Load Search Console queries for gap analysis."""
queries = []
if not gsc_csv.exists():
self.log(f"⚠️ GSC file not found: {gsc_csv}")
return queries
try:
with open(gsc_csv, 'r', encoding='utf-8') as f:
reader = csv.DictReader(f)
for row in reader:
try:
query = row.get('Query', '').strip()
if not query:
continue
impressions = int(row.get('Impressions', 0) or 0)
clicks = int(row.get('Clicks', 0) or 0)
# Only include queries with impressions but low clicks
if impressions > 0 and (clicks / impressions < 0.05):
queries.append({
'query': query,
'impressions': impressions,
'clicks': clicks,
'ctr': clicks / impressions if impressions > 0 else 0
})
except (ValueError, TypeError):
continue
self.log(f"✓ Loaded {len(queries)} underperforming queries")
except Exception as e:
self.log(f"⚠️ Error reading GSC file: {e}")
return queries
def extract_topics(self, posts):
    """Extract topic clusters from post titles using AI.

    Sends up to the first 100 titles in a single prompt and expects a
    JSON object back (post_topics / topic_clusters / coverage_gaps /
    niche). Returns {} when the client is missing, the call fails, or
    the response contains no parseable JSON object.
    """
    if not self.client or len(posts) == 0:
        self.log("⚠️ Cannot extract topics without AI client or posts")
        return {}
    try:
        self.log("🤖 Extracting topic clusters from post titles...")
        # Batch posts into groups
        titles = [p['title'] for p in posts][:100]  # Limit to first 100
        prompt = f"""Analyze these {len(titles)} blog post titles and identify topic clusters:
Titles:
{chr(10).join(f'{i+1}. {t}' for i, t in enumerate(titles))}
Extract for each post:
1. Primary topic category
2. Subtopics covered
3. Content type (guide, tutorial, review, comparison, etc.)
Then identify:
1. Top 10 topic clusters with post counts
2. Most common subtopics
3. Over/under-represented topics
Return JSON:
{{
"post_topics": {{
"1": {{"primary": "...", "subtopics": ["..."], "type": "..."}},
...
}},
"topic_clusters": [
{{"cluster": "...", "post_count": 0, "importance": "high/medium/low"}}
],
"coverage_gaps": ["topic 1", "topic 2", ...],
"niche": "detected niche or industry"
}}"""
        response = self.client.chat.completions.create(
            model=self.config.AI_MODEL,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
            max_tokens=1500
        )
        try:
            # Models often wrap JSON in prose; slice out the outermost
            # {...} span before decoding.
            result_text = response.choices[0].message.content
            start_idx = result_text.find('{')
            end_idx = result_text.rfind('}') + 1
            if start_idx >= 0 and end_idx > start_idx:
                return json.loads(result_text[start_idx:end_idx])
        except json.JSONDecodeError:
            self.log("⚠️ Could not parse topic extraction response")
        return {}
    except Exception as e:
        self.log(f"⚠️ Topic extraction failed: {e}")
        return {}
def identify_content_gaps(self, topic_analysis, queries):
    """Use AI to identify content gaps and suggest new topics.

    Combines the topic clusters from extract_topics() with the top 20
    underperforming GSC queries and asks the model for up to 20 new
    content ideas. Returns the 'content_opportunities' list, or [] when
    the client is missing or parsing fails.
    """
    if not self.client:
        return []
    try:
        self.log("🤖 Identifying content gaps and opportunities...")
        clusters = topic_analysis.get('topic_clusters', [])
        gaps = topic_analysis.get('coverage_gaps', [])
        niche = topic_analysis.get('niche', 'general')
        # Prepare query analysis
        top_queries = sorted(queries, key=lambda x: x['impressions'], reverse=True)[:20]
        queries_str = '\n'.join([f"- {q['query']} ({q['impressions']} impr, {q['ctr']:.1%} CTR)"
                                 for q in top_queries])
        prompt = f"""Based on content analysis and search demand, identify content gaps:
Existing Topics: {', '.join([c.get('cluster', '') for c in clusters[:10]])}
Coverage Gaps: {', '.join(gaps[:5])}
Niche: {niche}
Top Underperforming Queries (low CTR despite impressions):
{queries_str}
Identify high-value missing topics that could:
1. Fill coverage gaps
2. Target underperforming queries (CTR improvement)
3. Capitalize on search demand
4. Complement existing content
For each suggestion:
- Topic title
- Why it's valuable (search demand + intent)
- Search volume estimate (high/medium/low)
- How it complements existing content
- Recommended content format
- Estimated traffic potential
Prioritize by traffic opportunity. Max 20 ideas.
Return JSON:
{{
"content_opportunities": [
{{
"title": "...",
"why_valuable": "...",
"search_volume": "high/medium/low",
"complements": "existing topic",
"format": "guide/tutorial/comparison/review/list",
"traffic_potential": number,
"priority": "high/medium/low"
}}
]
}}"""
        response = self.client.chat.completions.create(
            model=self.config.AI_MODEL,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
            max_tokens=2000
        )
        try:
            # Slice the outermost {...} span out of the model output.
            result_text = response.choices[0].message.content
            start_idx = result_text.find('{')
            end_idx = result_text.rfind('}') + 1
            if start_idx >= 0 and end_idx > start_idx:
                result = json.loads(result_text[start_idx:end_idx])
                return result.get('content_opportunities', [])
        except json.JSONDecodeError:
            self.log("⚠️ Could not parse gap analysis response")
        return []
    except Exception as e:
        self.log(f"⚠️ Gap analysis failed: {e}")
        return []
def export_gaps_csv(self, gaps, output_csv):
"""Export content gaps to CSV."""
if not gaps:
self.log("⚠️ No gaps to export")
return
try:
fieldnames = [
'priority', 'title', 'why_valuable', 'search_volume',
'complements', 'format', 'traffic_potential'
]
with open(output_csv, 'w', newline='', encoding='utf-8') as f:
writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='ignore')
writer.writeheader()
for gap in sorted(gaps, key=lambda x: x.get('priority') == 'high', reverse=True):
writer.writerow(gap)
self.log(f"✓ Exported {len(gaps)} content gaps to {output_csv}")
except Exception as e:
self.log(f"❌ Error exporting CSV: {e}")
def export_topic_clusters_json(self, topic_analysis, output_json):
"""Export topic analysis to JSON."""
if not topic_analysis:
return
try:
with open(output_json, 'w', encoding='utf-8') as f:
json.dump(topic_analysis, f, indent=2)
self.log(f"✓ Exported topic analysis to {output_json}")
except Exception as e:
self.log(f"❌ Error exporting JSON: {e}")
def export_log(self, log_file):
"""Export analysis log."""
try:
with open(log_file, 'w', encoding='utf-8') as f:
f.write("Content Gap Analysis Report\n")
f.write("=" * 60 + "\n\n")
for msg in self.logs:
f.write(msg + "\n")
self.log(f"✓ Exported log to {log_file}")
except Exception as e:
self.log(f"❌ Error exporting log: {e}")
def run(self, posts_csv, gsc_csv, output_csv):
    """Run complete analysis workflow.

    Steps: load posts and GSC queries, extract topic clusters with AI,
    identify content gaps, then export the gaps CSV, the topic JSON
    (into OUTPUT_DIR) and the session log (OUTPUT_DIR/logs).
    """
    self.log("📊 Starting content gap analysis...")
    self.log(f"Posts: {posts_csv}")
    self.log(f"GSC queries: {gsc_csv}\n")
    # Load data
    posts = self.load_posts(posts_csv)
    queries = self.load_gsc_data(gsc_csv)
    if not posts:
        # Nothing to analyze without posts; GSC data alone is not enough.
        return
    # Extract topics
    topic_analysis = self.extract_topics(posts)
    if topic_analysis:
        self.log(f"✓ Identified {len(topic_analysis.get('topic_clusters', []))} topic clusters")
    # Identify gaps
    gaps = self.identify_content_gaps(topic_analysis, queries)
    if gaps:
        self.log(f"✓ Identified {len(gaps)} content opportunities")
    # Export
    self.log("\n📁 Exporting results...")
    self.export_gaps_csv(gaps, output_csv)
    topic_json = self.output_dir / 'topic_clusters.json'
    self.export_topic_clusters_json(topic_analysis, topic_json)
    # Export log
    log_dir = self.output_dir / 'logs'
    log_dir.mkdir(exist_ok=True)
    log_file = log_dir / 'content_gap_analysis_log.txt'
    self.export_log(log_file)
    self.log("\n✓ Content gap analysis complete!")
def main():
    """CLI entry point: parse paths and run the gap analyzer."""
    parser = argparse.ArgumentParser(description='Analyze content gaps')
    parser.add_argument('--posts-csv', type=Path,
                        default=Path('output/results/posts_with_analytics.csv'),
                        help='Posts CSV')
    parser.add_argument('--gsc-queries', type=Path,
                        default=Path('input/analytics/gsc/Requêtes.csv'),
                        help='GSC queries CSV')
    parser.add_argument('--output', type=Path,
                        default=Path('output/results/content_gaps.csv'),
                        help='Output gaps CSV')
    options = parser.parse_args()
    ContentGapAnalyzer().run(options.posts_csv, options.gsc_queries, options.output)


if __name__ == '__main__':
    main()

View File

@@ -1,466 +0,0 @@
"""
Multi-Site Content Strategy Analyzer
Analyzes all content (published + drafts) across 3 websites.
Recommends optimal distribution and consolidation strategy.
"""
import csv
import json
import argparse
from pathlib import Path
from collections import defaultdict
from datetime import datetime
class ContentStrategyAnalyzer:
    """Analyze and optimize content distribution across multiple sites."""

    def __init__(self):
        """Initialize analyzer and ensure the output directory tree exists.

        Creates ./output with analysis/, reports/ and logs/ subfolders.
        """
        self.output_dir = Path('output')
        # parents=True keeps this robust if output_dir is ever changed to
        # a nested path; exist_ok makes reruns idempotent.
        self.output_dir.mkdir(parents=True, exist_ok=True)
        (self.output_dir / 'analysis').mkdir(exist_ok=True)
        (self.output_dir / 'reports').mkdir(exist_ok=True)
        (self.output_dir / 'logs').mkdir(exist_ok=True)
        self.logs = []  # session log, echoed to stdout by log()

    def log(self, message):
        """Record *message* in the session log and print it."""
        self.logs.append(message)
        print(message)
def load_wordpress_posts(self, csv_path):
"""Load published WordPress posts."""
posts = {}
if not csv_path.exists():
self.log(f"⚠️ WordPress posts file not found: {csv_path}")
return posts
try:
with open(csv_path, 'r', encoding='utf-8') as f:
reader = csv.DictReader(f)
for row in reader:
post_id = row.get('ID') or row.get('post_id')
if not post_id:
continue
posts[post_id] = {
'source': 'wordpress',
'status': 'published',
'title': row.get('Title') or row.get('title') or row.get('post_title') or '',
'url': row.get('URL') or row.get('url') or row.get('post_url') or '',
'author': row.get('Author') or row.get('author') or 'Unknown',
'traffic': int(row.get('traffic', 0) or 0),
'impressions': int(row.get('impressions', 0) or 0),
'position': float(row.get('avg_position', 0) or 0),
'category': row.get('Category') or row.get('category') or '',
}
self.log(f"✓ Loaded {len(posts)} published WordPress posts")
except Exception as e:
self.log(f"❌ Error reading WordPress posts: {e}")
return posts
def load_draft_posts(self, csv_path):
"""Load draft/unpublished posts."""
posts = {}
if not csv_path.exists():
self.log(f"⚠️ Draft posts file not found: {csv_path}")
return posts
try:
with open(csv_path, 'r', encoding='utf-8') as f:
reader = csv.DictReader(f)
for row in reader:
post_id = row.get('ID') or row.get('post_id')
if not post_id:
continue
posts[post_id] = {
'source': 'draft',
'status': 'draft',
'title': row.get('Title') or row.get('title') or row.get('post_title') or '',
'url': row.get('URL') or row.get('url') or row.get('post_url') or '',
'author': row.get('Author') or row.get('author') or 'Unknown',
'traffic': 0, # Drafts have no traffic
'impressions': 0,
'position': 0,
'category': row.get('Category') or row.get('category') or '',
}
self.log(f"✓ Loaded {len(posts)} draft posts")
except Exception as e:
self.log(f"❌ Error reading draft posts: {e}")
return posts
def classify_post_topic(self, post):
    """Classify post into a topic area by keyword match on title+category.

    Buckets are checked in declaration order and the first keyword hit
    wins; posts matching nothing fall back to 'other'.
    """
    haystack = f"{post['title'].lower()} {post['category'].lower()}"
    topic_keywords = {
        'torrent': ['torrent', 'ygg', 'ratio', 'tracker', 'magnet', 'seedbox', 'upload'],
        'streaming': ['stream', 'film', 'série', 'netflix', 'disney', 'platforma'],
        'vpn': ['vpn', 'proxy', 'anonyme', 'privacy', 'chiffr'],
        'software': ['software', 'tool', 'app', 'logiciel', 'outil', 'program'],
        'gaming': ['game', 'jeu', 'gaming', 'emula', 'console', 'retro'],
        'download': ['download', 'télécharge', 'ddl', 'upload'],
        'tech': ['tech', 'informatique', 'code', 'programming', 'developer'],
        'other': [],
    }
    for topic, keywords in topic_keywords.items():
        # 'other' has no keywords, so any() is False and it never matches here.
        if any(keyword in haystack for keyword in keywords):
            return topic
    return 'other'
def classify_website(self, post):
"""Determine which website this post should be on."""
topic = self.classify_post_topic(post)
author = post.get('author', '').strip()
is_sponsored = author == 'Expert'
# Website assignment rules
if topic == 'torrent' or topic == 'download':
return {
'site': 'webscroll.fr',
'reason': f'Torrent/file-sharing content',
'priority': 'HIGH' if post['traffic'] > 100 else 'MEDIUM'
}
if topic in ['vpn', 'software', 'gaming', 'tech']:
return {
'site': 'mistergeek.net',
'reason': f'{topic.capitalize()} - core content',
'priority': 'HIGH' if post['traffic'] > 50 else 'MEDIUM'
}
if topic == 'streaming' and post['traffic'] < 100:
return {
'site': 'hellogeek.net',
'reason': 'Low-traffic streaming content',
'priority': 'LOW'
}
if topic == 'other' or post['traffic'] < 10:
return {
'site': 'hellogeek.net',
'reason': 'Off-brand or low-traffic content',
'priority': 'LOW'
}
# Default to main site
return {
'site': 'mistergeek.net',
'reason': 'Core content',
'priority': 'MEDIUM'
}
def classify_content_action(self, post):
    """Determine what action to take with this post.

    Returns one of: REVIEW_PUBLISH_OR_DELETE, REPUBLISH,
    DELETE_OR_CONSOLIDATE, KEEP_OPTIMIZE, MOVE_TO_OTHER_SITE,
    KEEP_MONITOR.

    Fixes vs. the previous version: the unused classify_post_topic()
    call was removed, and the position ranges are now contiguous — the
    old checks (0<p<11, 11<p<30, p>30) let positions exactly 11 and 30
    fall through to the wrong branch.
    """
    traffic = post.get('traffic', 0)
    impressions = post.get('impressions', 0)
    position = post.get('position', 0)
    status = post.get('status', 'published')
    # Drafts are decided first: unseen drafts need a publish/delete
    # review; drafts that somehow have traffic were once live.
    if status == 'draft':
        if traffic == 0:
            return 'REVIEW_PUBLISH_OR_DELETE'  # Unpublished draft
        else:
            return 'REPUBLISH'  # Was published, now draft
    if traffic < 5 and impressions < 20:
        return 'DELETE_OR_CONSOLIDATE'
    if traffic > 0 and 0 < position < 11:
        return 'KEEP_OPTIMIZE'
    if 11 <= position < 30:
        return 'KEEP_OPTIMIZE'
    if position >= 30 or traffic < 10:
        return 'MOVE_TO_OTHER_SITE'
    return 'KEEP_MONITOR'
def analyze_all_content(self, posts):
    """Analyze and classify all posts.

    Returns a summary dict with per-site / per-topic / per-action
    aggregates plus sponsored and draft breakdowns. *posts* is the
    {post_id: post_dict} mapping produced by the loaders.
    """
    analysis = {
        'total_posts': len(posts),
        # defaultdicts let the loop aggregate without key-existence checks.
        'by_site': defaultdict(lambda: {'count': 0, 'traffic': 0, 'posts': []}),
        'by_topic': defaultdict(lambda: {'count': 0, 'traffic': 0, 'posts': []}),
        'by_action': defaultdict(lambda: {'count': 0, 'traffic': 0, 'posts': []}),
        'sponsored_posts': {'count': 0, 'traffic': 0, 'posts': []},
        'draft_posts': {'count': 0, 'posts': []},
    }
    for post_id, post in posts.items():
        topic = self.classify_post_topic(post)
        site_assignment = self.classify_website(post)
        action = self.classify_content_action(post)
        # Sponsored posts are authored by the literal 'Expert' account.
        is_sponsored = post.get('author', '').strip() == 'Expert'
        is_draft = post.get('status') == 'draft'
        # Record in analysis
        analysis['by_site'][site_assignment['site']]['count'] += 1
        analysis['by_site'][site_assignment['site']]['traffic'] += post['traffic']
        analysis['by_site'][site_assignment['site']]['posts'].append({
            'id': post_id,
            'title': post['title'],
            'traffic': post['traffic'],
            'reason': site_assignment['reason']
        })
        analysis['by_topic'][topic]['count'] += 1
        analysis['by_topic'][topic]['traffic'] += post['traffic']
        analysis['by_action'][action]['count'] += 1
        analysis['by_action'][action]['traffic'] += post['traffic']
        if is_sponsored:
            analysis['sponsored_posts']['count'] += 1
            analysis['sponsored_posts']['traffic'] += post['traffic']
            analysis['sponsored_posts']['posts'].append({
                'id': post_id,
                'title': post['title'],
                'traffic': post['traffic']
            })
        if is_draft:
            analysis['draft_posts']['count'] += 1
            analysis['draft_posts']['posts'].append({
                'id': post_id,
                'title': post['title'],
                'status': 'draft'
            })
    return analysis
def generate_content_distribution_csv(self, posts, output_path):
    """Export detailed content distribution plan.

    One row per post with its topic, recommended site, action and
    priority, sorted by traffic descending.
    """
    try:
        fieldnames = [
            'post_id', 'title', 'topic', 'status', 'author',
            'traffic', 'impressions', 'position',
            'recommended_site', 'reason', 'action',
            'priority', 'notes'
        ]
        rows = []
        for post_id, post in posts.items():
            topic = self.classify_post_topic(post)
            site_assignment = self.classify_website(post)
            action = self.classify_content_action(post)
            author = post.get('author', '').strip()
            is_sponsored = author == 'Expert'
            rows.append({
                'post_id': post_id,
                'title': post['title'][:80],  # truncate long titles for readability
                'topic': topic,
                'status': post.get('status', 'published'),
                'author': author,
                'traffic': post.get('traffic', 0),
                'impressions': post.get('impressions', 0),
                'position': post.get('position', 0),
                'recommended_site': site_assignment['site'],
                'reason': site_assignment['reason'],
                'action': action,
                'priority': site_assignment['priority'],
                'notes': 'SPONSORED' if is_sponsored else ''
            })
        # Highest-traffic posts first.
        rows.sort(key=lambda x: x['traffic'], reverse=True)
        with open(output_path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(rows)
        self.log(f"✓ Exported {len(rows)} posts to {output_path}")
    except Exception as e:
        self.log(f"❌ Error exporting CSV: {e}")
def generate_strategy_report(self, analysis, output_path):
    """Generate comprehensive strategy report.

    Writes a Markdown report summarizing the dict produced by
    analyze_all_content(): site distribution, topic and action counts,
    sponsored/draft breakdowns, plus fixed strategic recommendations.
    """
    try:
        report = []
        report.append("# Multi-Site Content Strategy Report\n")
        report.append(f"*Generated: {datetime.now().strftime('%Y-%m-%d %H:%M')}*\n\n")
        # Executive Summary
        report.append("## Executive Summary\n\n")
        report.append(f"**Total Content Analyzed:** {analysis['total_posts']} posts\n")
        report.append(f"- Published: {analysis['total_posts'] - analysis['draft_posts']['count']}\n")
        report.append(f"- Drafts: {analysis['draft_posts']['count']}\n")
        report.append(f"- Sponsored: {analysis['sponsored_posts']['count']}\n\n")
        # Distribution Strategy — sites ordered by total traffic.
        report.append("## Recommended Site Distribution\n\n")
        for site, data in sorted(analysis['by_site'].items(),
                                 key=lambda x: x[1]['traffic'], reverse=True):
            report.append(f"### {site}\n")
            report.append(f"- Posts: {data['count']}\n")
            report.append(f"- Total Traffic: {data['traffic']:,} visits/month\n")
            report.append(f"- Top Posts:\n")
            for post in sorted(data['posts'], key=lambda x: x['traffic'], reverse=True)[:5]:
                report.append(f" - {post['title'][:60]} ({post['traffic']} visits)\n")
            report.append(f"\n")
        # Topic Distribution
        report.append("## Content by Topic\n\n")
        for topic, data in sorted(analysis['by_topic'].items(),
                                  key=lambda x: x[1]['traffic'], reverse=True):
            report.append(f"- **{topic.title()}:** {data['count']} posts ({data['traffic']:,} visits)\n")
        report.append("\n")
        # Actions Required
        report.append("## Required Actions\n\n")
        for action, data in sorted(analysis['by_action'].items(),
                                   key=lambda x: x[1]['count'], reverse=True):
            report.append(f"- **{action}:** {data['count']} posts ({data['traffic']:,} visits)\n")
        report.append("\n")
        # Sponsored Content (only when any exists)
        if analysis['sponsored_posts']['count'] > 0:
            report.append("## Sponsored Content (by 'Expert')\n\n")
            report.append(f"Total: {analysis['sponsored_posts']['count']} posts\n")
            report.append(f"Traffic: {analysis['sponsored_posts']['traffic']:,} visits/month\n\n")
            for post in sorted(analysis['sponsored_posts']['posts'],
                               key=lambda x: x['traffic'], reverse=True)[:10]:
                report.append(f"- {post['title'][:70]} ({post['traffic']} visits)\n")
            report.append("\n")
        # Draft Posts (only when any exists)
        if analysis['draft_posts']['count'] > 0:
            report.append("## Draft Posts (Unpublished)\n\n")
            report.append(f"Total: {analysis['draft_posts']['count']} posts\n")
            report.append("*Decision needed: Publish, delete, or move to other site?*\n\n")
            for post in analysis['draft_posts']['posts'][:15]:
                report.append(f"- {post['title'][:70]}\n")
            report.append("\n")
        # Recommendations (static boilerplate)
        report.append("## Strategic Recommendations\n\n")
        report.append("1. **Consolidate on mistergeek.net:**\n")
        report.append(" - Keep only VPN, software, gaming, tech content\n")
        report.append(" - Focus on high-traffic posts (>50 visits/month)\n\n")
        report.append("2. **Move to webscroll.fr:**\n")
        report.append(" - All torrent/file-sharing content\n")
        report.append(" - File-specific guides\n\n")
        report.append("3. **Move to hellogeek.net:**\n")
        report.append(" - Low-traffic content (<50 visits)\n")
        report.append(" - Off-brand content\n")
        report.append(" - Experimental/niche posts\n\n")
        report.append("4. **Delete:**\n")
        report.append(f" - Posts with <5 visits and <20 impressions\n")
        report.append(" - Duplicates/thin content\n\n")
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(''.join(report))
        self.log(f"✓ Generated strategy report: {output_path}")
    except Exception as e:
        self.log(f"❌ Error generating report: {e}")
def run(self, wordpress_csv, drafts_csv):
    """Run complete content strategy analysis.

    Loads published + draft posts, classifies them, and writes the
    distribution CSV, Markdown report and JSON summary under ./output.
    """
    self.log("\n" + "="*70)
    self.log("Multi-Site Content Strategy Analyzer")
    self.log("="*70 + "\n")
    # Load posts
    self.log("📚 Loading content...\n")
    wordpress_posts = self.load_wordpress_posts(wordpress_csv)
    draft_posts = self.load_draft_posts(drafts_csv)
    # Combine all posts
    # NOTE: drafts overwrite published entries that share the same ID.
    all_posts = {**wordpress_posts, **draft_posts}
    self.log(f"Total posts: {len(all_posts)}\n")
    # Analyze
    self.log("🔍 Analyzing content distribution...\n")
    analysis = self.analyze_all_content(all_posts)
    # Generate outputs
    self.log("📊 Generating outputs...\n")
    output_csv = self.output_dir / 'analysis' / 'content_distribution.csv'
    self.generate_content_distribution_csv(all_posts, output_csv)
    output_md = self.output_dir / 'reports' / 'content_strategy_report.md'
    self.generate_strategy_report(analysis, output_md)
    # Export analysis JSON
    analysis_json = self.output_dir / 'analysis' / 'analysis_summary.json'
    try:
        with open(analysis_json, 'w', encoding='utf-8') as f:
            # Convert defaultdict to regular dict for JSON serialization
            analysis_clean = {
                'total_posts': analysis['total_posts'],
                'by_site': dict(analysis['by_site']),
                'by_topic': {k: {'count': v['count'], 'traffic': v['traffic']}
                             for k, v in analysis['by_topic'].items()},
                'by_action': {k: {'count': v['count'], 'traffic': v['traffic']}
                              for k, v in analysis['by_action'].items()},
                'sponsored_posts': {
                    'count': analysis['sponsored_posts']['count'],
                    'traffic': analysis['sponsored_posts']['traffic']
                },
                'draft_posts': {
                    'count': analysis['draft_posts']['count']
                }
            }
            json.dump(analysis_clean, f, indent=2, ensure_ascii=False)
        self.log(f"✓ Exported analysis JSON: {analysis_json}\n")
    except Exception as e:
        self.log(f"❌ Error exporting JSON: {e}\n")
    # Summary
    self.log("\n" + "="*70)
    self.log("ANALYSIS COMPLETE")
    self.log("="*70)
    self.log(f"\nOutputs:")
    self.log(f" Distribution: {output_csv}")
    self.log(f" Strategy: {output_md}")
    self.log(f" Summary: {analysis_json}\n")
    self.log("Next steps:")
    self.log(" 1. Review content_strategy_report.md")
    self.log(" 2. Review content_distribution.csv")
    self.log(" 3. Decide: which posts go to which site?")
    self.log(" 4. Plan content consolidation")
def main():
    """CLI entry point: parse input paths and run the strategy analyzer."""
    parser = argparse.ArgumentParser(description='Analyze content across multiple sites')
    parser.add_argument('--wordpress-csv', type=Path,
                        default=Path('input/wordpress/new-propositions.csv'),
                        help='WordPress posts CSV')
    parser.add_argument('--drafts-csv', type=Path,
                        default=Path('input/drafts/drafts.csv'),
                        help='Draft posts CSV')
    options = parser.parse_args()
    ContentStrategyAnalyzer().run(options.wordpress_csv, options.drafts_csv)


if __name__ == '__main__':
    main()

View File

@@ -1,375 +0,0 @@
#!/usr/bin/env python3
"""
Enhanced AI Analyzer - Selective analysis with in-place updates
Analyzes posts and updates CSV with AI recommendations for:
- Title optimization
- Meta description optimization
- Category suggestions
- Site placement recommendations
"""
import csv
import json
import logging
import sys
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import requests
from datetime import datetime
from config import Config
logger = logging.getLogger(__name__)
class EnhancedPostAnalyzer:
    """Enhanced analyzer with selective column analysis and in-place updates."""
    def __init__(self, csv_file: str, analyze_fields: Optional[List[str]] = None):
        """
        Initialize analyzer.
        Args:
            csv_file: Path to input CSV
            analyze_fields: List of fields to analyze ['title', 'meta_description', 'categories', 'site']
                If None, analyzes all fields
        """
        self.csv_file = Path(csv_file)
        self.openrouter_api_key = Config.OPENROUTER_API_KEY
        self.ai_model = Config.AI_MODEL
        self.posts = []            # rows loaded from the input CSV
        self.analyzed_posts = []   # rows after AI fields are merged in
        self.api_calls = 0         # number of OpenRouter requests made
        self.ai_cost = 0.0         # rough running cost estimate in USD
        # Default: analyze all fields
        if analyze_fields is None:
            self.analyze_fields = ['title', 'meta_description', 'categories', 'site']
        else:
            self.analyze_fields = analyze_fields
        logger.info(f"Fields to analyze: {', '.join(self.analyze_fields)}")
def load_csv(self) -> bool:
    """Load posts from the input CSV into self.posts.

    Returns True on success, False when the file is missing or unreadable.
    """
    logger.info(f"Loading CSV: {self.csv_file}")
    if not self.csv_file.exists():
        logger.error(f"CSV file not found: {self.csv_file}")
        return False
    try:
        with open(self.csv_file, 'r', encoding='utf-8') as handle:
            self.posts = [row for row in csv.DictReader(handle)]
        logger.info(f"✓ Loaded {len(self.posts)} posts from CSV")
        return True
    except Exception as e:
        logger.error(f"Error loading CSV: {e}")
        return False
def get_ai_recommendations(self, batch: List[Dict], fields: List[str]) -> Optional[str]:
    """Get AI recommendations for specific fields.

    Builds a single prompt covering the whole batch, POSTs it to
    OpenRouter's chat-completions endpoint, and returns the raw response
    text (expected to be a JSON array) or None on failure. Also updates
    self.api_calls and the running self.ai_cost estimate.
    """
    if not self.openrouter_api_key:
        logger.error("OPENROUTER_API_KEY not set")
        return None
    # Format posts for AI
    formatted_posts = []
    for i, post in enumerate(batch, 1):
        post_text = f"{i}. POST ID: {post['post_id']}\n"
        post_text += f" Site: {post.get('site', '')}\n"
        # Only include the columns the caller asked to analyze.
        if 'title' in fields:
            post_text += f" Title: {post.get('title', '')}\n"
        if 'meta_description' in fields:
            post_text += f" Meta Description: {post.get('meta_description', '')}\n"
        if 'categories' in fields:
            post_text += f" Categories: {post.get('categories', '')}\n"
        if 'content_preview' in post:
            post_text += f" Content Preview: {post.get('content_preview', '')[:300]}...\n"
        formatted_posts.append(post_text)
    posts_text = "\n".join(formatted_posts)
    # Build prompt based on requested fields
    prompt_parts = ["Analyze these blog posts and provide recommendations.\n\n"]
    if 'site' in fields:
        prompt_parts.append("""Website Strategy:
- mistergeek.net: High-value topics (VPN, Software, Gaming, General Tech, SEO, Content Marketing)
- webscroll.fr: Torrenting, File-Sharing, Tracker guides
- hellogeek.net: Low-traffic, experimental, off-brand content
""")
    prompt_parts.append(posts_text)
    # Describe the expected per-post JSON object, field by field.
    prompt_parts.append("\nFor EACH post, provide a JSON object with:\n{\n")
    if 'title' in fields:
        prompt_parts.append(' "proposed_title": "<Improved SEO title>",\n')
        prompt_parts.append(' "title_reason": "<Reason for title change>",\n')
    if 'meta_description' in fields:
        prompt_parts.append(' "proposed_meta_description": "<Improved meta description (120-160 chars)>",\n')
        prompt_parts.append(' "meta_reason": "<Reason for meta description change>",\n')
    if 'categories' in fields:
        prompt_parts.append(' "proposed_category": "<Best category>",\n')
        prompt_parts.append(' "category_reason": "<Reason for category change>",\n')
    if 'site' in fields:
        prompt_parts.append(' "proposed_site": "<Best site for this post>",\n')
        prompt_parts.append(' "site_reason": "<Reason for site recommendation>",\n')
    prompt_parts.append(' "confidence": "<High|Medium|Low>",\n')
    prompt_parts.append(' "priority": "<High|Medium|Low>"\n}')
    prompt_parts.append("\nReturn ONLY a JSON array of objects, one per post.")
    prompt = "".join(prompt_parts)
    try:
        logger.info(f" Sending batch to AI for analysis...")
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {self.openrouter_api_key}",
                "Content-Type": "application/json",
            },
            json={
                "model": self.ai_model,
                "messages": [{"role": "user", "content": prompt}],
                "temperature": 0.3,
            },
            timeout=60
        )
        response.raise_for_status()
        result = response.json()
        self.api_calls += 1
        # Track cost
        # Rough estimate at $3 / $15 per million input / output tokens.
        usage = result.get('usage', {})
        input_tokens = usage.get('prompt_tokens', 0)
        output_tokens = usage.get('completion_tokens', 0)
        self.ai_cost += (input_tokens * 3 + output_tokens * 15) / 1_000_000
        recommendations_text = result['choices'][0]['message']['content'].strip()
        logger.info(f" ✓ Got recommendations (tokens: {input_tokens}+{output_tokens})")
        return recommendations_text
    except Exception as e:
        logger.error(f"Error getting AI recommendations: {e}")
        return None
def parse_recommendations(self, recommendations_json: str) -> List[Dict]:
    """Parse JSON recommendations from AI.

    Locates the outermost [...] span in the raw model output and decodes
    it; returns [] when no array is present or the JSON is invalid.
    """
    start_idx = recommendations_json.find('[')
    end_idx = recommendations_json.rfind(']') + 1
    if start_idx == -1 or end_idx == 0:
        logger.error("Could not find JSON array in response")
        return []
    try:
        return json.loads(recommendations_json[start_idx:end_idx])
    except json.JSONDecodeError as e:
        logger.error(f"Error parsing JSON recommendations: {e}")
        return []
def analyze_posts(self, batch_size: int = 10) -> bool:
    """Analyze all posts in batches.

    Sends self.posts to the AI in groups of *batch_size*, indexes the
    returned recommendations by post_id, then merges the requested
    proposed_* fields into each post dict (with fallbacks when no
    recommendation came back). Returns True when at least one post was
    processed into self.analyzed_posts.
    """
    logger.info("\n" + "="*70)
    logger.info("ANALYZING POSTS WITH AI")
    logger.info("="*70 + "\n")
    batches = [self.posts[i:i + batch_size] for i in range(0, len(self.posts), batch_size)]
    logger.info(f"Processing {len(self.posts)} posts in {len(batches)} batches...\n")
    all_recommendations = {}
    for batch_num, batch in enumerate(batches, 1):
        logger.info(f"Batch {batch_num}/{len(batches)}: Analyzing {len(batch)} posts...")
        recommendations_json = self.get_ai_recommendations(batch, self.analyze_fields)
        if not recommendations_json:
            # A failed batch is skipped; its posts get fallback values below.
            logger.error(f" Failed to get recommendations for batch {batch_num}")
            continue
        recommendations = self.parse_recommendations(recommendations_json)
        # Index by post_id (as a string) for the merge pass below.
        for rec in recommendations:
            all_recommendations[str(rec.get('post_id', ''))] = rec
        logger.info(f" ✓ Got {len(recommendations)} recommendations")
    logger.info(f"\n✓ Analysis complete!")
    logger.info(f" Total recommendations: {len(all_recommendations)}")
    logger.info(f" API calls: {self.api_calls}")
    logger.info(f" Estimated cost: ${self.ai_cost:.4f}")
    # Map recommendations to posts
    for post in self.posts:
        post_id = str(post['post_id'])
        if post_id in all_recommendations:
            rec = all_recommendations[post_id]
            # Add only requested fields
            if 'title' in self.analyze_fields:
                post['proposed_title'] = rec.get('proposed_title', post.get('title', ''))
                post['title_reason'] = rec.get('title_reason', '')
            if 'meta_description' in self.analyze_fields:
                post['proposed_meta_description'] = rec.get('proposed_meta_description', post.get('meta_description', ''))
                post['meta_reason'] = rec.get('meta_reason', '')
            if 'categories' in self.analyze_fields:
                post['proposed_category'] = rec.get('proposed_category', post.get('categories', ''))
                post['category_reason'] = rec.get('category_reason', '')
            if 'site' in self.analyze_fields:
                post['proposed_site'] = rec.get('proposed_site', post.get('site', ''))
                post['site_reason'] = rec.get('site_reason', '')
            # Common fields
            post['ai_confidence'] = rec.get('confidence', 'Medium')
            post['ai_priority'] = rec.get('priority', 'Medium')
        else:
            # Add empty fields for consistency
            if 'title' in self.analyze_fields:
                post['proposed_title'] = post.get('title', '')
                post['title_reason'] = 'No AI recommendation'
            if 'meta_description' in self.analyze_fields:
                post['proposed_meta_description'] = post.get('meta_description', '')
                post['meta_reason'] = 'No AI recommendation'
            if 'categories' in self.analyze_fields:
                post['proposed_category'] = post.get('categories', '')
                post['category_reason'] = 'No AI recommendation'
            if 'site' in self.analyze_fields:
                post['proposed_site'] = post.get('site', '')
                post['site_reason'] = 'No AI recommendation'
            post['ai_confidence'] = 'Unknown'
            post['ai_priority'] = 'Medium'
        self.analyzed_posts.append(post)
    return len(self.analyzed_posts) > 0
def export_results(self, output_file: Optional[str] = None, update_input: bool = False) -> str:
    """
    Export results to CSV.
    Args:
        output_file: Custom output path
        update_input: If True, update the input CSV file (creates backup)
    Returns:
        Path to exported file ('' when there is nothing to export)
    """
    if update_input:
        # Create a timestamped backup of the original before overwriting
        # it in place.
        backup_file = self.csv_file.parent / f"{self.csv_file.stem}_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
        import shutil
        shutil.copy2(self.csv_file, backup_file)
        logger.info(f"✓ Created backup: {backup_file}")
        output_file = self.csv_file
    elif not output_file:
        # Default destination: <package parent>/output/analyzed_posts_<ts>.csv
        output_dir = Path(__file__).parent.parent / 'output'
        output_dir.mkdir(parents=True, exist_ok=True)
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        output_file = output_dir / f'analyzed_posts_{timestamp}.csv'
    output_file = Path(output_file)
    output_file.parent.mkdir(parents=True, exist_ok=True)
    if not self.analyzed_posts:
        logger.error("No analyzed posts to export")
        return ""
    # Build fieldnames - original fields + new fields
    original_fields = list(self.analyzed_posts[0].keys())
    # Determine which new fields were added
    new_fields = []
    if 'title' in self.analyze_fields:
        new_fields.extend(['proposed_title', 'title_reason'])
    if 'meta_description' in self.analyze_fields:
        new_fields.extend(['proposed_meta_description', 'meta_reason'])
    if 'categories' in self.analyze_fields:
        new_fields.extend(['proposed_category', 'category_reason'])
    if 'site' in self.analyze_fields:
        new_fields.extend(['proposed_site', 'site_reason'])
    new_fields.extend(['ai_confidence', 'ai_priority'])
    # BUG FIX: analyze_posts() already merged the proposed_* columns into
    # each post dict, so analyzed_posts[0].keys() usually contains them
    # too; blindly concatenating the lists produced duplicate CSV column
    # headers. dict.fromkeys() de-duplicates while preserving order.
    fieldnames = list(dict.fromkeys(original_fields + new_fields))
    logger.info(f"\nExporting results to: {output_file}")
    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(self.analyzed_posts)
    logger.info(f"✓ Exported {len(self.analyzed_posts)} posts")
    return str(output_file)
def run(self, output_file: Optional[str] = None, update_input: bool = False, batch_size: int = 10) -> str:
    """Run the complete analysis pipeline: load CSV, analyze, export.

    Args:
        output_file: Optional custom output path for the results CSV.
        update_input: When True, rewrite the input CSV in place (the
            export step creates a backup first).
        batch_size: Number of posts sent to the AI per request.

    Returns:
        Path of the exported CSV file.

    Exits the process with status 1 if loading or analysis fails.
    """
    # Fail fast on unreadable input.
    if not self.load_csv():
        sys.exit(1)
    analysis_ok = self.analyze_posts(batch_size=batch_size)
    if not analysis_ok:
        logger.error("Failed to analyze posts")
        sys.exit(1)
    return self.export_results(output_file=output_file, update_input=update_input)
def main():
    """Main entry point with argument parsing."""
    import argparse
    arg_parser = argparse.ArgumentParser(
        description='Enhanced AI analyzer with selective field analysis'
    )
    arg_parser.add_argument('csv_file', help='Input CSV file')
    arg_parser.add_argument('--output', '-o', help='Output CSV file (default: creates new file in output/)')
    arg_parser.add_argument('--update', '-u', action='store_true', help='Update input CSV file (creates backup)')
    arg_parser.add_argument('--fields', '-f', nargs='+',
                            choices=['title', 'meta_description', 'categories', 'site'],
                            help='Fields to analyze (default: all fields)')
    arg_parser.add_argument('--batch-size', type=int, default=10, help='Batch size for AI analysis')
    options = arg_parser.parse_args()
    # Run the full pipeline: load -> analyze -> export.
    analyzer = EnhancedPostAnalyzer(options.csv_file, analyze_fields=options.fields)
    result_path = analyzer.run(
        output_file=options.output,
        update_input=options.update,
        batch_size=options.batch_size,
    )
    logger.info(f"\n✓ Analysis complete! Results saved to: {result_path}")


if __name__ == '__main__':
    main()

View File

@@ -1,378 +0,0 @@
#!/usr/bin/env python3
"""
Export All Posts to CSV for AI Decision Making
Fetches complete post data from all 3 WordPress sites and exports to CSV
for AI-powered categorization and movement recommendations.
Uses credentials from .env file for secure authentication.
"""
import csv
import logging
import sys
from pathlib import Path
from typing import Dict, List, Optional
import requests
from requests.auth import HTTPBasicAuth
import time
from datetime import datetime
import re
from config import Config
# Setup logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
class PostExporter:
    """Export posts from all configured WordPress sites to a CSV file
    suitable for AI-driven categorization and migration decisions."""

    def __init__(self):
        """Initialize the exporter with sites from Config."""
        self.sites = Config.WORDPRESS_SITES
        self.all_posts = []       # Flattened post dicts from every site
        self.category_cache = {}  # site_name -> {cat_id: {'name', 'slug'}}

    def fetch_posts_from_site(self, site_name: str, site_config: Dict) -> List[Dict]:
        """
        Fetch ALL posts (published and drafts) from a site with full details.

        Args:
            site_name: Website name
            site_config: Site configuration dict (url, username, password)

        Returns:
            List of raw WordPress post objects
        """
        logger.info(f"\nFetching posts from {site_name}...")
        posts = []
        base_url = site_config['url'].rstrip('/')
        api_url = f"{base_url}/wp-json/wp/v2/posts"
        auth = HTTPBasicAuth(site_config['username'], site_config['password'])
        # Fetch each status separately: mixing statuses in a single paged
        # query triggers 400 errors on some WordPress installs.
        for status in ['publish', 'draft']:
            page = 1
            status_count = 0
            while True:
                params = {
                    'page': page,
                    'per_page': 100,
                    'status': status,
                }
                try:
                    logger.info(f" Fetching page {page} ({status} posts)...")
                    response = requests.get(api_url, params=params, auth=auth, timeout=10)
                    response.raise_for_status()
                    page_posts = response.json()
                    if not page_posts:
                        break
                    posts.extend(page_posts)
                    status_count += len(page_posts)
                    logger.info(f" ✓ Got {len(page_posts)} posts (total: {len(posts)})")
                    page += 1
                    time.sleep(0.5)  # Be polite to the REST API
                except requests.exceptions.HTTPError as e:
                    # WordPress answers 400 when paging past the last page.
                    if response.status_code == 400:
                        logger.info(f" API limit reached (got {status_count} {status} posts)")
                        break
                    else:
                        logger.error(f"Error on page {page}: {e}")
                        break
                except requests.exceptions.RequestException as e:
                    logger.error(f"Error fetching from {site_name}: {e}")
                    break
            if status_count > 0:
                logger.info(f" ✓ Total {status} posts: {status_count}")
        logger.info(f"✓ Total posts from {site_name}: {len(posts)}\n")
        return posts

    def fetch_category_names(self, site_name: str, site_config: Dict) -> Dict[int, Dict]:
        """
        Fetch category names and slugs from a WordPress site (cached).

        NOTE(review): only the first 100 categories are fetched (no
        pagination) — confirm no site exceeds that.

        Args:
            site_name: Website name
            site_config: Site configuration dict

        Returns:
            Dict mapping category IDs to {'name', 'slug'} dicts
        """
        if site_name in self.category_cache:
            return self.category_cache[site_name]
        logger.info(f" Fetching categories from {site_name}...")
        categories = {}
        base_url = site_config['url'].rstrip('/')
        api_url = f"{base_url}/wp-json/wp/v2/categories"
        auth = HTTPBasicAuth(site_config['username'], site_config['password'])
        try:
            # Fetch all categories (per_page=100)
            params = {'per_page': 100}
            response = requests.get(api_url, params=params, auth=auth, timeout=10)
            response.raise_for_status()
            cat_list = response.json()
            for cat in cat_list:
                categories[cat['id']] = {
                    'name': cat.get('name', ''),
                    'slug': cat.get('slug', ''),
                }
            logger.info(f" ✓ Fetched {len(categories)} categories")
        except Exception as e:
            # Best-effort: a missing category map only degrades the CSV
            # (IDs instead of names), so log and continue.
            logger.warning(f" Could not fetch categories from {site_name}: {e}")
        self.category_cache[site_name] = categories
        return categories

    def extract_post_details(self, post: Dict, site_name: str, category_map: Dict[int, Dict]) -> Dict:
        """
        Extract all relevant details from a post for AI analysis.

        Args:
            post: WordPress post object
            site_name: Website name
            category_map: Dict mapping category IDs to {'name', 'slug'}

        Returns:
            Flat dict with extracted post details
        """
        # Title: the REST API returns {'rendered': ...} objects.
        title = post.get('title', {})
        if isinstance(title, dict):
            title = title.get('rendered', '')
        # Content (first 500 chars for context)
        content = post.get('content', {})
        if isinstance(content, dict):
            content = content.get('rendered', '')
        # Strip HTML tags for readability
        content_text = re.sub('<[^<]+?>', '', content)[:500]
        # Excerpt
        excerpt = post.get('excerpt', {})
        if isinstance(excerpt, dict):
            excerpt = excerpt.get('rendered', '')
        excerpt_text = re.sub('<[^<]+?>', '', excerpt)
        # Meta descriptions and SEO data (Rank Math preferred over Yoast)
        meta_dict = post.get('meta', {}) if isinstance(post.get('meta'), dict) else {}
        rank_math_title = meta_dict.get('rank_math_title', '')
        rank_math_description = meta_dict.get('rank_math_description', '')
        rank_math_keyword = meta_dict.get('rank_math_focus_keyword', '')
        yoast_description = meta_dict.get('_yoast_wpseo_metadesc', '')
        meta_description = rank_math_description or yoast_description or ''
        # Categories - convert IDs to names; fall back to the raw ID
        # string when the map doesn't know the category.
        category_ids = post.get('categories', [])
        category_names = ', '.join([
            category_map.get(cat_id, {}).get('name', str(cat_id))
            for cat_id in category_ids
        ]) if category_ids else ''
        # Tags (raw IDs only; names are not resolved here)
        tags = post.get('tags', [])
        tag_names = ', '.join([str(t) for t in tags]) if tags else ''
        return {
            'site': site_name,
            'post_id': post['id'],
            'status': post.get('status', 'publish'),
            'title': title.strip(),
            'slug': post.get('slug', ''),
            'url': post.get('link', ''),
            'author_id': post.get('author', ''),
            'date_published': post.get('date', ''),
            'date_modified': post.get('modified', ''),
            'categories': category_names,
            'tags': tag_names,
            'excerpt': excerpt_text.strip(),
            'content_preview': content_text.strip(),
            'seo_title': rank_math_title,
            'meta_description': meta_description,
            'focus_keyword': rank_math_keyword,
            # Counted on the 500-char preview, not the full article.
            'word_count': len(content_text.split()),
        }

    def export_to_csv(self, output_file: Optional[str] = None) -> Optional[str]:
        """
        Export all collected posts to CSV.

        Args:
            output_file: Optional custom output path

        Returns:
            Path to exported CSV file, or None if there was nothing to export
        """
        if not output_file:
            output_dir = Path(__file__).parent.parent / 'output'
            output_dir.mkdir(parents=True, exist_ok=True)
            date_str = datetime.now().strftime('%Y-%m-%d')
            output_file = output_dir / f'all_posts_{date_str}.csv'
        output_file = Path(output_file)
        output_file.parent.mkdir(parents=True, exist_ok=True)
        if not self.all_posts:
            logger.error("No posts to export")
            return None
        fieldnames = [
            'site',
            'post_id',
            'status',
            'title',
            'slug',
            'url',
            'author_id',
            'date_published',
            'date_modified',
            'categories',
            'tags',
            'excerpt',
            'content_preview',
            'seo_title',
            'meta_description',
            'focus_keyword',
            'word_count',
        ]
        logger.info(f"Exporting {len(self.all_posts)} posts to CSV...")
        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            for post in self.all_posts:
                writer.writerow({field: post.get(field, '') for field in fieldnames})
        logger.info(f"✓ CSV exported to: {output_file}")
        return str(output_file)

    def run(self, output_file: Optional[str] = None):
        """Run complete export process: fetch, sort, export, summarize.

        Args:
            output_file: Optional custom CSV path (new, backward-compatible
                parameter forwarded to export_to_csv).
        """
        logger.info("="*70)
        logger.info("EXPORTING ALL POSTS FOR AI DECISION MAKING")
        logger.info("="*70)
        logger.info("Sites configured: " + ", ".join(self.sites.keys()))
        logger.info("")
        # Fetch categories + posts from every configured site.
        for site_name, config in self.sites.items():
            categories = self.fetch_category_names(site_name, config)
            posts = self.fetch_posts_from_site(site_name, config)
            if posts:
                for post in posts:
                    post_details = self.extract_post_details(post, site_name, categories)
                    self.all_posts.append(post_details)
        if not self.all_posts:
            logger.error("No posts found on any site")
            sys.exit(1)
        # Stable ordering makes consecutive exports diffable.
        self.all_posts.sort(key=lambda x: (x['site'], x['post_id']))
        # Export to CSV
        csv_file = self.export_to_csv(output_file)
        # Print summary
        logger.info("\n" + "="*70)
        logger.info("EXPORT SUMMARY")
        logger.info("="*70)
        by_site = {}
        for post in self.all_posts:
            site = post['site']
            if site not in by_site:
                by_site[site] = {'total': 0, 'published': 0, 'draft': 0}
            by_site[site]['total'] += 1
            if post['status'] == 'publish':
                by_site[site]['published'] += 1
            else:
                by_site[site]['draft'] += 1
        for site, stats in sorted(by_site.items()):
            logger.info(f"\n{site}:")
            logger.info(f" Total: {stats['total']}")
            logger.info(f" Published: {stats['published']}")
            logger.info(f" Drafts: {stats['draft']}")
        total_posts = len(self.all_posts)
        total_published = sum(1 for p in self.all_posts if p['status'] == 'publish')
        total_drafts = sum(1 for p in self.all_posts if p['status'] == 'draft')
        # Was f"\n{''*70}" — the empty string repeated, which printed
        # nothing. Use the same '=' separator as the rest of the report.
        logger.info(f"\n{'='*70}")
        logger.info(f"Total across all sites: {total_posts} posts")
        logger.info(f" Published: {total_published}")
        logger.info(f" Drafts: {total_drafts}")
        logger.info(f"{'='*70}")
        logger.info(f"\n✓ Export complete!")
        logger.info(f"✓ CSV file: {csv_file}")
        logger.info(f"\nCSV includes:")
        logger.info(f" • Site, Post ID, Status, Title, URL")
        logger.info(f" • Publication dates, Categories, Tags")
        logger.info(f" • Content preview (500 chars)")
        logger.info(f" • SEO title, Meta description, Focus keyword")
        logger.info(f" • Word count")
        logger.info(f"\nNext step: Upload CSV to Claude or other AI for:")
        logger.info(f" 1. Categorize by topic (VPN, software, gaming, torrenting, etc.)")
        logger.info(f" 2. Recommend which site each post should be on")
        logger.info(f" 3. Identify duplicates for consolidation")
        logger.info(f" 4. Flag posts for deletion (low-traffic, thin content)")
def main():
    """Main entry point."""
    import argparse
    arg_parser = argparse.ArgumentParser(
        description='Export all posts from WordPress sites for AI decision making'
    )
    arg_parser.add_argument(
        '--output',
        help='Custom output CSV file path'
    )
    # parse_args() still handles --help and rejects unknown flags.
    # NOTE(review): the --output value is parsed but never forwarded to
    # the exporter — confirm whether run() should honour it.
    arg_parser.parse_args()
    PostExporter().run()


if __name__ == '__main__':
    main()

View File

@@ -1,778 +0,0 @@
#!/usr/bin/env python3
"""
Multi-Site WordPress SEO Analyzer
Fetches posts from 3 WordPress sites, analyzes titles and meta descriptions,
and provides AI-powered optimization recommendations.
"""
import os
import csv
import json
import logging
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import requests
from requests.auth import HTTPBasicAuth
import time
from config import Config
import sys
# Setup logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
class MultiSiteSEOAnalyzer:
    """Analyzes titles and meta descriptions across multiple WordPress sites.

    Pipeline: fetch posts per site via the WP REST API, score each
    title/meta description with rule-based heuristics, optionally ask an
    LLM (via OpenRouter) for suggestions on the worst-scoring posts, then
    export a CSV plus a markdown summary report.
    """

    def __init__(self, progressive_csv: bool = True):
        """
        Initialize the analyzer.

        Args:
            progressive_csv: If True, write CSV progressively as posts are analyzed
        """
        self.sites_config = Config.WORDPRESS_SITES
        self.posts_data = {}        # site_name -> list of raw post dicts
        self.analysis_results = []  # one flat result dict per analyzed post
        self.api_calls = 0          # number of LLM calls made
        self.ai_cost = 0.0          # estimated USD spent on LLM calls
        self.openrouter_api_key = Config.OPENROUTER_API_KEY
        self.progressive_csv = progressive_csv
        self.csv_file = None        # open file handle while progressive CSV is active
        self.csv_writer = None      # csv.DictWriter bound to csv_file

    def fetch_posts_from_site(self, site_name: str, site_config: Dict,
                              include_drafts: bool = False) -> List[Dict]:
        """
        Fetch posts from a WordPress site using REST API.

        Args:
            site_name: Name of the site (domain)
            site_config: Configuration dict with url, username, password
            include_drafts: If True, fetch both published and draft posts

        Returns:
            List of posts with metadata
        """
        logger.info(f"Fetching posts from {site_name}...")
        posts = []
        base_url = site_config['url'].rstrip('/')
        api_url = f"{base_url}/wp-json/wp/v2/posts"
        auth = HTTPBasicAuth(site_config['username'], site_config['password'])
        # Determine which statuses to fetch
        statuses = ['publish', 'draft'] if include_drafts else ['publish']
        status_str = ', '.join(statuses).replace('publish', 'published').replace('draft', 'drafts')
        # Fetch each status separately to avoid 400 Bad Request on pagination
        for status in statuses:
            page = 1
            status_count = 0
            use_fields = True  # Try with _fields first, fallback without if 400
            while True:
                params = {
                    'page': page,
                    'per_page': 100,
                    'status': status,  # Single status per request
                }
                # Add _fields only if not getting 400 errors
                if use_fields:
                    params['_fields'] = 'id,title,slug,link,meta,status'
                try:
                    response = requests.get(api_url, params=params, auth=auth, timeout=10)
                    response.raise_for_status()
                    page_posts = response.json()
                    if not page_posts:
                        break
                    posts.extend(page_posts)
                    status_count += len(page_posts)
                    logger.info(f" ✓ Fetched {len(page_posts)} {status} posts (page {page})")
                    page += 1
                    time.sleep(Config.API_DELAY_SECONDS)
                except requests.exceptions.HTTPError as e:
                    # Handle 400 errors gracefully
                    if response.status_code == 400 and use_fields and page == 1:
                        # Retry page 1 without _fields parameter
                        logger.info(f" ⓘ Retrying without _fields parameter...")
                        use_fields = False
                        continue
                    elif response.status_code == 400:
                        # Pagination or API limit reached
                        logger.info(f" ⓘ API limit reached (fetched {status_count} {status} posts)")
                        break
                    else:
                        logger.error(f"Error fetching page {page} from {site_name}: {e}")
                        break
                except requests.exceptions.RequestException as e:
                    logger.error(f"Error fetching from {site_name}: {e}")
                    break
            if status_count > 0:
                logger.info(f" ✓ Total {status} posts: {status_count}")
        logger.info(f"✓ Total posts from {site_name} ({status_str}): {len(posts)}")
        return posts

    def extract_seo_data(self, post: Dict, site_name: str) -> Dict:
        """
        Extract SEO-relevant data from a post.

        Args:
            post: Post data from WordPress API
            site_name: Name of the site

        Returns:
            Dict with extracted SEO data
        """
        title = post.get('title', {})
        if isinstance(title, dict):
            title = title.get('rendered', '')
        # Get meta description from various SEO plugins
        # Check multiple possible locations where different plugins store meta descriptions
        meta_desc = ''
        if isinstance(post.get('meta'), dict):
            meta_dict = post['meta']
            # Try various SEO plugin fields (order matters - most specific first)
            meta_desc = (
                meta_dict.get('_yoast_wpseo_metadesc', '') or  # Yoast SEO
                meta_dict.get('_rank_math_description', '') or  # Rank Math
                meta_dict.get('_aioseo_description', '') or  # All in One SEO
                meta_dict.get('description', '') or  # Standard field
                meta_dict.get('_meta_description', '') or  # Alternative
                meta_dict.get('metadesc', '')  # Alternative
            )
        # Get post status
        status = post.get('status', 'publish')
        return {
            'site': site_name,
            'post_id': post['id'],
            'title': title.strip(),
            'slug': post.get('slug', ''),
            'url': post.get('link', ''),
            'meta_description': meta_desc.strip(),
            'status': status,
        }

    def analyze_title(self, title: str) -> Dict:
        """
        Analyze title for SEO best practices.

        Scoring starts at 100 and subtracts penalties for length,
        missing power words, missing numbers, and special characters.

        Args:
            title: Post title

        Returns:
            Dict with analysis results (length, issues, recommendations,
            score, has_power_word, has_number)
        """
        length = len(title)
        # SEO best practices
        issues = []
        recommendations = []
        score = 100
        if length < 30:
            issues.append(f"Too short ({length})")
            recommendations.append("Expand title to 50-60 characters")
            score -= 20
        elif length < 50:
            recommendations.append("Could be slightly longer (target 50-60)")
            score -= 5
        elif length > 70:
            issues.append(f"Too long ({length})")
            recommendations.append("Consider shortening to 50-70 characters")
            score -= 15
        # Check for power words
        power_words = ['best', 'ultimate', 'complete', 'essential', 'proven',
                       'effective', 'powerful', 'expert', 'guide', 'tutorial',
                       'how to', 'step by step', 'top 10', 'ultimate guide']
        has_power_word = any(word.lower() in title.lower() for word in power_words)
        if not has_power_word:
            recommendations.append("Consider adding a power word (best, complete, guide, etc.)")
            score -= 10
        # Check for numbers
        if not any(c.isdigit() for c in title):
            recommendations.append("Consider adding a number (e.g., 'Top 5', '2025')")
            score -= 5
        # Check for emojis or special chars that might break rendering
        special_chars = set(title) - set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 -:')
        if special_chars:
            recommendations.append(f"Check special characters: {special_chars}")
            score -= 5
        return {
            'length': length,
            'issues': issues,
            'recommendations': recommendations,
            'score': max(0, score),
            'has_power_word': has_power_word,
            'has_number': any(c.isdigit() for c in title)
        }

    def analyze_meta_description(self, meta_desc: str) -> Dict:
        """
        Analyze meta description for SEO best practices.

        A missing description scores 0; otherwise penalties apply for
        length outside 120-160 chars and for a missing call-to-action.

        Args:
            meta_desc: Meta description text

        Returns:
            Dict with analysis results (length, is_missing, issues,
            recommendations, score)
        """
        length = len(meta_desc)
        issues = []
        recommendations = []
        score = 100
        if not meta_desc or length == 0:
            issues.append("Missing meta description")
            recommendations.append("Write a 120-160 character meta description")
            score = 0
        else:
            if length < 100:
                issues.append(f"Too short ({length})")
                recommendations.append("Expand to 120-160 characters")
                score -= 20
            elif length < 120:
                recommendations.append("Could be slightly longer (target 120-160)")
                score -= 5
            elif length > 160:
                issues.append(f"Too long ({length})")
                recommendations.append("Shorten to 120-160 characters")
                score -= 15
            # Check for CTA
            cta_words = ['learn', 'discover', 'read', 'explore', 'find', 'get',
                         'download', 'check', 'see', 'watch', 'try', 'start']
            has_cta = any(word.lower() in meta_desc.lower() for word in cta_words)
            if not has_cta:
                recommendations.append("Consider adding a call-to-action")
                score -= 5
        return {
            'length': length,
            'is_missing': not meta_desc,
            'issues': issues,
            'recommendations': recommendations,
            'score': max(0, score),
        }

    def calculate_overall_score(self, title_analysis: Dict, meta_analysis: Dict) -> float:
        """Calculate overall SEO score (0-100): 40% title + 60% meta."""
        title_weight = 0.4
        meta_weight = 0.6
        return (title_analysis['score'] * title_weight) + (meta_analysis['score'] * meta_weight)

    def generate_ai_recommendations(self, post_data: Dict, title_analysis: Dict,
                                    meta_analysis: Dict) -> Optional[str]:
        """
        Use Claude AI (via OpenRouter) to generate specific optimization
        recommendations. Also accumulates self.api_calls / self.ai_cost.

        Args:
            post_data: Post data
            title_analysis: Title analysis results
            meta_analysis: Meta description analysis

        Returns:
            AI-generated recommendations, or None if no API key is
            configured or the request failed
        """
        if not self.openrouter_api_key:
            return None
        prompt = f"""Analyze this blog post and provide specific SEO optimization recommendations:
Post Title: "{post_data['title']}"
Current Meta Description: "{post_data['meta_description'] or 'MISSING'}"
URL: {post_data['url']}
Title Analysis:
- Length: {title_analysis['length']} characters (target: 50-70)
- Issues: {', '.join(title_analysis['issues']) or 'None'}
Meta Description Analysis:
- Length: {meta_analysis['length']} characters (target: 120-160)
- Issues: {', '.join(meta_analysis['issues']) or 'None'}
Provide 2-3 specific, actionable recommendations to improve SEO. Focus on:
1. If title needs improvement: suggest a better title
2. If meta description is missing: write one
3. If both are weak: provide both improved versions
Format as:
- Recommendation 1: [specific action]
- Recommendation 2: [specific action]
etc.
Be concise and specific."""
        try:
            response = requests.post(
                "https://openrouter.ai/api/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {self.openrouter_api_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": "anthropic/claude-3.5-sonnet",
                    "messages": [
                        {"role": "user", "content": prompt}
                    ],
                    "temperature": 0.7,
                },
                timeout=30
            )
            response.raise_for_status()
            result = response.json()
            self.api_calls += 1
            # Track cost (Claude 3.5 Sonnet: $3/$15 per 1M tokens)
            usage = result.get('usage', {})
            input_tokens = usage.get('prompt_tokens', 0)
            output_tokens = usage.get('completion_tokens', 0)
            self.ai_cost += (input_tokens * 3 + output_tokens * 15) / 1_000_000
            recommendations = result['choices'][0]['message']['content'].strip()
            return recommendations
        except Exception as e:
            # Best-effort: a failed AI call must not abort the analysis.
            logger.warning(f"AI recommendation failed: {e}")
            return None

    def _setup_progressive_csv(self) -> Optional[Path]:
        """
        Setup CSV file for progressive writing.

        Side effects: opens the file and stores the handle/writer on
        self.csv_file / self.csv_writer so rows can be appended later.

        Returns:
            Path of the created CSV file, or None if progressive_csv is False
        """
        if not self.progressive_csv:
            return None
        output_dir = Path(__file__).parent.parent / 'output'
        output_dir.mkdir(parents=True, exist_ok=True)
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        csv_path = output_dir / f'seo_analysis_{timestamp}.csv'
        fieldnames = [
            'site', 'post_id', 'status', 'title', 'slug', 'url',
            'meta_description', 'title_score', 'title_issues',
            'title_recommendations', 'meta_score', 'meta_issues',
            'meta_recommendations', 'overall_score', 'ai_recommendations',
        ]
        csv_file = open(csv_path, 'w', newline='', encoding='utf-8')
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        csv_file.flush()
        logger.info(f"✓ CSV file created: {csv_path}")
        self.csv_file = csv_file
        self.csv_writer = writer
        return csv_path

    def _write_result_to_csv(self, result: Dict) -> None:
        """Write a single result row to the progressive CSV file (no-op
        when progressive writing is disabled or the writer is closed)."""
        if self.progressive_csv and self.csv_writer:
            self.csv_writer.writerow(result)
            self.csv_file.flush()

    def analyze_all_sites(self, use_ai: bool = True, top_n: int = 10,
                          include_drafts: bool = False):
        """
        Analyze all configured sites.

        Args:
            use_ai: Whether to use AI for recommendations
            top_n: Number of top priority posts to get AI recommendations for
            include_drafts: If True, include draft posts in analysis
        """
        logger.info(f"Starting analysis of {len(self.sites_config)} sites...")
        if include_drafts:
            logger.info("(Including draft posts)")
        logger.info("")
        all_posts = []
        # Fetch posts from all sites
        for site_name, config in self.sites_config.items():
            posts = self.fetch_posts_from_site(site_name, config, include_drafts=include_drafts)
            if posts:
                self.posts_data[site_name] = posts
                all_posts.extend(posts)
        if not all_posts:
            logger.error("No posts found on any site")
            return
        logger.info(f"\nAnalyzing {len(all_posts)} posts...\n")
        # Setup progressive CSV if enabled
        csv_path = self._setup_progressive_csv()
        # Analyze each post
        for site_name, posts in self.posts_data.items():
            logger.info(f"Analyzing {len(posts)} posts from {site_name}...")
            for idx, post in enumerate(posts, 1):
                seo_data = self.extract_seo_data(post, site_name)
                title_analysis = self.analyze_title(seo_data['title'])
                meta_analysis = self.analyze_meta_description(seo_data['meta_description'])
                overall_score = self.calculate_overall_score(title_analysis, meta_analysis)
                result = {
                    **seo_data,
                    'title_score': title_analysis['score'],
                    'title_issues': '|'.join(title_analysis['issues']) or 'None',
                    'title_recommendations': '|'.join(title_analysis['recommendations']),
                    'meta_score': meta_analysis['score'],
                    'meta_issues': '|'.join(meta_analysis['issues']) or 'None',
                    'meta_recommendations': '|'.join(meta_analysis['recommendations']),
                    'overall_score': overall_score,
                    'ai_recommendations': '',
                }
                self.analysis_results.append(result)
                # Write to CSV progressively (before AI recommendations)
                if self.progressive_csv:
                    self._write_result_to_csv(result)
                    logger.debug(f" [{idx}/{len(posts)}] Written: {seo_data['title'][:40]}")
        # Sort by priority (lowest scores first) and get AI recommendations for top posts
        if use_ai:
            self.analysis_results.sort(key=lambda x: x['overall_score'])
            logger.info(f"\nGenerating AI recommendations for top {top_n} posts...\n")
            for idx, result in enumerate(self.analysis_results[:top_n], 1):
                logger.info(f" [{idx}/{top_n}] {result['title'][:50]}...")
                ai_recs = self.generate_ai_recommendations(
                    result,
                    {
                        'score': result['title_score'],
                        'issues': result['title_issues'].split('|'),
                        'length': len(result['title'])
                    },
                    {
                        'score': result['meta_score'],
                        'issues': result['meta_issues'].split('|'),
                        'length': len(result['meta_description'])
                    }
                )
                result['ai_recommendations'] = ai_recs or ''
                # Update CSV with AI recommendations if using progressive CSV
                if self.progressive_csv and self.csv_writer:
                    # Find and update the row in the CSV by re-writing it
                    # This is a limitation of CSV - we'll update in final export instead
                    pass
                time.sleep(0.5)  # Rate limiting
        # Sort by overall score for final export
        self.analysis_results.sort(key=lambda x: x['overall_score'])
        # Close progressive CSV if open (will be re-written with final data including AI recs)
        if self.progressive_csv and self.csv_file:
            self.csv_file.close()
            self.csv_file = None
            self.csv_writer = None

    def export_results(self, output_file: Optional[str] = None):
        """
        Export analysis results to CSV (and a markdown summary report).

        When progressive mode was used, the most recent progressive file
        is overwritten so it ends up containing the AI recommendations.

        Args:
            output_file: Output file path (optional)
        """
        if not output_file:
            output_dir = Path(__file__).parent.parent / 'output'
            output_dir.mkdir(parents=True, exist_ok=True)
            if self.progressive_csv:
                timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
                # Find the most recent seo_analysis file (the one the
                # progressive writer created) and overwrite it.
                files = sorted(output_dir.glob('seo_analysis_*.csv'))
                if files:
                    output_file = files[-1]  # Use the most recent one
                else:
                    output_file = output_dir / f'seo_analysis_{timestamp}_final.csv'
            else:
                timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
                output_file = output_dir / f'seo_analysis_{timestamp}.csv'
        output_file = Path(output_file)
        output_file.parent.mkdir(parents=True, exist_ok=True)
        if not self.analysis_results:
            logger.error("No results to export")
            return
        fieldnames = [
            'site',
            'post_id',
            'status',
            'title',
            'slug',
            'url',
            'meta_description',
            'title_score',
            'title_issues',
            'title_recommendations',
            'meta_score',
            'meta_issues',
            'meta_recommendations',
            'overall_score',
            'ai_recommendations',
        ]
        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            for result in self.analysis_results:
                writer.writerow({field: result.get(field, '') for field in fieldnames})
        if self.progressive_csv:
            logger.info(f"\n✓ Final results saved to: {output_file}")
        else:
            logger.info(f"\n✓ Results exported to: {output_file}")
        # Also export as a summary report
        self.export_summary_report(output_file)

    def export_summary_report(self, csv_file: Path):
        """Export a markdown summary report next to the CSV file."""
        report_file = csv_file.parent / f"{csv_file.stem}_summary.md"
        # Group by site
        by_site = {}
        for result in self.analysis_results:
            site = result['site']
            if site not in by_site:
                by_site[site] = []
            by_site[site].append(result)
        with open(report_file, 'w', encoding='utf-8') as f:
            f.write("# Multi-Site SEO Analysis Report\n\n")
            f.write(f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
            # Summary stats
            total_posts = len(self.analysis_results)
            published = sum(1 for r in self.analysis_results if r['status'] == 'publish')
            drafts = sum(1 for r in self.analysis_results if r['status'] == 'draft')
            avg_score = sum(r['overall_score'] for r in self.analysis_results) / total_posts if total_posts > 0 else 0
            f.write("## Summary\n\n")
            f.write(f"- **Total Posts:** {total_posts}\n")
            if published > 0:
                f.write(f" - Published: {published}\n")
            if drafts > 0:
                f.write(f" - Drafts: {drafts}\n")
            f.write(f"- **Average SEO Score:** {avg_score:.1f}/100\n")
            f.write(f"- **API Calls Made:** {self.api_calls}\n")
            f.write(f"- **AI Cost:** ${self.ai_cost:.4f}\n")
            f.write(f"- **Sites Analyzed:** {len(by_site)}\n\n")
            # Priority issues
            missing_meta = sum(1 for r in self.analysis_results if r['meta_score'] == 0)
            weak_titles = sum(1 for r in self.analysis_results if r['title_score'] < 50)
            weak_meta = sum(1 for r in self.analysis_results if r['meta_score'] < 50 and r['meta_score'] > 0)
            f.write("## Priority Issues\n\n")
            f.write(f"- **Missing Meta Descriptions:** {missing_meta} posts\n")
            f.write(f"- **Weak Titles (Score < 50):** {weak_titles} posts\n")
            f.write(f"- **Weak Meta (Score < 50):** {weak_meta} posts\n\n")
            # By site (results are pre-sorted worst-first, so the first
            # posts of each site group are the top optimization targets)
            for site_name, posts in by_site.items():
                avg = sum(p['overall_score'] for p in posts) / len(posts)
                f.write(f"## {site_name}\n\n")
                f.write(f"- **Posts:** {len(posts)}\n")
                f.write(f"- **Avg Score:** {avg:.1f}/100\n")
                f.write(f"- **Missing Meta:** {sum(1 for p in posts if p['meta_score'] == 0)}\n\n")
                # Top 5 to optimize
                f.write("### Top 5 Posts to Optimize\n\n")
                for idx, post in enumerate(posts[:5], 1):
                    f.write(f"{idx}. **{post['title']}** (Score: {post['overall_score']:.0f})\n")
                    f.write(f" - URL: {post['url']}\n")
                    if post['meta_issues'] != 'None':
                        f.write(f" - Meta Issues: {post['meta_issues']}\n")
                    if post['ai_recommendations']:
                        # chr(10) is '\n': keep only the first line of the AI text
                        f.write(f" - Recommendations: {post['ai_recommendations'].split(chr(10))[0]}\n")
                f.write("\n")
            f.write("\n## Legend\n\n")
            f.write("- **Title Score:** Evaluates length, power words, numbers, readability\n")
            f.write("- **Meta Score:** Evaluates presence, length, call-to-action\n")
            f.write("- **Overall Score:** 40% title + 60% meta description\n")
            f.write("- **Optimal Ranges:**\n")
            f.write(" - Title: 50-70 characters\n")
            f.write(" - Meta: 120-160 characters\n")
        logger.info(f"✓ Summary report: {report_file}")

    def run(self, use_ai: bool = True, top_n: int = 10, include_drafts: bool = False):
        """Run complete analysis and exit the process with 1 on failure."""
        try:
            self.analyze_all_sites(use_ai=use_ai, top_n=top_n, include_drafts=include_drafts)
            self.export_results()
            logger.info("\n" + "="*60)
            logger.info("ANALYSIS COMPLETE")
            logger.info("="*60)
            logger.info(f"Total posts analyzed: {len(self.analysis_results)}")
            published = sum(1 for r in self.analysis_results if r['status'] == 'publish')
            drafts = sum(1 for r in self.analysis_results if r['status'] == 'draft')
            if published > 0:
                logger.info(f" - Published: {published}")
            if drafts > 0:
                logger.info(f" - Drafts: {drafts}")
            logger.info(f"AI recommendations: {sum(1 for r in self.analysis_results if r['ai_recommendations'])}")
            logger.info(f"AI cost: ${self.ai_cost:.4f}")
        except Exception as e:
            logger.error(f"Analysis failed: {e}", exc_info=True)
            sys.exit(1)
def check_meta_fields(site_url: str, username: str, password: str) -> None:
    """
    Diagnostic function to check what meta fields are available on a site.

    Fetches the most recent published post via the WordPress REST API and
    prints every key/value found in its ``meta`` object, plus a truncated
    JSON dump of the full object. Errors are logged, never raised.

    Args:
        site_url: WordPress site URL
        username: WordPress username
        password: WordPress app password
    """
    banner = '=' * 60
    logger.info(f"\n{banner}")
    logger.info("META FIELD DIAGNOSTIC")
    logger.info(f"{banner}\n")
    logger.info(f"Site: {site_url}")
    logger.info("Checking available meta fields in first post...\n")
    endpoint = f"{site_url.rstrip('/')}/wp-json/wp/v2/posts"
    credentials = HTTPBasicAuth(username, password)
    try:
        resp = requests.get(
            endpoint,
            params={'per_page': 1, 'status': 'publish'},
            auth=credentials,
            timeout=10,
        )
        resp.raise_for_status()
        found = resp.json()
        if not found:
            logger.error("No posts found")
            return
        first = found[0]
        logger.info(f"Post: {first.get('title', {}).get('rendered', 'N/A')}")
        logger.info(f"\nAvailable meta fields:")
        meta = first.get('meta')
        if not isinstance(meta, dict):
            logger.info(" (Meta is not a dictionary)")
        elif not meta:
            logger.info(" (No meta fields found)")
        else:
            for key, value in sorted(meta.items()):
                preview = str(value)[:60]
                logger.info(f"{key}: {preview}")
        logger.info(f"\nFull meta object:")
        # Truncate the dump so a huge meta object doesn't flood the log.
        logger.info(json.dumps(first.get('meta', {}), indent=2)[:500])
    except Exception as e:
        logger.error(f"Error: {e}")
def main():
    """Main entry point.

    Parses CLI flags, optionally runs the meta-field diagnostic
    (``--diagnose URL``), otherwise runs the multi-site analyzer.
    Exits with status 1 when WordPress credentials are missing for
    diagnostic mode, and with status 0 after a successful diagnostic.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description='Analyze SEO across multiple WordPress sites'
    )
    parser.add_argument(
        '--no-ai',
        action='store_true',
        help='Skip AI recommendations to save cost'
    )
    parser.add_argument(
        '--top-n',
        type=int,
        default=10,
        help='Number of top posts to get AI recommendations for'
    )
    # NOTE(review): --output is accepted but never consumed below —
    # confirm whether the analyzer should honor it or the flag should go.
    parser.add_argument(
        '--output',
        help='Output CSV file path'
    )
    parser.add_argument(
        '--include-drafts',
        action='store_true',
        help='Include draft posts in analysis (published + drafts)'
    )
    parser.add_argument(
        '--no-progressive',
        action='store_true',
        help='Disable real-time CSV writing (write only at end)'
    )
    parser.add_argument(
        '--diagnose',
        help='Diagnose meta fields for a site (URL). Example: --diagnose https://www.mistergeek.net'
    )
    args = parser.parse_args()
    # Diagnostic mode: needs credentials from the environment config.
    if args.diagnose:
        username = Config.WORDPRESS_USERNAME
        password = Config.WORDPRESS_APP_PASSWORD
        if not username or not password:
            logger.error("WORDPRESS_USERNAME and WORDPRESS_APP_PASSWORD must be set in .env")
            sys.exit(1)
        check_meta_fields(args.diagnose, username, password)
        sys.exit(0)
    # Normal mode: run the full multi-site analysis.
    analyzer = MultiSiteSEOAnalyzer(progressive_csv=not args.no_progressive)
    analyzer.run(use_ai=not args.no_ai, top_n=args.top_n, include_drafts=args.include_drafts)
if __name__ == '__main__':
    main()

View File

@@ -1,347 +0,0 @@
"""
Keyword opportunity analyzer for SEO optimization.
Identifies high-potential keywords ranking at positions 11-30.
"""
import csv
import json
import argparse
import time
from pathlib import Path
from openai import OpenAI
from config import Config
class OpportunityAnalyzer:
    """Analyze keyword opportunities for SEO optimization.

    Loads posts enriched with analytics data, keeps the ones ranking just
    off page 1 (positions 11-30 by default) or with existing traffic,
    scores each 0-100 by optimization potential, and optionally asks an
    OpenRouter-hosted model for concrete title/description/content advice
    on the top candidates.
    """

    def __init__(self):
        """Initialize analyzer.

        Configuration is read from the module-level ``Config``. The AI
        client is created only when an OpenRouter API key is configured;
        otherwise ``self.client`` stays ``None`` and AI steps are skipped.
        """
        self.config = Config
        self.output_dir = self.config.OUTPUT_DIR
        self.logs = []  # accumulated log lines, flushed by export_log()
        self.client = None
        if self.config.OPENROUTER_API_KEY:
            # OpenRouter exposes an OpenAI-compatible API surface.
            self.client = OpenAI(
                base_url="https://openrouter.ai/api/v1",
                api_key=self.config.OPENROUTER_API_KEY,
            )

    def log(self, message):
        """Record *message* in the log buffer and echo it to stdout."""
        self.logs.append(message)
        print(message)

    def load_posts(self, posts_csv):
        """Load posts with analytics data from a CSV file.

        Args:
            posts_csv: ``Path`` to the analytics-enriched posts CSV.

        Returns:
            List of post dicts with numeric fields coerced to int/float.
            Rows whose numeric columns fail to parse are silently skipped;
            a missing file or read error yields an empty list.
        """
        posts = []
        if not posts_csv.exists():
            self.log(f"❌ File not found: {posts_csv}")
            return posts
        try:
            with open(posts_csv, 'r', encoding='utf-8') as f:
                reader = csv.DictReader(f)
                for row in reader:
                    try:
                        # "or 0" guards against empty-string cells before casting.
                        posts.append({
                            'id': row.get('ID', ''),
                            'title': row.get('Title', ''),
                            'url': row.get('URL', ''),
                            'impressions': int(row.get('impressions', 0) or 0),
                            'clicks': int(row.get('clicks', 0) or 0),
                            'avg_position': float(row.get('avg_position', 0) or 0),
                            'ctr': float(row.get('ctr', 0) or 0),
                            'traffic': int(row.get('traffic', 0) or 0),
                            'bounce_rate': float(row.get('bounce_rate', 0) or 0),
                            'keywords_count': int(row.get('keywords_count', 0) or 0),
                            'top_keywords': row.get('top_keywords', '')
                        })
                    except (ValueError, TypeError):
                        continue
            self.log(f"✓ Loaded {len(posts)} posts")
        except Exception as e:
            self.log(f"❌ Error reading posts: {e}")
        return posts

    def filter_opportunities(self, posts, min_pos, max_pos, min_impressions):
        """Filter posts with keywords in opportunity range or high traffic for optimization.

        Args:
            posts: List of post dicts from :meth:`load_posts`.
            min_pos: Lower bound (inclusive) of the target position range.
            max_pos: Upper bound (inclusive) of the target position range.
            min_impressions: Minimum impressions for position-based selection.

        Returns:
            Posts either inside the position range with enough impressions,
            or (when no position data exists) posts with any traffic at all.
        """
        opportunities = []
        for post in posts:
            position = post.get('avg_position', 0)
            impressions = post.get('impressions', 0)
            traffic = post.get('traffic', 0)
            # Primary filter: position range (if data available)
            if position > 0:
                if min_pos <= position <= max_pos and impressions >= min_impressions:
                    opportunities.append(post)
            # Fallback: filter by traffic when position data unavailable
            # Include posts with any traffic for optimization analysis
            elif traffic > 0:
                opportunities.append(post)
        self.log(f"✓ Found {len(opportunities)} posts for optimization analysis")
        if opportunities:
            traffic_posts = [p for p in opportunities if p.get('traffic', 0) > 0]
            self.log(f" ({len(traffic_posts)} have traffic data, {len(opportunities) - len(traffic_posts)} selected for analysis)")
        return opportunities

    def calculate_opportunity_score(self, post):
        """Calculate opportunity score (0-100) for a post.

        Weighted blend: position 35%, traffic potential 30%, CTR-gap 20%,
        content quality 15%. Missing fields default to pessimistic values
        (position 50, CTR/impressions/traffic 0).
        """
        position = post.get('avg_position', 50)
        impressions = post.get('impressions', 0)
        ctr = post.get('ctr', 0)
        traffic = post.get('traffic', 0)
        # Position score (35%): Closer to page 1 = higher
        # Position 11-30 range
        position_score = max(0, (30 - position) / 19 * 35)
        # Traffic potential (30%): Based on impressions
        # Normalize to 0-30
        traffic_potential = min(30, (impressions / 1000) * 30)
        # CTR improvement potential (20%): Gap between current and expected CTR
        # Expected CTR at position X
        expected_ctr_map = {
            11: 0.02, 12: 0.02, 13: 0.015, 14: 0.015, 15: 0.013,
            16: 0.012, 17: 0.011, 18: 0.01, 19: 0.009, 20: 0.008,
            21: 0.008, 22: 0.007, 23: 0.007, 24: 0.006, 25: 0.006,
            26: 0.006, 27: 0.005, 28: 0.005, 29: 0.005, 30: 0.004
        }
        expected_ctr = expected_ctr_map.get(int(position), 0.005)
        ctr_gap = max(0, expected_ctr - ctr)
        ctr_score = min(20, (ctr_gap / expected_ctr * 100 / 5) * 20)
        # Content quality (15%): Existing traffic and engagement
        quality_score = min(15, (traffic / 100) * 7.5 +
                            (100 - post.get('bounce_rate', 50)) / 100 * 7.5)
        return round(position_score + traffic_potential + ctr_score + quality_score, 1)

    def estimate_traffic_gain(self, post):
        """Estimate potential monthly traffic gain from optimization.

        Returns 0 for posts at position 11 or better (already near page 1);
        otherwise assumes a 25% CTR lift from moving one position up.
        """
        position = post.get('avg_position', 50)
        impressions = post.get('impressions', 0)
        ctr = post.get('ctr', 0)
        # Estimate CTR improvement from moving one position up
        # Moving from position X to X-1 typically improves CTR by 20-30%
        current_traffic = impressions * ctr
        if position > 11:
            # Target position: 1 ahead
            improvement_factor = 1.25  # 25% improvement per position
            estimated_new_traffic = current_traffic * improvement_factor
            gain = estimated_new_traffic - current_traffic
        else:
            gain = 0
        return round(gain, 0)

    def generate_ai_recommendations(self, post):
        """Generate AI recommendations for a single opportunity post.

        Returns the parsed JSON dict from the model, or ``None`` when the
        client is unconfigured, the response contains no parseable JSON,
        or the API call fails. All failures are logged, never raised.
        """
        if not self.client:
            return None
        try:
            keywords = post.get('top_keywords', '').split(',')[:5]
            keywords_str = ', '.join([k.strip() for k in keywords if k.strip()])
            prompt = f"""Analyze keyword optimization opportunities for this blog post:
Post Title: {post['title']}
Current Position: {post['avg_position']:.1f}
Monthly Impressions: {post['impressions']}
Current CTR: {post['ctr']:.2%}
Top Keywords: {keywords_str}
Provide 2-3 specific, actionable recommendations to:
1. Improve the SEO title to increase CTR
2. Enhance the meta description
3. Target structural improvements (headers, content gaps)
Focus on moving this post from positions 11-20 to page 1 (positions 1-10).
Be specific and practical.
Return as JSON:
{{
"title_recommendations": ["recommendation 1", "recommendation 2"],
"description_recommendations": ["recommendation 1", "recommendation 2"],
"content_recommendations": ["recommendation 1", "recommendation 2"],
"estimated_effort_hours": number,
"expected_position_improvement": number
}}"""
            response = self.client.chat.completions.create(
                model=self.config.AI_MODEL,
                messages=[{"role": "user", "content": prompt}],
                temperature=0.7,
                max_tokens=500
            )
            try:
                result_text = response.choices[0].message.content
                # Extract JSON: the model may wrap it in prose, so slice
                # from the first '{' to the last '}'.
                start_idx = result_text.find('{')
                end_idx = result_text.rfind('}') + 1
                if start_idx >= 0 and end_idx > start_idx:
                    return json.loads(result_text[start_idx:end_idx])
            except json.JSONDecodeError:
                self.log(f"⚠️ Could not parse AI response for {post['title']}")
            return None
        except Exception as e:
            self.log(f"⚠️ AI generation failed for {post['title']}: {e}")
            return None

    def export_opportunities_csv(self, opportunities, output_csv):
        """Export opportunities to CSV, sorted by descending score.

        AI-recommendation columns fall back to empty strings for posts
        that were not sent to the model.
        """
        if not opportunities:
            self.log("⚠️ No opportunities to export")
            return
        try:
            fieldnames = [
                'ID', 'Title', 'URL', 'avg_position', 'impressions', 'clicks',
                'ctr', 'traffic', 'bounce_rate', 'keywords_count', 'top_keywords',
                'opportunity_score', 'estimated_traffic_gain',
                'title_recommendations', 'description_recommendations',
                'content_recommendations', 'estimated_effort_hours',
                'expected_position_improvement'
            ]
            with open(output_csv, 'w', newline='', encoding='utf-8') as f:
                writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='ignore')
                writer.writeheader()
                for opp in sorted(opportunities, key=lambda x: x['opportunity_score'], reverse=True):
                    row = {
                        'ID': opp['id'],
                        'Title': opp['title'],
                        'URL': opp['url'],
                        'avg_position': opp['avg_position'],
                        'impressions': opp['impressions'],
                        'clicks': opp['clicks'],
                        'ctr': f"{opp['ctr']:.2%}",
                        'traffic': opp['traffic'],
                        'bounce_rate': opp['bounce_rate'],
                        'keywords_count': opp['keywords_count'],
                        'top_keywords': opp['top_keywords'],
                        'opportunity_score': opp['opportunity_score'],
                        'estimated_traffic_gain': opp['estimated_traffic_gain'],
                        'title_recommendations': opp.get('title_recommendations_str', ''),
                        'description_recommendations': opp.get('description_recommendations_str', ''),
                        'content_recommendations': opp.get('content_recommendations_str', ''),
                        'estimated_effort_hours': opp.get('estimated_effort_hours', ''),
                        'expected_position_improvement': opp.get('expected_position_improvement', '')
                    }
                    writer.writerow(row)
            self.log(f"✓ Exported {len(opportunities)} opportunities to {output_csv}")
        except Exception as e:
            self.log(f"❌ Error exporting CSV: {e}")

    def export_log(self, log_file):
        """Write the buffered log lines to *log_file* as a plain-text report."""
        try:
            with open(log_file, 'w', encoding='utf-8') as f:
                f.write("SEO Opportunity Analysis Report\n")
                f.write("=" * 60 + "\n\n")
                for msg in self.logs:
                    f.write(msg + "\n")
            self.log(f"✓ Exported log to {log_file}")
        except Exception as e:
            self.log(f"❌ Error exporting log: {e}")

    def run(self, posts_csv, output_csv, min_position=11, max_position=30,
            min_impressions=50, top_n=20):
        """Run complete analysis workflow.

        Load → filter → score → AI-enrich top N → export CSV and log.
        Returns early (after logging) when no posts or no opportunities
        are found.
        """
        self.log("🔍 Starting keyword opportunity analysis...")
        self.log(f"Input: {posts_csv}")
        self.log(f"Position range: {min_position}-{max_position}")
        self.log(f"Min impressions: {min_impressions}")
        self.log(f"Top N for AI analysis: {top_n}\n")
        # Load posts
        posts = self.load_posts(posts_csv)
        if not posts:
            return
        # Filter opportunities
        opportunities = self.filter_opportunities(posts, min_position, max_position, min_impressions)
        if not opportunities:
            self.log("⚠️ No opportunities found in specified range")
            return
        # Calculate scores
        self.log("\n📊 Calculating opportunity scores...")
        for opp in opportunities:
            opp['opportunity_score'] = self.calculate_opportunity_score(opp)
            opp['estimated_traffic_gain'] = self.estimate_traffic_gain(opp)
        # Sort by score
        opportunities = sorted(opportunities, key=lambda x: x['opportunity_score'], reverse=True)
        # Get AI recommendations for top N
        self.log(f"\n🤖 Generating AI recommendations for top {min(top_n, len(opportunities))} opportunities...")
        for i, opp in enumerate(opportunities[:top_n]):
            self.log(f" [{i+1}/{min(top_n, len(opportunities))}] {opp['title'][:50]}...")
            recommendations = self.generate_ai_recommendations(opp)
            if recommendations:
                # Flatten list-valued recommendations into ';'-joined strings
                # for the CSV columns.
                opp['title_recommendations_str'] = '; '.join(recommendations.get('title_recommendations', []))
                opp['description_recommendations_str'] = '; '.join(recommendations.get('description_recommendations', []))
                opp['content_recommendations_str'] = '; '.join(recommendations.get('content_recommendations', []))
                opp['estimated_effort_hours'] = recommendations.get('estimated_effort_hours', '')
                opp['expected_position_improvement'] = recommendations.get('expected_position_improvement', '')
            time.sleep(0.2)  # Rate limiting
        # Export
        self.log("\n📁 Exporting results...")
        self.export_opportunities_csv(opportunities, output_csv)
        # Export log
        # NOTE(review): mkdir(exist_ok=True) fails if self.output_dir itself
        # is missing — presumably created upstream; confirm or add parents=True.
        log_dir = self.output_dir / 'logs'
        log_dir.mkdir(exist_ok=True)
        log_file = log_dir / 'opportunity_analysis_log.txt'
        self.export_log(log_file)
        self.log(f"\n✓ Analysis complete! {len(opportunities)} opportunities identified.")
        self.log(f" Top opportunity: {opportunities[0]['title'][:50]}... (score: {opportunities[0]['opportunity_score']})")
def main():
    """CLI entry point.

    Parses input/output paths and filtering thresholds, then delegates
    to :class:`OpportunityAnalyzer`.
    """
    cli = argparse.ArgumentParser(description='Analyze keyword opportunities')
    cli.add_argument('--input', type=Path,
                     default=Path('output/results/posts_with_analytics.csv'),
                     help='Input posts CSV')
    cli.add_argument('--output', type=Path,
                     default=Path('output/results/keyword_opportunities.csv'),
                     help='Output opportunities CSV')
    cli.add_argument('--min-position', type=int, default=11,
                     help='Minimum position (start of range)')
    cli.add_argument('--max-position', type=int, default=30,
                     help='Maximum position (end of range)')
    cli.add_argument('--min-impressions', type=int, default=50,
                     help='Minimum impressions to consider')
    cli.add_argument('--top-n', type=int, default=20,
                     help='Top N for AI recommendations')
    options = cli.parse_args()
    OpportunityAnalyzer().run(
        options.input,
        options.output,
        min_position=options.min_position,
        max_position=options.max_position,
        min_impressions=options.min_impressions,
        top_n=options.top_n,
    )
if __name__ == '__main__':
    main()

View File

@@ -1,436 +0,0 @@
"""
SEO optimization report generator.
Consolidates all analysis into comprehensive markdown report and action plan.
"""
import csv
import json
import argparse
from pathlib import Path
from datetime import datetime
from config import Config
class ReportGenerator:
    """Generate comprehensive SEO optimization report.

    Consolidates three CSV inputs (posts with analytics, keyword
    opportunities, content gaps) into a markdown strategy report plus a
    prioritized posts CSV scored 0-100.
    """

    def __init__(self):
        """Initialize generator from the module-level ``Config``."""
        self.config = Config
        self.output_dir = self.config.OUTPUT_DIR
        self.logs = []  # accumulated log lines; echoed as they are added

    def log(self, message):
        """Record *message* in the log buffer and echo it to stdout."""
        self.logs.append(message)
        print(message)

    def load_posts_with_analytics(self, csv_path):
        """Load posts with all analytics data, keyed by post ID.

        Tolerates several header spellings for title/url/SEO columns.
        Returns an empty dict when the file is missing or unreadable.
        """
        posts = {}
        if not csv_path.exists():
            self.log(f"❌ File not found: {csv_path}")
            return posts
        try:
            with open(csv_path, 'r', encoding='utf-8') as f:
                reader = csv.DictReader(f)
                for row in reader:
                    post_id = row.get('ID')
                    if not post_id:
                        continue
                    # Handle different title column names
                    title = (row.get('Title') or
                             row.get('title') or
                             row.get('post_title') or '')
                    posts[post_id] = {
                        'title': title,
                        'url': row.get('URL') or row.get('url') or row.get('post_url') or '',
                        'seo_title': row.get('SEO Title') or row.get('seo_title') or '',
                        'meta_description': row.get('Meta Description') or row.get('meta_description') or '',
                        'traffic': int(row.get('traffic', 0) or 0),
                        'users': int(row.get('users', 0) or 0),
                        'bounce_rate': float(row.get('bounce_rate', 0) or 0),
                        'impressions': int(row.get('impressions', 0) or 0),
                        'clicks': int(row.get('clicks', 0) or 0),
                        'avg_position': float(row.get('avg_position', 0) or 0),
                        'ctr': float(row.get('ctr', 0) or 0),
                        'keywords_count': int(row.get('keywords_count', 0) or 0),
                        'top_keywords': row.get('top_keywords', '')
                    }
            self.log(f"✓ Loaded {len(posts)} posts")
        except Exception as e:
            self.log(f"❌ Error reading posts: {e}")
        return posts

    def load_opportunities(self, csv_path):
        """Load keyword opportunities keyed by post ID.

        A missing file is non-fatal (report is still generated); rows with
        unparseable numeric columns are skipped.
        """
        opportunities = {}
        if not csv_path.exists():
            self.log(f"⚠️ Opportunities file not found: {csv_path}")
            return opportunities
        try:
            with open(csv_path, 'r', encoding='utf-8') as f:
                reader = csv.DictReader(f)
                for row in reader:
                    post_id = row.get('ID')
                    if post_id:
                        try:
                            opportunities[post_id] = {
                                'opportunity_score': float(row.get('opportunity_score', 0) or 0),
                                'estimated_traffic_gain': int(float(row.get('estimated_traffic_gain', 0) or 0)),
                                'title_recommendations': row.get('title_recommendations', ''),
                                'description_recommendations': row.get('description_recommendations', ''),
                                'content_recommendations': row.get('content_recommendations', '')
                            }
                        except (ValueError, TypeError):
                            # Skip rows with parsing errors
                            continue
            self.log(f"✓ Loaded {len(opportunities)} opportunities")
        except Exception as e:
            self.log(f"⚠️ Error reading opportunities: {e}")
        return opportunities

    def load_content_gaps(self, csv_path):
        """Load content gap suggestions as a list of dicts.

        A missing file is non-fatal; ``priority`` defaults to 'medium'.
        """
        gaps = []
        if not csv_path.exists():
            self.log(f"⚠️ Content gaps file not found: {csv_path}")
            return gaps
        try:
            with open(csv_path, 'r', encoding='utf-8') as f:
                reader = csv.DictReader(f)
                for row in reader:
                    gaps.append({
                        'title': row.get('title', ''),
                        'why_valuable': row.get('why_valuable', ''),
                        'search_volume': row.get('search_volume', ''),
                        'format': row.get('format', ''),
                        'traffic_potential': int(row.get('traffic_potential', 0) or 0),
                        'priority': row.get('priority', 'medium')
                    })
            self.log(f"✓ Loaded {len(gaps)} content gap ideas")
        except Exception as e:
            self.log(f"⚠️ Error reading content gaps: {e}")
        return gaps

    def calculate_priority_score(self, post, opportunity=None):
        """Calculate comprehensive priority score (0-100).

        Weighted blend: position 35%, traffic potential 30%, CTR-gap 20%,
        content quality 15%; the total is clamped to [0, 100].

        NOTE(review): the ``opportunity`` parameter is accepted but not
        read anywhere in this method — confirm intent before removing.
        """
        position = post.get('avg_position', 50)
        impressions = post.get('impressions', 0)
        ctr = post.get('ctr', 0)
        traffic = post.get('traffic', 0)
        # Position score (35%): Closer to page 1 = higher
        if position > 0 and position <= 30:
            position_score = max(0, (30 - position) / 29 * 35)
        else:
            position_score = 0
        # Traffic potential (30%): Based on impressions
        traffic_potential = min(30, (impressions / 1000) * 30)
        # CTR improvement (20%): Gap vs expected
        expected_ctr_map = {
            1: 0.30, 2: 0.16, 3: 0.11, 4: 0.08, 5: 0.07,
            6: 0.06, 7: 0.05, 8: 0.05, 9: 0.04, 10: 0.04,
            11: 0.02, 12: 0.02, 13: 0.015, 14: 0.015, 15: 0.013,
            16: 0.012, 17: 0.011, 18: 0.01, 19: 0.009, 20: 0.008
        }
        expected_ctr = expected_ctr_map.get(int(position), 0.005) if position > 0 else 0
        if expected_ctr > 0:
            ctr_gap = max(0, expected_ctr - ctr)
            ctr_score = min(20, (ctr_gap / expected_ctr * 100 / 5) * 20)
        else:
            ctr_score = 0
        # Content quality (15%): Existing traffic and engagement
        quality_score = min(15, (traffic / 100) * 7.5 +
                            (100 - post.get('bounce_rate', 50)) / 100 * 7.5)
        total = round(position_score + traffic_potential + ctr_score + quality_score, 1)
        return max(0, min(100, total))

    def generate_markdown_report(self, posts, opportunities, gaps, top_n=20):
        """Generate comprehensive markdown report.

        Sections: executive summary, top-N posts to optimize (with any AI
        recommendations), keyword opportunity summary, content gap ideas,
        a 90-day action plan, and a methodology appendix.

        Returns:
            The full report as a single newline-joined string.
        """
        report = []
        report.append("# SEO Optimization Strategy Report\n")
        report.append(f"*Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*\n\n")
        # Calculate metrics
        total_traffic = sum(p.get('traffic', 0) for p in posts.values())
        total_impressions = sum(p.get('impressions', 0) for p in posts.values())
        # Average position over posts that actually have position data;
        # max(1, ...) guards against division by zero.
        avg_position = sum(p.get('avg_position', 50) for p in posts.values() if p.get('avg_position', 0) > 0) / max(1, len([p for p in posts.values() if p.get('avg_position', 0) > 0]))
        # Executive Summary
        report.append("## Executive Summary\n")
        report.append(f"- **Total Posts Analyzed:** {len(posts)}\n")
        report.append(f"- **Current Monthly Traffic:** {total_traffic:,} visits\n")
        report.append(f"- **Total Impressions (90d):** {total_impressions:,}\n")
        report.append(f"- **Average Search Position:** {avg_position:.1f}\n")
        report.append(f"- **Optimization Opportunities:** {len(opportunities)}\n")
        report.append(f"- **Content Gap Ideas:** {len(gaps)}\n")
        report.append(f"- **Potential Traffic Gain (Phase 1):** +{sum(o.get('estimated_traffic_gain', 0) for o in opportunities.values()):,} visits/month\n\n")
        # Key Metrics
        report.append("### Quick Wins (Estimated Impact)\n\n")
        quick_wins = sorted(opportunities.values(),
                            key=lambda x: x.get('estimated_traffic_gain', 0),
                            reverse=True)[:5]
        total_quick_win_traffic = sum(w.get('estimated_traffic_gain', 0) for w in quick_wins)
        report.append(f"Top 5 opportunities could bring **+{total_quick_win_traffic:,} visits/month**\n\n")
        # Top 20 Posts to Optimize
        report.append("## Top 20 Posts to Optimize\n\n")
        report.append("Ranked by optimization potential (combination of position, traffic potential, and CTR improvement).\n\n")
        # Score all posts
        scored_posts = []
        for post_id, post in posts.items():
            opp = opportunities.get(post_id, {})
            score = self.calculate_priority_score(post, opp)
            scored_posts.append((post_id, post, opp, score))
        scored_posts = sorted(scored_posts, key=lambda x: x[3], reverse=True)
        for i, (post_id, post, opp, score) in enumerate(scored_posts[:top_n], 1):
            position = post.get('avg_position', 0)
            impressions = post.get('impressions', 0)
            traffic = post.get('traffic', 0)
            report.append(f"### {i}. {post['title']}\n\n")
            report.append(f"**Current Position:** {position:.1f} | **Impressions:** {impressions:,} | **Traffic:** {traffic} visits\n")
            report.append(f"**Priority Score:** {score:.1f}/100 | **Estimated Gain:** +{opp.get('estimated_traffic_gain', 0)} visits\n\n")
            if position > 0 and position <= 30:
                report.append(f"**Status:** Ranking on {'page 1' if position <= 10 else 'page 2-3'}\n\n")
            # Recommendation strings are ';'-separated lists from the
            # opportunities CSV; render each entry as a bullet.
            if opp.get('title_recommendations'):
                report.append("**Title Optimization:**\n")
                for rec in opp['title_recommendations'].split(';'):
                    rec = rec.strip()
                    if rec:
                        report.append(f"- {rec}\n")
                report.append("\n")
            if opp.get('description_recommendations'):
                report.append("**Meta Description:**\n")
                for rec in opp['description_recommendations'].split(';'):
                    rec = rec.strip()
                    if rec:
                        report.append(f"- {rec}\n")
                report.append("\n")
            if opp.get('content_recommendations'):
                report.append("**Content Improvements:**\n")
                for rec in opp['content_recommendations'].split(';'):
                    rec = rec.strip()
                    if rec:
                        report.append(f"- {rec}\n")
                report.append("\n")
            report.append("---\n\n")
        # Keyword Opportunities Summary
        report.append("## Keyword Opportunities Summary\n\n")
        # NOTE(review): 'ready_for_optimization' is never populated below —
        # confirm whether a third bucket was intended.
        opportunity_categories = {
            'page_2': [],
            'page_3': [],
            'ready_for_optimization': []
        }
        for opp_id, opp in opportunities.items():
            # Only bucket opportunities that made the top-N list above.
            if any(opp_id == p[0] for p in scored_posts[:top_n]):
                score = opp.get('opportunity_score', 0)
                post = posts.get(opp_id, {})
                position = post.get('avg_position', 0)
                if 11 <= position <= 15:
                    opportunity_categories['page_2'].append((score, opp))
                elif 16 <= position <= 30:
                    opportunity_categories['page_3'].append((score, opp))
        report.append(f"**Page 2 (Positions 11-15):** {len(opportunity_categories['page_2'])} keywords ready for quick wins\n")
        report.append(f"**Page 3+ (Positions 16-30):** {len(opportunity_categories['page_3'])} keywords with medium effort\n\n")
        # Content Gap Analysis
        report.append("## Content Gap Analysis\n\n")
        report.append(f"Identified **{len(gaps)} high-value content opportunities** not currently covered:\n\n")
        for i, gap in enumerate(sorted(gaps, key=lambda x: x.get('priority') == 'high', reverse=True)[:15], 1):
            report.append(f"### {i}. {gap['title']}\n\n")
            report.append(f"**Priority:** {gap.get('priority', 'medium').upper()}\n")
            report.append(f"**Search Volume:** {gap.get('search_volume', 'medium')}\n")
            report.append(f"**Format:** {gap.get('format', 'guide')}\n")
            report.append(f"**Estimated Traffic Potential:** +{gap.get('traffic_potential', 50)} visits/month\n\n")
            if gap.get('why_valuable'):
                report.append(f"**Why valuable:** {gap['why_valuable']}\n\n")
        # 90-Day Action Plan
        report.append("## 90-Day Action Plan\n\n")
        report.append("### Week 1-2: Quick Wins (Estimated +100 visits/month)\n\n")
        report.append("Focus on posts with highest opportunity scores that are already ranking on page 2:\n\n")
        quick_wins_phase = sorted(scored_posts[:top_n], key=lambda x: x[3], reverse=True)[:5]
        for i, (post_id, post, opp, score) in enumerate(quick_wins_phase, 1):
            report.append(f"{i}. **{post['title'][:60]}**\n")
            report.append(f" - Update SEO title and meta description\n")
            report.append(f" - Estimated effort: 30-60 minutes\n")
            report.append(f" - Expected gain: +{opp.get('estimated_traffic_gain', 50)} visits\n\n")
        report.append("### Week 3-4: Core Content Optimization (Estimated +150 visits/month)\n\n")
        report.append("Improve content structure and internal linking:\n\n")
        mid_phase = sorted(scored_posts[5:15], key=lambda x: x[3], reverse=True)[:5]
        for i, (post_id, post, opp, score) in enumerate(mid_phase, 1):
            report.append(f"{i}. **{post['title'][:60]}**\n")
            report.append(f" - Add missing content sections\n")
            report.append(f" - Improve header structure\n")
            report.append(f" - Estimated effort: 2-3 hours\n\n")
        report.append("### Week 5-8: New Content Creation (Estimated +300 visits/month)\n\n")
        report.append("Create 3-5 pieces of new content targeting high-value gaps:\n\n")
        for i, gap in enumerate(sorted(gaps, key=lambda x: x.get('traffic_potential', 0), reverse=True)[:4], 1):
            report.append(f"{i}. **{gap['title']}** ({gap.get('format', 'guide').title()})\n")
            report.append(f" - Estimated effort: 4-6 hours\n")
            report.append(f" - Expected traffic: +{gap.get('traffic_potential', 50)} visits/month\n\n")
        report.append("### Week 9-12: Refinement & Analysis (Estimated +100 visits/month)\n\n")
        report.append("- Monitor ranking changes and CTR improvements\n")
        report.append("- Refine underperforming optimizations\n")
        report.append("- Re-run keyword analysis to identify new opportunities\n\n")
        report.append("**Total Estimated 90-Day Impact: +650 visits/month (+~7.8% growth)**\n\n")
        # Methodology
        report.append("## Methodology\n\n")
        report.append("### Priority Score Calculation\n\n")
        report.append("Each post is scored based on:\n")
        report.append("- **Position (35%):** Posts ranking 11-20 get highest scores (closest to page 1)\n")
        report.append("- **Traffic Potential (30%):** Based on search impressions\n")
        report.append("- **CTR Gap (20%):** Difference between current and expected CTR for position\n")
        report.append("- **Content Quality (15%):** Existing traffic and bounce rate\n\n")
        report.append("### Data Sources\n\n")
        report.append("- **Google Analytics:** Traffic metrics (90-day window)\n")
        report.append("- **Google Search Console:** Keyword data, impressions, clicks, positions\n")
        report.append("- **WordPress REST API:** Current SEO metadata and content structure\n\n")
        report.append("### Assumptions\n\n")
        report.append("- Traffic estimates are based on historical CTR and position data\n")
        report.append("- Moving one position up typically improves CTR by 20-30%\n")
        report.append("- Page 1 rankings (positions 1-10) receive ~20-30% of total impressions\n")
        report.append("- New content takes 4-8 weeks to gain significant traction\n\n")
        return "\n".join(report)

    def export_report(self, report_text, output_md):
        """Write the markdown report text to *output_md*; errors are logged."""
        try:
            with open(output_md, 'w', encoding='utf-8') as f:
                f.write(report_text)
            self.log(f"✓ Exported report to {output_md}")
        except Exception as e:
            self.log(f"❌ Error exporting report: {e}")

    def export_prioritized_csv(self, posts, opportunities, output_csv):
        """Export all posts with priority scores, highest score first."""
        try:
            scored_posts = []
            for post_id, post in posts.items():
                opp = opportunities.get(post_id, {})
                score = self.calculate_priority_score(post, opp)
                scored_posts.append({
                    'ID': post_id,
                    'Title': post.get('title', ''),
                    'URL': post.get('url', ''),
                    'Priority_Score': score,
                    'Estimated_Traffic_Gain': opp.get('estimated_traffic_gain', 0),
                    'Current_Position': post.get('avg_position', 0),
                    'Impressions': post.get('impressions', 0),
                    'Traffic': post.get('traffic', 0),
                    'CTR': f"{post.get('ctr', 0):.2%}",
                    'Keywords_Count': post.get('keywords_count', 0)
                })
            scored_posts = sorted(scored_posts, key=lambda x: x['Priority_Score'], reverse=True)
            fieldnames = ['ID', 'Title', 'URL', 'Priority_Score', 'Estimated_Traffic_Gain',
                          'Current_Position', 'Impressions', 'Traffic', 'CTR', 'Keywords_Count']
            with open(output_csv, 'w', newline='', encoding='utf-8') as f:
                writer = csv.DictWriter(f, fieldnames=fieldnames)
                writer.writeheader()
                writer.writerows(scored_posts)
            self.log(f"✓ Exported {len(scored_posts)} prioritized posts to {output_csv}")
        except Exception as e:
            self.log(f"❌ Error exporting prioritized CSV: {e}")

    def run(self, posts_csv, opportunities_csv, gaps_csv, output_md, output_prioritized_csv, top_n=20):
        """Run complete report generation workflow.

        Loads the three input CSVs, renders the markdown report, and
        writes both output files. Aborts (with a log message) only when
        the posts CSV yields no rows.
        """
        self.log("📊 Generating SEO optimization report...")
        self.log(f"Input files: posts_with_analytics, opportunities, content_gaps\n")
        # Load data
        posts = self.load_posts_with_analytics(posts_csv)
        opportunities = self.load_opportunities(opportunities_csv)
        gaps = self.load_content_gaps(gaps_csv)
        if not posts:
            self.log("❌ No posts loaded. Cannot generate report.")
            return
        # Generate report
        self.log("\n📝 Generating markdown report...")
        report_text = self.generate_markdown_report(posts, opportunities, gaps, top_n)
        # Export report
        self.log("\n📁 Exporting files...")
        self.export_report(report_text, output_md)
        self.export_prioritized_csv(posts, opportunities, output_prioritized_csv)
        self.log("\n✓ Report generation complete!")
def main():
    """CLI entry point.

    Wires the three input CSVs and two output paths into
    :class:`ReportGenerator`.
    """
    cli = argparse.ArgumentParser(description='Generate SEO optimization report')
    cli.add_argument('--posts-with-analytics', type=Path,
                     default=Path('output/results/posts_with_analytics.csv'),
                     help='Posts with analytics CSV')
    cli.add_argument('--keyword-opportunities', type=Path,
                     default=Path('output/results/keyword_opportunities.csv'),
                     help='Keyword opportunities CSV')
    cli.add_argument('--content-gaps', type=Path,
                     default=Path('output/results/content_gaps.csv'),
                     help='Content gaps CSV')
    cli.add_argument('--output-report', type=Path,
                     default=Path('output/results/seo_optimization_report.md'),
                     help='Output markdown report')
    cli.add_argument('--output-csv', type=Path,
                     default=Path('output/results/posts_prioritized.csv'),
                     help='Output prioritized posts CSV')
    cli.add_argument('--top-n', type=int, default=20,
                     help='Number of top posts to detail')
    options = cli.parse_args()
    ReportGenerator().run(
        options.posts_with_analytics,
        options.keyword_opportunities,
        options.content_gaps,
        options.output_report,
        options.output_csv,
        top_n=options.top_n,
    )
if __name__ == '__main__':
    main()

View File

@@ -1,73 +0,0 @@
#!/bin/bash
# Full SEO analysis pipeline: merges analytics sources, scores keyword
# opportunities, then renders the final optimization report.
#
# Prerequisites:
#   - a virtualenv at ./venv
#   - input/new-propositions.csv (WordPress posts export)
#   - input/analytics/ga4_export.csv (GA4 export)
#
# Any failing step aborts the whole pipeline (set -e).
set -e
echo "╔════════════════════════════════════════════════════════════╗"
echo "║ SEO Analysis & Improvement System - Full Pipeline ║"
echo "╚════════════════════════════════════════════════════════════╝"
echo ""
# Check if venv exists
if [ ! -d "venv" ]; then
echo "❌ Virtual environment not found. Please run: python3 -m venv venv"
exit 1
fi
# Check if input files exist
if [ ! -f "input/new-propositions.csv" ]; then
echo "❌ Missing input/new-propositions.csv"
echo "Please place your WordPress posts CSV in input/ directory"
exit 1
fi
if [ ! -f "input/analytics/ga4_export.csv" ]; then
echo "❌ Missing input/analytics/ga4_export.csv"
echo "Please export GA4 data and place it in input/analytics/"
exit 1
fi
# Create output directories
mkdir -p output/results
mkdir -p output/logs
# Step 1: merge GA4 / Search Console / WordPress data into one dataset.
echo "📊 Step 1: Analytics Integration"
echo " Merging GA4, Search Console, and WordPress data..."
./venv/bin/python analytics_importer.py
echo ""
# Step 2: score posts ranking at positions 11-30 with >=50 impressions.
echo "🔍 Step 2: Keyword Opportunity Analysis"
echo " Identifying high-potential optimization opportunities..."
./venv/bin/python opportunity_analyzer.py \
--input output/results/posts_with_analytics.csv \
--output output/results/keyword_opportunities.csv \
--min-position 11 \
--max-position 30 \
--min-impressions 50 \
--top-n 20
echo ""
# Step 3: consolidate everything into the markdown report + prioritized CSV.
echo "📝 Step 3: Report Generation"
echo " Creating comprehensive SEO optimization report..."
./venv/bin/python report_generator.py
echo ""
echo "╔════════════════════════════════════════════════════════════╗"
echo "║ ✅ Analysis Complete! ║"
echo "╚════════════════════════════════════════════════════════════╝"
echo ""
echo "📂 Results Location:"
echo " └─ output/results/seo_optimization_report.md"
echo ""
echo "📊 Key Files:"
echo " ├─ posts_prioritized.csv (all posts ranked 0-100)"
echo " ├─ keyword_opportunities.csv (26 optimization opportunities)"
echo " └─ posts_with_analytics.csv (enriched dataset)"
echo ""
echo "📋 Logs:"
echo " └─ output/logs/"
echo ""
echo "🚀 Next Steps:"
echo " 1. Open: output/results/seo_optimization_report.md"
echo " 2. Review Top 20 Posts to Optimize"
echo " 3. Start with Quick Wins (positions 11-15)"
echo " 4. Follow 90-day action plan"
echo ""

View File

@@ -1,388 +0,0 @@
#!/usr/bin/env python3
"""
DEPRECATED: SEO Automation CLI
This script is deprecated. Please use the new unified CLI:
- ./seo export
- ./seo analyze
- ./seo seo_check
- ./seo categories
- ./seo full_pipeline
To see all commands: ./seo help
"""
import sys
import subprocess
import argparse
from pathlib import Path
from config import Config
import os
class SEOCLI:
    """DEPRECATED: Main CLI orchestrator for SEO workflows. Use new ./seo CLI instead.

    Every workflow shells out to a sibling script in scripts/ via
    subprocess; this class only builds command lines, locates the latest
    output CSVs, and prints progress banners.
    """

    def __init__(self):
        """Initialize CLI."""
        # Warn on every construction so users migrate to the unified entry point.
        print("⚠️ DEPRECATION WARNING: This CLI is deprecated. Use ./seo instead.")
        print(" Run './seo help' to see new commands.")
        # Layout assumption: this file lives in scripts/, project root is its parent.
        self.scripts_dir = Path(__file__).parent
        self.project_dir = self.scripts_dir.parent
        self.output_dir = self.project_dir / 'output' / 'reports'

    def run_command(self, command, description):
        """Run a command and show progress.

        Returns True when the command exits 0, False on a non-zero exit or
        any exception while spawning it.
        """
        print(f"\n{'='*70}")
        print(f"{description}")
        print(f"{'='*70}\n")
        try:
            # shell=True because `command` is a pre-formatted command string;
            # all commands are built internally, not from untrusted input.
            result = subprocess.run(command, shell=True, cwd=self.project_dir)
            if result.returncode != 0:
                print(f"\n❌ Error running: {description}")
                return False
            print(f"\n{description} completed successfully")
            return True
        except Exception as e:
            print(f"\n❌ Error: {e}")
            return False

    def get_latest_file(self, pattern):
        """Get most recent file matching pattern.

        Falls back to the newer "all_posts_*.csv" naming scheme when the
        given pattern matches nothing; returns None if neither matches.
        """
        import glob
        # Support both old and new naming patterns
        files = glob.glob(str(self.output_dir / pattern))
        if not files:
            # Try new pattern
            files = glob.glob(str(self.output_dir / "all_posts_*.csv"))
        if not files:
            return None
        # Newest by creation time.
        return max(files, key=os.path.getctime)

    def export_posts(self):
        """Export all posts to CSV."""
        cmd = f"python {self.scripts_dir}/export_posts_for_ai_decision.py"
        return self.run_command(cmd, "STEP 1: Export All Posts")

    def analyze_with_ai(self, csv_file=None):
        """Analyze exported posts with AI.

        Uses the most recent export when no CSV is given; fails fast when
        no export exists yet.
        """
        if not csv_file:
            csv_file = self.get_latest_file("all_posts_for_ai_decision_*.csv")
        if not csv_file:
            print("\n❌ No exported CSV found. Run 'seo-cli export' first.")
            return False
        cmd = f"python {self.scripts_dir}/ai_analyze_posts_for_decisions.py \"{csv_file}\""
        return self.run_command(cmd, "STEP 2: Analyze with AI")

    def recategorize_with_ai(self, csv_file=None):
        """Recategorize posts using AI."""
        if not csv_file:
            csv_file = self.get_latest_file("all_posts_for_ai_decision_*.csv")
        if not csv_file:
            print("\n❌ No exported CSV found. Run 'seo-cli export' first.")
            return False
        cmd = f"python {self.scripts_dir}/ai_recategorize_posts.py \"{csv_file}\""
        return self.run_command(cmd, "Recategorizing Posts with AI")

    def seo_check(self, top_n=None):
        """Check SEO quality of titles and meta descriptions.

        top_n limits AI analysis to the N most important posts (the AI pass
        is what costs money); None analyzes everything.
        """
        cmd = f"python {self.scripts_dir}/multi_site_seo_analyzer.py"
        if top_n:
            cmd += f" --top-n {top_n}"
        return self.run_command(cmd, f"SEO Quality Check (Top {top_n or 'All'} posts)")

    def import_analytics(self, ga_export, gsc_export, posts_csv=None):
        """Import analytics data.

        Merges GA4 and Search Console exports into the latest posts CSV
        (or an explicitly provided one).
        """
        if not posts_csv:
            posts_csv = self.get_latest_file("all_posts_for_ai_decision_*.csv")
        if not posts_csv:
            print("\n❌ No posts CSV found. Run 'seo-cli export' first.")
            return False
        cmd = (
            f"python {self.scripts_dir}/analytics_importer.py "
            f"--ga-export \"{ga_export}\" "
            f"--gsc-export \"{gsc_export}\" "
            f"--posts-csv \"{posts_csv}\" "
            f"--output output/posts_with_analytics.csv"
        )
        return self.run_command(cmd, "STEP: Import Analytics Data")

    def full_pipeline(self, analyze=True, seo=True):
        """Run complete pipeline: export → analyze → seo check.

        Stops at the first failing step; returns True only when every
        selected step succeeds.
        """
        steps = [
            ("Export", self.export_posts),
        ]
        if analyze:
            steps.append(("Analyze", self.analyze_with_ai))
        if seo:
            steps.append(("SEO Check", self.seo_check))
        print("\n" + "="*70)
        print("🚀 STARTING FULL PIPELINE")
        print("="*70)
        print(f"\nSteps to run: {', '.join([s[0] for s in steps])}\n")
        completed = 0
        for name, func in steps:
            if func():
                completed += 1
            else:
                print(f"\n⚠️ Pipeline stopped at: {name}")
                return False
        print("\n" + "="*70)
        print(f"✓ PIPELINE COMPLETE - All {completed} steps succeeded!")
        print("="*70)
        print("\nNext steps:")
        print("1. Review results in output/reports/")
        print("2. Check: posts_with_ai_recommendations_*.csv")
        print("3. Follow AI recommendations to optimize your content")
        return True

    def manage_categories(self):
        """Run category management with AI recommendations."""
        cmd = f"python {self.scripts_dir}/category_manager.py"
        return self.run_command(cmd, "Category Management with AI Recommendations")

    def approve_recommendations(self, csv_files=None):
        """Approve recommendations from CSV files."""
        if not csv_files:
            print("\n❌ No CSV files provided for approval.")
            return False
        # Join the CSV files into a single command argument
        csv_files_str = " ".join(f'"{csv_file}"' for csv_file in csv_files)
        cmd = f"python {self.scripts_dir}/user_approval.py {csv_files_str}"
        return self.run_command(cmd, f"Approving Recommendations from {len(csv_files)} files")

    def show_status(self):
        """Show status of output files (the 10 most recent)."""
        print("\n" + "="*70)
        print("📊 OUTPUT FILES STATUS")
        print("="*70 + "\n")
        import glob
        files = glob.glob(str(self.output_dir / "*"))
        if not files:
            print("No output files yet. Run 'seo-cli export' to get started.\n")
            return
        # Sort by date
        files.sort(key=os.path.getctime, reverse=True)
        for file in files[:10]:  # Show last 10 files
            size = os.path.getsize(file) / 1024  # KB
            mtime = os.path.getmtime(file)
            from datetime import datetime
            date = datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')
            filename = os.path.basename(file)
            # NOTE(review): literal placeholder below — `filename` is computed but
            # never printed; confirm the intended output line.
            print(f" (unknown)")
            print(f" Size: {size:.1f} KB | Modified: {date}")
            print()

    def list_workflows(self):
        """List available workflows with estimated time and API cost."""
        workflows = {
            'export': {
                'description': 'Export all posts from your 3 WordPress sites',
                'command': 'seo-cli export',
                'time': '5-10 min',
                'cost': 'Free'
            },
            'analyze': {
                'description': 'Analyze exported posts with Claude AI',
                'command': 'seo-cli analyze',
                'time': '5-15 min',
                'cost': '$1.50-2.00'
            },
            'recategorize': {
                'description': 'Re-categorize posts for better organization',
                'command': 'seo-cli recategorize',
                'time': '5-15 min',
                'cost': '$1.50-2.00'
            },
            'seo-check': {
                'description': 'Check SEO quality of titles and descriptions',
                'command': 'seo-cli seo-check [--top-n 50]',
                'time': '3-5 min',
                'cost': 'Free or $0.20-0.50'
            },
            'analytics': {
                'description': 'Combine Google Analytics & Search Console data',
                'command': 'seo-cli analytics GA4.csv GSC.csv',
                'time': '5 min',
                'cost': 'Free'
            },
            'full-pipeline': {
                'description': 'Run complete pipeline: export → analyze → seo-check',
                'command': 'seo-cli full-pipeline',
                'time': '15-30 min',
                'cost': '$1.50-2.50'
            },
            'categories': {
                'description': 'Manage categories across all sites with AI recommendations',
                'command': 'seo-cli categories',
                'time': '10-20 min',
                'cost': '$0.50-1.00'
            },
            'approve': {
                'description': 'Review and approve SEO recommendations',
                'command': 'seo-cli approve [csv_file1] [csv_file2]',
                'time': 'Variable',
                'cost': 'Free'
            }
        }
        print("\n" + "="*70)
        print("📋 AVAILABLE WORKFLOWS")
        print("="*70 + "\n")
        for name, info in workflows.items():
            print(f"🔹 {name.upper()}")
            print(f" {info['description']}")
            print(f" Command: {info['command']}")
            print(f" Time: {info['time']} | Cost: {info['cost']}")
            print()

    def show_help(self):
        """Show help message."""
        print("\n" + "="*70)
        print("🚀 SEO AUTOMATION CLI - Workflow Orchestrator")
        print("="*70 + "\n")
        print("QUICK START:")
        print(" seo-cli full-pipeline Run complete workflow")
        print(" seo-cli export Export all posts")
        print(" seo-cli analyze Analyze with AI")
        print(" seo-cli recategorize Re-categorize posts with AI")
        print(" seo-cli seo-check Check SEO quality")
        print()
        print("CHAINING WORKFLOWS:")
        print(" seo-cli export && seo-cli analyze && seo-cli seo-check")
        print()
        print("ADVANCED:")
        print(" seo-cli seo-check --top-n 50 Check top 50 posts")
        print(" seo-cli analytics GA4.csv GSC.csv Import analytics data")
        print(" seo-cli status Show output files")
        print(" seo-cli list List all workflows")
        print()
        print("Learn more:")
        print(" Read: WORKFLOWS.md (complete guide)")
        print(" Read: scripts/*/README.md (workflow details)")
        print()
def main():
    """Main entry point: parse the command line and dispatch to a workflow."""
    cli = SEOCLI()
    parser = argparse.ArgumentParser(
        description='SEO Automation CLI - Chain workflows together',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
seo-cli export # Export posts
seo-cli full-pipeline # Export + Analyze + SEO check
seo-cli export && seo-cli analyze # Chain commands
seo-cli seo-check --top-n 50 # Check top 50 posts
seo-cli analytics ga4.csv gsc.csv # Import analytics
seo-cli status # Show output files
"""
    )
    sub = parser.add_subparsers(dest='command', help='Workflow to run')
    # Simple workflows (no extra options).
    sub.add_parser('export', help='Export all posts from WordPress sites')
    sub.add_parser('analyze', help='Analyze exported posts with Claude AI')
    sub.add_parser('recategorize', help='Re-categorize posts with Claude AI')
    # SEO quality check with optional AI-analyzed top-N.
    seo_check_cmd = sub.add_parser('seo-check', help='Check SEO quality of titles/descriptions')
    seo_check_cmd.add_argument('--top-n', type=int, help='Analyze top N posts with AI (costs money)')
    # Analytics import requires the two export files.
    analytics_cmd = sub.add_parser('analytics', help='Import Google Analytics & Search Console')
    analytics_cmd.add_argument('ga_export', help='Path to GA4 export CSV')
    analytics_cmd.add_argument('gsc_export', help='Path to Search Console export CSV')
    # Full pipeline with opt-out flags for the expensive steps.
    pipeline_cmd = sub.add_parser('full-pipeline', help='Complete pipeline: export → analyze → seo-check')
    pipeline_cmd.add_argument('--no-analyze', action='store_true', help='Skip AI analysis')
    pipeline_cmd.add_argument('--no-seo', action='store_true', help='Skip SEO check')
    sub.add_parser('categories', help='Manage categories with AI recommendations')
    approve_cmd = sub.add_parser('approve', help='Approve recommendations from CSV files')
    approve_cmd.add_argument('csv_files', nargs='*', help='CSV files containing recommendations to approve')
    # Utilities.
    sub.add_parser('status', help='Show status of output files')
    sub.add_parser('list', help='List all available workflows')
    sub.add_parser('help', help='Show this help message')
    args = parser.parse_args()
    # No subcommand at all: show help and report success.
    if not args.command:
        cli.show_help()
        return 0

    def _display(fn):
        # Display-only commands always count as success.
        fn()
        return True

    # Command name → zero-argument callable returning a success flag.
    dispatch = {
        'export': cli.export_posts,
        'analyze': cli.analyze_with_ai,
        'recategorize': cli.recategorize_with_ai,
        'seo-check': lambda: cli.seo_check(top_n=args.top_n),
        'analytics': lambda: cli.import_analytics(args.ga_export, args.gsc_export),
        'full-pipeline': lambda: cli.full_pipeline(
            analyze=not args.no_analyze,
            seo=not args.no_seo
        ),
        'categories': cli.manage_categories,
        'approve': lambda: cli.approve_recommendations(args.csv_files),
        'status': lambda: _display(cli.show_status),
        'list': lambda: _display(cli.list_workflows),
        'help': lambda: _display(cli.show_help),
    }
    handler = dispatch.get(args.command)
    if handler is None:
        # Unrecognized command: show help and signal failure.
        cli.show_help()
        return 1
    return 0 if handler() else 1


if __name__ == '__main__':
    sys.exit(main())

View File

@@ -1,352 +0,0 @@
#!/usr/bin/env python3
"""
User Approval Mechanism for SEO Recommendations
Allows users to review and approve recommendations from CSV files.
"""
import csv
import json
import logging
import sys
from pathlib import Path
from typing import Dict, List, Optional
from datetime import datetime
from config import Config
# Setup logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
class UserApprovalSystem:
    """System for reviewing and approving SEO recommendations.

    Recommendations are loaded from CSV files, reviewed either interactively
    (one prompt per row) or automatically (confidence threshold), then
    written back out as approved / rejected / pending CSV files under the
    project output directory.
    """

    def __init__(self):
        """Initialize the approval system."""
        # Layout assumption: this file lives in scripts/, output/ is at project root.
        self.output_dir = Path(__file__).parent.parent / 'output'
        # Review buckets; each recommendation ends up in exactly one of them.
        self.approved_recommendations = []
        self.rejected_recommendations = []
        self.pending_recommendations = []

    def load_recommendations_from_csv(self, csv_file: str) -> List[Dict]:
        """Load recommendations from CSV file.

        Returns an empty list when the file is missing; on a read error,
        returns whatever rows were parsed before the failure.
        """
        recommendations = []
        if not Path(csv_file).exists():
            logger.error(f"CSV file not found: {csv_file}")
            return recommendations
        try:
            with open(csv_file, 'r', encoding='utf-8') as f:
                reader = csv.DictReader(f)
                for row in reader:
                    recommendations.append(dict(row))
            logger.info(f"Loaded {len(recommendations)} recommendations from {csv_file}")
            return recommendations
        except Exception as e:
            logger.error(f"Error loading CSV: {e}")
            return recommendations

    def display_recommendation(self, recommendation: Dict, index: int, total: int):
        """Display a single recommendation for user review."""
        print(f"\n{'='*80}")
        print(f"RECOMMENDATION {index}/{total}")
        print(f"{'='*80}")
        # Display different fields depending on the type of recommendation:
        # category-move rows carry 'post_title', AI-decision rows carry 'title',
        # anything else gets a generic key/value dump.
        if 'post_title' in recommendation:
            print(f"Post Title: {recommendation.get('post_title', 'N/A')}")
            print(f"Post ID: {recommendation.get('post_id', 'N/A')}")
            print(f"Site: {recommendation.get('site', 'N/A')}")
            print(f"Current Categories: {recommendation.get('current_categories', 'N/A')}")
            print(f"Proposed Category: {recommendation.get('proposed_category', 'N/A')}")
            print(f"Proposed Site: {recommendation.get('proposed_site', 'N/A')}")
            print(f"Reason: {recommendation.get('reason', 'N/A')}")
            print(f"Confidence: {recommendation.get('confidence', 'N/A')}")
            print(f"Content Preview: {recommendation.get('content_preview', 'N/A')[:100]}...")
        elif 'title' in recommendation:
            print(f"Post Title: {recommendation.get('title', 'N/A')}")
            print(f"Post ID: {recommendation.get('post_id', 'N/A')}")
            print(f"Site: {recommendation.get('site', 'N/A')}")
            print(f"Decision: {recommendation.get('decision', 'N/A')}")
            print(f"Recommended Category: {recommendation.get('recommended_category', 'N/A')}")
            print(f"Reason: {recommendation.get('reason', 'N/A')}")
            print(f"Priority: {recommendation.get('priority', 'N/A')}")
            print(f"AI Notes: {recommendation.get('ai_notes', 'N/A')}")
        else:
            # Generic display for other types of recommendations
            for key, value in recommendation.items():
                print(f"{key.replace('_', ' ').title()}: {value}")

    def get_user_choice(self) -> str:
        """Get user's approval choice.

        Loops until a valid answer is entered; returns one of
        'approved' / 'rejected' / 'pending' / 'quit'.
        """
        while True:
            print("\nOptions:")
            print(" 'y' or 'yes' - Approve this recommendation")
            print(" 'n' or 'no' - Reject this recommendation")
            print(" 's' or 'skip' - Skip this recommendation for later review")
            print(" 'q' or 'quit' - Quit and save current progress")
            choice = input("\nEnter your choice: ").strip().lower()
            if choice in ['y', 'yes']:
                return 'approved'
            elif choice in ['n', 'no']:
                return 'rejected'
            elif choice in ['s', 'skip']:
                return 'pending'
            elif choice in ['q', 'quit']:
                return 'quit'
            else:
                print("Invalid choice. Please enter 'y', 'n', 's', or 'q'.")

    def review_recommendations(self, recommendations: List[Dict], title: str = "Recommendations"):
        """Review recommendations with user interaction.

        Each reviewed row is tagged with a 'status' field and moved into
        the matching bucket; quitting keeps already-made decisions.
        """
        print(f"\n{'='*80}")
        print(f"REVIEWING {title.upper()}")
        print(f"Total recommendations to review: {len(recommendations)}")
        print(f"{'='*80}")
        for i, recommendation in enumerate(recommendations, 1):
            self.display_recommendation(recommendation, i, len(recommendations))
            choice = self.get_user_choice()
            if choice == 'quit':
                logger.info("User chose to quit. Saving progress...")
                break
            elif choice == 'approved':
                recommendation['status'] = 'approved'
                self.approved_recommendations.append(recommendation)
                logger.info(f"Approved recommendation {i}")
            elif choice == 'rejected':
                recommendation['status'] = 'rejected'
                self.rejected_recommendations.append(recommendation)
                logger.info(f"Rejected recommendation {i}")
            elif choice == 'pending':
                recommendation['status'] = 'pending_review'
                self.pending_recommendations.append(recommendation)
                logger.info(f"Skipped recommendation {i} for later review")

    def _export_recommendations(self, records: List[Dict], label: str, filename_suffix: str = "") -> str:
        """Write `records` to a timestamped CSV named after `label`.

        Shared implementation for the three public export methods.  The
        header is the sorted union of all record keys so heterogeneous rows
        export without loss.  Returns the file path, or "" when empty.
        """
        if not records:
            logger.info(f"No {label} recommendations to export")
            return ""
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        filename = f"{label}_recommendations_{timestamp}{filename_suffix}.csv"
        csv_file = self.output_dir / filename
        # Get all unique fieldnames from recommendations
        fieldnames = set()
        for rec in records:
            fieldnames.update(rec.keys())
        fieldnames = sorted(list(fieldnames))
        with open(csv_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(records)
        logger.info(f"Exported {len(records)} {label} recommendations to: {csv_file}")
        return str(csv_file)

    def export_approved_recommendations(self, filename_suffix: str = "") -> str:
        """Export approved recommendations to CSV."""
        return self._export_recommendations(self.approved_recommendations, "approved", filename_suffix)

    def export_rejected_recommendations(self, filename_suffix: str = "") -> str:
        """Export rejected recommendations to CSV."""
        return self._export_recommendations(self.rejected_recommendations, "rejected", filename_suffix)

    def export_pending_recommendations(self, filename_suffix: str = "") -> str:
        """Export pending recommendations to CSV."""
        return self._export_recommendations(self.pending_recommendations, "pending", filename_suffix)

    @staticmethod
    def _confidence_value(confidence_str: str) -> float:
        """Map a textual confidence level to a numeric value in [0, 1].

        'high'/'medium'/'low' map to 0.9/0.6/0.3; anything else is parsed
        as a float, defaulting to 0.3 (low) when unparseable.
        """
        key = confidence_str.lower()
        if key == 'high':
            return 0.9
        elif key == 'medium':
            return 0.6
        elif key == 'low':
            return 0.3
        # Try to parse as numeric value if possible
        try:
            return float(key)
        except ValueError:
            return 0.3  # Default to low

    def run_interactive_approval(self, csv_files: List[str]):
        """Run interactive approval process for multiple CSV files."""
        logger.info("="*70)
        logger.info("USER APPROVAL SYSTEM FOR SEO RECOMMENDATIONS")
        logger.info("="*70)
        for csv_file in csv_files:
            logger.info(f"\nLoading recommendations from: {csv_file}")
            recommendations = self.load_recommendations_from_csv(csv_file)
            if not recommendations:
                logger.warning(f"No recommendations found in {csv_file}, skipping...")
                continue
            # Get the filename without path for the title
            filename = Path(csv_file).stem
            self.review_recommendations(recommendations, title=filename)
        # Export results
        logger.info("\n" + "="*70)
        logger.info("EXPORTING RESULTS")
        logger.info("="*70)
        approved_file = self.export_approved_recommendations()
        rejected_file = self.export_rejected_recommendations()
        pending_file = self.export_pending_recommendations()
        # Summary.  BUGFIX: the separators used f"{''*70}", which renders an
        # empty string; restored the '='*70 banner used everywhere else.
        logger.info(f"\n{'='*70}")
        logger.info("APPROVAL SUMMARY:")
        logger.info(f" Approved: {len(self.approved_recommendations)}")
        logger.info(f" Rejected: {len(self.rejected_recommendations)}")
        logger.info(f" Pending: {len(self.pending_recommendations)}")
        logger.info(f"{'='*70}")
        if approved_file:
            logger.info(f"\nApproved recommendations saved to: {approved_file}")
        if rejected_file:
            logger.info(f"Rejected recommendations saved to: {rejected_file}")
        if pending_file:
            logger.info(f"Pending recommendations saved to: {pending_file}")
        logger.info("\n✓ Approval process complete!")

    def run_auto_approval(self, csv_files: List[str], auto_approve_threshold: float = 0.8):
        """Auto-approve recommendations based on confidence threshold.

        Every row at or above the threshold is auto-approved; everything
        else is auto-rejected (there is no pending bucket in this mode).
        """
        logger.info("="*70)
        logger.info("AUTO APPROVAL SYSTEM FOR SEO RECOMMENDATIONS")
        logger.info("="*70)
        logger.info(f"Auto-approval threshold: {auto_approve_threshold}")
        all_recommendations = []
        for csv_file in csv_files:
            logger.info(f"\nLoading recommendations from: {csv_file}")
            recommendations = self.load_recommendations_from_csv(csv_file)
            all_recommendations.extend(recommendations)
        approved_count = 0
        rejected_count = 0
        for rec in all_recommendations:
            # Check if there's a confidence field and if it meets the threshold
            confidence_value = self._confidence_value(rec.get('confidence', 'Low'))
            if confidence_value >= auto_approve_threshold:
                rec['status'] = 'auto_approved'
                self.approved_recommendations.append(rec)
                approved_count += 1
            else:
                rec['status'] = 'auto_rejected'
                self.rejected_recommendations.append(rec)
                rejected_count += 1
        # Export results
        logger.info("\n" + "="*70)
        logger.info("EXPORTING AUTO-APPROVAL RESULTS")
        logger.info("="*70)
        approved_file = self.export_approved_recommendations("_auto")
        rejected_file = self.export_rejected_recommendations("_auto")
        # Summary (same separator fix as run_interactive_approval).
        logger.info(f"\n{'='*70}")
        logger.info("AUTO APPROVAL SUMMARY:")
        logger.info(f" Auto-approved: {approved_count}")
        logger.info(f" Auto-rejected: {rejected_count}")
        logger.info(f"{'='*70}")
        if approved_file:
            logger.info(f"\nAuto-approved recommendations saved to: {approved_file}")
        if rejected_file:
            logger.info(f"Auto-rejected recommendations saved to: {rejected_file}")
        logger.info("\n✓ Auto-approval process complete!")
def main():
    """Main entry point: parse arguments and run the chosen approval mode."""
    import argparse
    cli = argparse.ArgumentParser(
        description='Review and approve SEO recommendations'
    )
    cli.add_argument(
        'csv_files',
        nargs='+',
        help='CSV files containing recommendations to review'
    )
    cli.add_argument(
        '--auto',
        action='store_true',
        help='Run auto-approval mode instead of interactive mode'
    )
    cli.add_argument(
        '--threshold',
        type=float,
        default=0.8,
        help='Confidence threshold for auto-approval (default: 0.8)'
    )
    opts = cli.parse_args()
    system = UserApprovalSystem()
    # --auto switches from interactive prompting to threshold-based batch mode.
    if opts.auto:
        system.run_auto_approval(opts.csv_files, opts.threshold)
    else:
        system.run_interactive_approval(opts.csv_files)


if __name__ == '__main__':
    main()

2
seo
View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python3
"""
SEO Automation CLI - Main executable
Entry point for the SEO automation tool.
Single entry point for SEO automation tool.
"""
import sys

View File

@@ -1,7 +1,14 @@
"""
SEO Automation Tool - Integrated Application
A comprehensive WordPress SEO automation suite.
SEO Automation Tool - Complete Integrated Package
Single entry point for all SEO automation functionality.
"""
__version__ = '1.0.0'
__author__ = 'SEO Automation Team'
__all__ = ['SEOApp', 'PostExporter', 'PostAnalyzer', 'CategoryProposer']
# Import main classes for easy access
from .app import SEOApp
from .exporter import PostExporter
from .analyzer import PostAnalyzer, EnhancedPostAnalyzer
from .category_proposer import CategoryProposer

View File

@@ -1,15 +1,353 @@
"""
Analyzer Module - AI-powered post analysis
Post Analyzer - AI-powered post analysis with selective field support
"""
import sys
import csv
import json
import logging
import shutil
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Optional
import requests
# Import from scripts directory (parent of src)
scripts_dir = Path(__file__).parents[2] / 'scripts'
if str(scripts_dir) not in sys.path:
sys.path.insert(0, str(scripts_dir))
from .config import Config
from ai_analyze_posts_for_decisions import PostAnalyzer
logger = logging.getLogger(__name__)
__all__ = ['PostAnalyzer']
class PostAnalyzer:
    """Basic post analyzer (legacy compatibility).

    Kept so existing imports keep working; it only loads the CSV and then
    points callers at EnhancedPostAnalyzer, which does the real work.
    """

    def __init__(self, csv_file: str):
        # Input file plus AI credentials/configuration.
        self.csv_file = Path(csv_file)
        self.openrouter_api_key = Config.OPENROUTER_API_KEY
        self.ai_model = Config.AI_MODEL
        # Working state: loaded rows, analysis results, API accounting.
        self.posts: List[Dict] = []
        self.analyzed_posts: List[Dict] = []
        self.api_calls = 0
        self.ai_cost = 0.0

    def load_csv(self) -> bool:
        """Load posts from CSV."""
        if not self.csv_file.exists():
            logger.error(f"CSV file not found: {self.csv_file}")
            return False
        try:
            with open(self.csv_file, 'r', encoding='utf-8') as handle:
                self.posts = [row for row in csv.DictReader(handle)]
        except Exception as e:
            logger.error(f"Error loading CSV: {e}")
            return False
        logger.info(f"✓ Loaded {len(self.posts)} posts")
        return True

    def run(self) -> None:
        """Run basic analysis (placeholder for legacy compatibility)."""
        if self.load_csv():
            logger.warning("Basic PostAnalyzer is deprecated. Use EnhancedPostAnalyzer instead.")
class EnhancedPostAnalyzer:
"""Enhanced analyzer with selective field analysis and in-place updates."""
def __init__(self, csv_file: str, analyze_fields: Optional[List[str]] = None):
"""
Initialize analyzer.
Args:
csv_file: Path to input CSV
analyze_fields: List of fields to analyze ['title', 'meta_description', 'categories', 'site']
"""
self.csv_file = Path(csv_file)
self.openrouter_api_key = Config.OPENROUTER_API_KEY
self.ai_model = Config.AI_MODEL
self.posts = []
self.analyzed_posts = []
self.api_calls = 0
self.ai_cost = 0.0
if analyze_fields is None:
self.analyze_fields = ['title', 'meta_description', 'categories', 'site']
else:
self.analyze_fields = analyze_fields
logger.info(f"Fields to analyze: {', '.join(self.analyze_fields)}")
def load_csv(self) -> bool:
"""Load posts from CSV file."""
logger.info(f"Loading CSV: {self.csv_file}")
if not self.csv_file.exists():
logger.error(f"CSV file not found: {self.csv_file}")
return False
try:
with open(self.csv_file, 'r', encoding='utf-8') as f:
reader = csv.DictReader(f)
self.posts = list(reader)
logger.info(f"✓ Loaded {len(self.posts)} posts from CSV")
return True
except Exception as e:
logger.error(f"Error loading CSV: {e}")
return False
def get_ai_recommendations(self, batch: List[Dict], fields: List[str]) -> Optional[str]:
"""Get AI recommendations for specific fields."""
if not self.openrouter_api_key:
logger.error("OPENROUTER_API_KEY not set")
return None
# Format posts for AI
formatted_posts = []
for i, post in enumerate(batch, 1):
post_text = f"{i}. POST ID: {post['post_id']}\n"
post_text += f" Site: {post.get('site', '')}\n"
if 'title' in fields:
post_text += f" Title: {post.get('title', '')}\n"
if 'meta_description' in fields:
post_text += f" Meta Description: {post.get('meta_description', '')}\n"
if 'categories' in fields:
post_text += f" Categories: {post.get('categories', '')}\n"
if 'content_preview' in post:
post_text += f" Content Preview: {post.get('content_preview', '')[:300]}...\n"
formatted_posts.append(post_text)
posts_text = "\n".join(formatted_posts)
# Build prompt based on requested fields
prompt_parts = ["Analyze these blog posts and provide recommendations.\n\n"]
if 'site' in fields:
prompt_parts.append("""Website Strategy:
- mistergeek.net: High-value topics (VPN, Software, Gaming, General Tech, SEO, Content Marketing)
- webscroll.fr: Torrenting, File-Sharing, Tracker guides
- hellogeek.net: Low-traffic, experimental, off-brand content
""")
prompt_parts.append(posts_text)
prompt_parts.append("\nFor EACH post, provide a JSON object with:\n{\n")
if 'title' in fields:
prompt_parts.append(' "proposed_title": "<Improved SEO title>",\n')
prompt_parts.append(' "title_reason": "<Reason for title change>",\n')
if 'meta_description' in fields:
prompt_parts.append(' "proposed_meta_description": "<Improved meta description (120-160 chars)>",\n')
prompt_parts.append(' "meta_reason": "<Reason for meta description change>",\n')
if 'categories' in fields:
prompt_parts.append(' "proposed_category": "<Best category>",\n')
prompt_parts.append(' "category_reason": "<Reason for category change>",\n')
if 'site' in fields:
prompt_parts.append(' "proposed_site": "<Best site for this post>",\n')
prompt_parts.append(' "site_reason": "<Reason for site recommendation>",\n')
prompt_parts.append(' "confidence": "<High|Medium|Low>",\n')
prompt_parts.append(' "priority": "<High|Medium|Low>"\n}')
prompt_parts.append("\nReturn ONLY a JSON array of objects, one per post.")
prompt = "".join(prompt_parts)
try:
logger.info(f" Sending batch to AI for analysis...")
response = requests.post(
"https://openrouter.ai/api/v1/chat/completions",
headers={
"Authorization": f"Bearer {self.openrouter_api_key}",
"Content-Type": "application/json",
},
json={
"model": self.ai_model,
"messages": [{"role": "user", "content": prompt}],
"temperature": 0.3,
},
timeout=60
)
response.raise_for_status()
result = response.json()
self.api_calls += 1
usage = result.get('usage', {})
input_tokens = usage.get('prompt_tokens', 0)
output_tokens = usage.get('completion_tokens', 0)
self.ai_cost += (input_tokens * 3 + output_tokens * 15) / 1_000_000
recommendations_text = result['choices'][0]['message']['content'].strip()
logger.info(f" ✓ Got recommendations (tokens: {input_tokens}+{output_tokens})")
return recommendations_text
except Exception as e:
logger.error(f"Error getting AI recommendations: {e}")
return None
def parse_recommendations(self, recommendations_json: str) -> List[Dict]:
"""Parse JSON recommendations from AI."""
try:
start_idx = recommendations_json.find('[')
end_idx = recommendations_json.rfind(']') + 1
if start_idx == -1 or end_idx == 0:
logger.error("Could not find JSON array in response")
return []
json_str = recommendations_json[start_idx:end_idx]
recommendations = json.loads(json_str)
return recommendations
except json.JSONDecodeError as e:
logger.error(f"Error parsing JSON recommendations: {e}")
return []
def analyze_posts(self, batch_size: int = 10) -> bool:
"""Analyze all posts in batches."""
logger.info("\n" + "="*70)
logger.info("ANALYZING POSTS WITH AI")
logger.info("="*70 + "\n")
batches = [self.posts[i:i + batch_size] for i in range(0, len(self.posts), batch_size)]
logger.info(f"Processing {len(self.posts)} posts in {len(batches)} batches...\n")
all_recommendations = {}
for batch_num, batch in enumerate(batches, 1):
logger.info(f"Batch {batch_num}/{len(batches)}: Analyzing {len(batch)} posts...")
recommendations_json = self.get_ai_recommendations(batch, self.analyze_fields)
if not recommendations_json:
logger.error(f" Failed to get recommendations for batch {batch_num}")
continue
recommendations = self.parse_recommendations(recommendations_json)
for rec in recommendations:
all_recommendations[str(rec.get('post_id', ''))] = rec
logger.info(f" ✓ Got {len(recommendations)} recommendations")
logger.info(f"\n✓ Analysis complete!")
logger.info(f" Total recommendations: {len(all_recommendations)}")
logger.info(f" API calls: {self.api_calls}")
logger.info(f" Estimated cost: ${self.ai_cost:.4f}")
# Map recommendations to posts
for post in self.posts:
post_id = str(post['post_id'])
if post_id in all_recommendations:
rec = all_recommendations[post_id]
# Add only requested fields
if 'title' in self.analyze_fields:
post['proposed_title'] = rec.get('proposed_title', post.get('title', ''))
post['title_reason'] = rec.get('title_reason', '')
if 'meta_description' in self.analyze_fields:
post['proposed_meta_description'] = rec.get('proposed_meta_description', post.get('meta_description', ''))
post['meta_reason'] = rec.get('meta_reason', '')
if 'categories' in self.analyze_fields:
post['proposed_category'] = rec.get('proposed_category', post.get('categories', ''))
post['category_reason'] = rec.get('category_reason', '')
if 'site' in self.analyze_fields:
post['proposed_site'] = rec.get('proposed_site', post.get('site', ''))
post['site_reason'] = rec.get('site_reason', '')
post['ai_confidence'] = rec.get('confidence', 'Medium')
post['ai_priority'] = rec.get('priority', 'Medium')
else:
if 'title' in self.analyze_fields:
post['proposed_title'] = post.get('title', '')
post['title_reason'] = 'No AI recommendation'
if 'meta_description' in self.analyze_fields:
post['proposed_meta_description'] = post.get('meta_description', '')
post['meta_reason'] = 'No AI recommendation'
if 'categories' in self.analyze_fields:
post['proposed_category'] = post.get('categories', '')
post['category_reason'] = 'No AI recommendation'
if 'site' in self.analyze_fields:
post['proposed_site'] = post.get('site', '')
post['site_reason'] = 'No AI recommendation'
post['ai_confidence'] = 'Unknown'
post['ai_priority'] = 'Medium'
self.analyzed_posts.append(post)
return len(self.analyzed_posts) > 0
def export_results(self, output_file: Optional[str] = None, update_input: bool = False) -> str:
    """Export analyzed posts to a CSV file.

    Args:
        output_file: Destination path. Ignored when ``update_input`` is True;
            when omitted, a timestamped ``analyzed_posts_*.csv`` is created
            under the package-level ``output/`` directory.
        update_input: If True, overwrite the input CSV in place after making
            a timestamped backup next to it.

    Returns:
        Path of the written CSV as a string, or "" when there is nothing
        to export.
    """
    # Same logger instance as the module-level one; fetched locally so the
    # method is self-contained.
    log = logging.getLogger(__name__)
    # Bail out before touching the filesystem: no backup, no empty file.
    if not self.analyzed_posts:
        log.error("No analyzed posts to export")
        return ""
    if update_input:
        backup_file = self.csv_file.parent / f"{self.csv_file.stem}_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
        shutil.copy2(self.csv_file, backup_file)
        log.info(f"✓ Created backup: {backup_file}")
        output_file = self.csv_file
    elif not output_file:
        output_dir = Path(__file__).parent.parent.parent / 'output'
        output_dir.mkdir(parents=True, exist_ok=True)
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        output_file = output_dir / f'analyzed_posts_{timestamp}.csv'
    output_file = Path(output_file)
    output_file.parent.mkdir(parents=True, exist_ok=True)
    original_fields = list(self.analyzed_posts[0].keys())
    new_fields = []
    if 'title' in self.analyze_fields:
        new_fields.extend(['proposed_title', 'title_reason'])
    if 'meta_description' in self.analyze_fields:
        new_fields.extend(['proposed_meta_description', 'meta_reason'])
    if 'categories' in self.analyze_fields:
        new_fields.extend(['proposed_category', 'category_reason'])
    if 'site' in self.analyze_fields:
        new_fields.extend(['proposed_site', 'site_reason'])
    new_fields.extend(['ai_confidence', 'ai_priority'])
    # BUG FIX: analyzed posts already carry the proposed_*/reason/ai_* keys
    # added during analysis, so ``original_fields + new_fields`` contained
    # duplicates, making csv.DictWriter emit duplicated header columns and
    # cell values. Deduplicate while preserving first-seen order.
    fieldnames = list(dict.fromkeys(original_fields + new_fields))
    log.info(f"\nExporting results to: {output_file}")
    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(self.analyzed_posts)
    log.info(f"✓ Exported {len(self.analyzed_posts)} posts")
    return str(output_file)
def run(self, output_file: Optional[str] = None, update_input: bool = False, batch_size: int = 10) -> str:
    """Run the complete analysis pipeline.

    Steps: load the input CSV, analyze posts in batches, then export the
    results. Returns the output CSV path, or "" if any stage fails.
    """
    loaded = self.load_csv()
    if not loaded:
        return ""
    analyzed = self.analyze_posts(batch_size=batch_size)
    if not analyzed:
        logger.error("Failed to analyze posts")
        return ""
    return self.export_results(output_file=output_file, update_input=update_input)

View File

@@ -8,11 +8,8 @@ from datetime import datetime
from typing import Optional, List
from .exporter import PostExporter
from .analyzer import PostAnalyzer
from .recategorizer import PostRecategorizer
from .seo_checker import MultiSiteSEOAnalyzer
from .categories import CategoryManager
from .approval import UserApprovalSystem
from .analyzer import EnhancedPostAnalyzer
from .category_proposer import CategoryProposer
logger = logging.getLogger(__name__)
@@ -22,70 +19,38 @@ class SEOApp:
Main SEO Application class.
Provides a unified interface for all SEO automation tasks.
Inspired by Ruby on Rails' Active Record pattern.
Usage:
app = SEOApp()
app.export()
app.analyze()
app.seo_check()
"""
def __init__(self, verbose: bool = False):
"""
Initialize the SEO application.
Args:
verbose: Enable verbose logging
"""
"""Initialize the SEO application."""
self.verbose = verbose
self.output_dir = Path(__file__).parent.parent.parent / 'output'
self.output_dir.mkdir(parents=True, exist_ok=True)
# Initialize components
self.exporter = None
self.analyzer = None
self.recategorizer = None
self.seo_checker = None
self.category_manager = None
self.approval_system = None
if verbose:
logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
def export(self) -> str:
"""
Export all posts from WordPress sites.
Returns:
Path to exported CSV file
"""
"""Export all posts from WordPress sites."""
logger.info("📦 Exporting all posts from WordPress sites...")
self.exporter = PostExporter()
self.exporter.run()
exporter = PostExporter()
return exporter.run()
# Get the exported file path
date_str = datetime.now().strftime('%Y-%m-%d')
csv_file = self.output_dir / f'all_posts_{date_str}.csv'
logger.info(f"✅ Export completed: {csv_file}")
return str(csv_file)
def analyze(self, csv_file: Optional[str] = None) -> str:
def analyze(self, csv_file: Optional[str] = None, fields: Optional[List[str]] = None,
update: bool = False, output: Optional[str] = None) -> str:
"""
Analyze posts with AI for recommendations.
Args:
csv_file: Path to CSV file (uses latest export if not provided)
Returns:
Path to analysis results
fields: Fields to analyze ['title', 'meta_description', 'categories', 'site']
update: If True, update input CSV (creates backup)
output: Custom output file path
"""
logger.info("🤖 Analyzing posts with AI for recommendations...")
# Find CSV file
if not csv_file:
csv_file = self._find_latest_export()
@@ -94,26 +59,13 @@ class SEOApp:
logger.info(f"Using file: {csv_file}")
# Run analysis
self.analyzer = PostAnalyzer(csv_file)
self.analyzer.run()
analyzer = EnhancedPostAnalyzer(csv_file, analyze_fields=fields)
return analyzer.run(output_file=output, update_input=update)
logger.info("✅ AI analysis completed!")
return csv_file
def category_propose(self, csv_file: Optional[str] = None, output: Optional[str] = None) -> str:
"""Propose categories for posts."""
logger.info("🏷️ Proposing categories with AI...")
def recategorize(self, csv_file: Optional[str] = None) -> str:
"""
Recategorize posts with AI suggestions.
Args:
csv_file: Path to CSV file (uses latest export if not provided)
Returns:
Path to recategorization results
"""
logger.info("🏷️ Recategorizing posts with AI suggestions...")
# Find CSV file
if not csv_file:
csv_file = self._find_latest_export()
@@ -122,122 +74,11 @@ class SEOApp:
logger.info(f"Using file: {csv_file}")
# Run recategorization
self.recategorizer = PostRecategorizer(csv_file)
self.recategorizer.run()
logger.info("✅ Recategorization completed!")
return csv_file
def seo_check(self, top_n: int = 10) -> None:
"""
Check SEO quality of titles and descriptions.
Args:
top_n: Number of top posts to get AI recommendations for
"""
logger.info("🔍 Checking SEO quality of titles/descriptions...")
self.seo_checker = MultiSiteSEOAnalyzer()
self.seo_checker.run(use_ai=True, top_n=top_n)
logger.info("✅ SEO check completed!")
def categories(self) -> None:
"""Manage categories across all sites."""
logger.info("🗂️ Managing categories across all sites...")
self.category_manager = CategoryManager()
self.category_manager.run()
logger.info("✅ Category management completed!")
def approve(self, files: Optional[List[str]] = None) -> None:
"""
Review and approve recommendations.
Args:
files: List of CSV files to review (auto-detects if not provided)
"""
logger.info("✅ Reviewing and approving recommendations...")
self.approval_system = UserApprovalSystem()
if not files:
# Auto-detect recommendation files
files = self._find_recommendation_files()
if not files:
raise FileNotFoundError("No recommendation files found. Run analyze() or categories() first.")
logger.info(f"Found {len(files)} recommendation files to review")
self.approval_system.run_interactive_approval(files)
logger.info("✅ Approval process completed!")
def full_pipeline(self) -> None:
"""
Run complete workflow: export → analyze → seo_check
"""
logger.info("🚀 Running full SEO automation pipeline...")
# Step 1: Export
logger.info("\n📦 Step 1/3: Exporting posts...")
self.export()
# Step 2: Analyze
logger.info("\n🤖 Step 2/3: Analyzing with AI...")
self.analyze()
# Step 3: SEO Check
logger.info("\n🔍 Step 3/3: Checking SEO quality...")
self.seo_check()
logger.info("\n✅ Full pipeline completed!")
def _find_latest_export(self) -> Optional[str]:
"""
Find the latest exported CSV file.
Returns:
Path to latest CSV file or None if not found
"""
csv_files = list(self.output_dir.glob('all_posts_*.csv'))
if not csv_files:
return None
latest = max(csv_files, key=lambda f: f.stat().st_ctime)
return str(latest)
def _find_recommendation_files(self) -> List[str]:
"""
Find recommendation files in output directory.
Returns:
List of paths to recommendation files
"""
patterns = [
'category_assignments_*.csv',
'posts_with_ai_recommendations_*.csv',
'posts_to_move_*.csv',
'posts_to_consolidate_*.csv',
'posts_to_delete_*.csv'
]
files = []
for pattern in patterns:
files.extend(self.output_dir.glob(pattern))
return [str(f) for f in files]
proposer = CategoryProposer(csv_file)
return proposer.run(output_file=output)
def status(self) -> dict:
"""
Get status of output files.
Returns:
Dictionary with file information
"""
"""Get status of output files."""
files = list(self.output_dir.glob('*.csv'))
status_info = {
@@ -253,3 +94,13 @@ class SEOApp:
})
return status_info
def _find_latest_export(self) -> Optional[str]:
"""Find the latest exported CSV file."""
csv_files = list(self.output_dir.glob('all_posts_*.csv'))
if not csv_files:
return None
latest = max(csv_files, key=lambda f: f.stat().st_ctime)
return str(latest)

View File

@@ -1,15 +1,18 @@
"""
Approval System Module - User approval for recommendations
Placeholder for future implementation.
"""
import sys
from pathlib import Path
import logging
# Import from scripts directory (parent of src)
scripts_dir = Path(__file__).parents[2] / 'scripts'
if str(scripts_dir) not in sys.path:
sys.path.insert(0, str(scripts_dir))
logger = logging.getLogger(__name__)
from user_approval import UserApprovalSystem
__all__ = ['UserApprovalSystem']
class UserApprovalSystem:
    """Stub approval workflow; the real implementation is still pending."""

    def __init__(self):
        # Warn at construction so callers know approvals are currently a no-op.
        logger.warning("UserApprovalSystem is a placeholder. Implement full functionality as needed.")

    def run_interactive_approval(self, files):
        """No-op until the interactive approval flow is ported into the package."""
        logger.info("Approval system not yet implemented in integrated package.")

View File

@@ -1,15 +1,18 @@
"""
Category Manager Module - Category management across sites
Placeholder for future implementation.
"""
import sys
from pathlib import Path
import logging
# Import from scripts directory (parent of src)
scripts_dir = Path(__file__).parents[2] / 'scripts'
if str(scripts_dir) not in sys.path:
sys.path.insert(0, str(scripts_dir))
logger = logging.getLogger(__name__)
from category_manager import CategoryManager
__all__ = ['CategoryManager']
class CategoryManager:
    """Stub for cross-site category management; real implementation pending."""

    def __init__(self):
        # Warn up front: run() currently performs no work.
        logger.warning("CategoryManager is a placeholder. Implement full functionality as needed.")

    def run(self):
        """No-op until category management is ported into the package."""
        logger.info("Category management not yet implemented in integrated package.")

View File

@@ -1,18 +1,16 @@
#!/usr/bin/env python3
"""
Category Proposer - AI-powered category suggestions
Analyzes posts and proposes optimal categories based on content.
"""
import csv
import json
import logging
import sys
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Optional
import requests
from datetime import datetime
from config import Config
from .config import Config
logger = logging.getLogger(__name__)
@@ -56,7 +54,6 @@ class CategoryProposer:
logger.error("OPENROUTER_API_KEY not set")
return None
# Format posts for AI
formatted = []
for i, post in enumerate(batch, 1):
text = f"{i}. ID: {post['post_id']}\n"
@@ -161,7 +158,6 @@ Return ONLY a JSON array with one object per post."""
logger.info(f" API calls: {self.api_calls}")
logger.info(f" Cost: ${self.ai_cost:.4f}")
# Map proposals to posts
for post in self.posts:
post_id = str(post['post_id'])
proposal = all_proposals.get(post_id, {})
@@ -180,7 +176,7 @@ Return ONLY a JSON array with one object per post."""
def export_proposals(self, output_file: Optional[str] = None) -> str:
"""Export category proposals to CSV."""
if not output_file:
output_dir = Path(__file__).parent.parent / 'output'
output_dir = Path(__file__).parent.parent.parent / 'output'
output_dir.mkdir(parents=True, exist_ok=True)
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
output_file = output_dir / f'category_proposals_{timestamp}.csv'
@@ -207,33 +203,10 @@ Return ONLY a JSON array with one object per post."""
def run(self, output_file: Optional[str] = None, batch_size: int = 10) -> str:
"""Run complete category proposal process."""
if not self.load_csv():
sys.exit(1)
return ""
if not self.propose_categories(batch_size=batch_size):
logger.error("Failed to propose categories")
sys.exit(1)
return ""
return self.export_proposals(output_file)
def main():
"""Main entry point."""
import argparse
parser = argparse.ArgumentParser(
description='AI-powered category proposer for blog posts'
)
parser.add_argument('csv_file', help='Input CSV file with posts')
parser.add_argument('--output', '-o', help='Output CSV file')
parser.add_argument('--batch-size', type=int, default=10, help='Batch size')
args = parser.parse_args()
proposer = CategoryProposer(args.csv_file)
output_file = proposer.run(batch_size=args.batch_size)
logger.info(f"\n✓ Category proposals saved to: {output_file}")
if __name__ == '__main__':
main()

View File

@@ -26,12 +26,9 @@ def main():
Examples:
seo export Export all posts from WordPress sites
seo analyze Analyze posts with AI for recommendations
seo analyze posts.csv Analyze specific CSV file
seo recategorize Recategorize posts with AI
seo seo_check Check SEO quality of titles/descriptions
seo categories Manage categories across sites
seo approve Review and approve recommendations
seo full_pipeline Run complete workflow: export → analyze → seo_check
seo analyze -f title Analyze only titles
seo analyze -u -f meta Update CSV with meta descriptions
seo category_propose Propose categories based on content
seo status Show output files status
"""
)
@@ -40,11 +37,10 @@ Examples:
parser.add_argument('args', nargs='*', help='Arguments for the command')
parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
parser.add_argument('--dry-run', action='store_true', help='Show what would be done')
parser.add_argument('--top-n', type=int, default=10, help='Number of top posts for AI analysis')
parser.add_argument('--fields', '-f', nargs='+',
choices=['title', 'meta_description', 'categories', 'site'],
help='Fields to analyze (for analyze command)')
parser.add_argument('--update', '-u', action='store_true', help='Update input file (creates backup)')
help='Fields to analyze')
parser.add_argument('--update', '-u', action='store_true', help='Update input file')
parser.add_argument('--output', '-o', help='Output file path')
args = parser.parse_args()
@@ -67,12 +63,7 @@ Examples:
commands = {
'export': cmd_export,
'analyze': cmd_analyze,
'recategorize': cmd_recategorize,
'seo_check': cmd_seo_check,
'categories': cmd_categories,
'category_propose': cmd_category_propose,
'approve': cmd_approve,
'full_pipeline': cmd_full_pipeline,
'status': cmd_status,
'help': cmd_help,
}
@@ -117,63 +108,19 @@ def cmd_analyze(app, args):
csv_file = args.args[0] if args.args else None
# Use enhanced analyzer if fields are specified or update flag is set
if args.fields or args.update:
from pathlib import Path
import sys
scripts_dir = Path(__file__).parent.parent.parent / 'scripts'
sys.path.insert(0, str(scripts_dir))
print(f"Analyzing with fields: {args.fields or 'all'}")
if args.update:
print(f"Will update input CSV (backup will be created)")
from enhanced_analyzer import EnhancedPostAnalyzer
if not csv_file:
csv_file = app._find_latest_export()
if not csv_file:
print("❌ No CSV file found. Provide one or run export first.")
return 1
print(f"Using enhanced analyzer with fields: {args.fields or 'all'}")
analyzer = EnhancedPostAnalyzer(csv_file, analyze_fields=args.fields)
output_file = analyzer.run(
output_file=args.output,
update_input=args.update
result = app.analyze(
csv_file=csv_file,
fields=args.fields,
update=args.update,
output=args.output
)
print(f"✅ Analysis completed! Results: {output_file}")
else:
app.analyze(csv_file)
return 0
def cmd_recategorize(app, args):
    """Recategorize posts with AI via the app facade.

    The optional first positional argument names the CSV file to process;
    with --dry-run only the intended action is reported.
    """
    if args.dry_run:
        print("Would recategorize posts with AI suggestions")
        return 0
    target = args.args[0] if args.args else None
    app.recategorize(target)
    return 0
def cmd_seo_check(app, args):
    """Check SEO quality of titles/descriptions (honours --dry-run and --top-n)."""
    if args.dry_run:
        print("Would check SEO quality of titles/descriptions")
    else:
        app.seo_check(top_n=args.top_n)
    return 0
def cmd_categories(app, args):
"""Manage categories."""
if args.dry_run:
print("Would manage categories across all sites")
return 0
app.categories()
if result:
print(f"✅ Analysis completed! Results: {result}")
return 0
@@ -185,47 +132,10 @@ def cmd_category_propose(app, args):
csv_file = args.args[0] if args.args else None
if not csv_file:
csv_file = app._find_latest_export()
result = app.category_propose(csv_file=csv_file, output=args.output)
if not csv_file:
print("❌ No CSV file found. Provide one or run export first.")
print(" Usage: seo category_propose <csv_file>")
return 1
from pathlib import Path
import sys
scripts_dir = Path(__file__).parent.parent.parent / 'scripts'
sys.path.insert(0, str(scripts_dir))
from category_proposer import CategoryProposer
print(f"Proposing categories for: {csv_file}")
proposer = CategoryProposer(csv_file)
output_file = proposer.run(output_file=args.output)
print(f"✅ Category proposals saved to: {output_file}")
return 0
def cmd_approve(app, args):
    """Interactively review and approve recommendation files.

    Positional arguments name the CSV files to review; with none given the
    app auto-detects them. --dry-run only reports the intended action.
    """
    if args.dry_run:
        print("Would review and approve recommendations")
        return 0
    app.approve(args.args or None)
    return 0
def cmd_full_pipeline(app, args):
"""Run full pipeline."""
if args.dry_run:
print("Would run full pipeline: export → analyze → seo_check")
return 0
app.full_pipeline()
if result:
print(f"✅ Category proposals saved to: {result}")
return 0
@@ -256,23 +166,15 @@ SEO Automation CLI - Available Commands
Basic Commands:
export Export all posts from WordPress sites
analyze [csv_file] Analyze posts with AI
analyze -f title categories Analyze specific fields only
analyze -u Update input CSV with new columns
recategorize [csv_file] Recategorize posts with AI
seo_check Check SEO quality of titles/descriptions
categories Manage categories across sites
analyze -f title Analyze specific fields (title, meta_description, categories, site)
analyze -u Update input CSV with new columns (creates backup)
category_propose [csv] Propose categories based on content
approve [files...] Review and approve recommendations
full_pipeline Run complete workflow: export → analyze → seo_check
Utility:
status Show output files status
help Show this help message
Options:
--verbose, -v Enable verbose logging
--dry-run Show what would be done without doing it
--top-n N Number of top posts for AI analysis (default: 10)
--fields, -f Fields to analyze: title, meta_description, categories, site
--update, -u Update input CSV file (creates backup)
--output, -o Output file path
@@ -284,8 +186,6 @@ Examples:
seo analyze -f title categories
seo analyze -u -f meta_description
seo category_propose
seo approve output/category_proposals_*.csv
seo full_pipeline
seo status
""")
return 0

View File

@@ -1,16 +1,16 @@
"""
Post Exporter Module - Export posts from WordPress sites
Post Exporter - Export posts from WordPress sites
"""
import csv
import logging
import time
import re
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Optional
import requests
from requests.auth import HTTPBasicAuth
import re
from .config import Config
@@ -26,7 +26,7 @@ class PostExporter:
self.all_posts = []
self.category_cache = {}
def fetch_category_names(self, site_name: str, site_config: Dict) -> Dict[int, str]:
def fetch_category_names(self, site_name: str, site_config: Dict) -> Dict[int, Dict]:
"""Fetch category names from a WordPress site."""
if site_name in self.category_cache:
return self.category_cache[site_name]
@@ -61,8 +61,6 @@ class PostExporter:
for status in ['publish', 'draft']:
page = 1
status_count = 0
while True:
try:
logger.info(f" Fetching page {page} ({status} posts)...")
@@ -79,17 +77,14 @@ class PostExporter:
break
posts.extend(page_posts)
status_count += len(page_posts)
logger.info(f" ✓ Got {len(page_posts)} posts (total: {len(posts)})")
logger.info(f" ✓ Got {len(page_posts)} posts")
page += 1
time.sleep(0.5)
except requests.exceptions.HTTPError as e:
if response.status_code == 400:
logger.info(f" API limit reached (got {status_count} {status} posts)")
break
else:
logger.error(f"Error on page {page}: {e}")
break
except requests.exceptions.RequestException as e:
@@ -160,7 +155,7 @@ class PostExporter:
if not self.all_posts:
logger.error("No posts to export")
return None
return ""
fieldnames = [
'site', 'post_id', 'status', 'title', 'slug', 'url', 'author_id',
@@ -178,10 +173,10 @@ class PostExporter:
logger.info(f"✓ CSV exported to: {output_file}")
return str(output_file)
def run(self):
def run(self) -> str:
"""Run the complete export process."""
logger.info("="*70)
logger.info("EXPORTING ALL POSTS FOR AI DECISION MAKING")
logger.info("EXPORTING ALL POSTS")
logger.info("="*70)
logger.info("Sites configured: " + ", ".join(self.sites.keys()))
@@ -196,31 +191,7 @@ class PostExporter:
if not self.all_posts:
logger.error("No posts found on any site")
return
return ""
self.all_posts.sort(key=lambda x: (x['site'], x['post_id']))
self.export_to_csv()
# Print summary
logger.info("\n" + "="*70)
logger.info("EXPORT SUMMARY")
logger.info("="*70)
by_site = {}
for post in self.all_posts:
site = post['site']
if site not in by_site:
by_site[site] = {'total': 0, 'published': 0, 'draft': 0}
by_site[site]['total'] += 1
if post['status'] == 'publish':
by_site[site]['published'] += 1
else:
by_site[site]['draft'] += 1
for site, stats in sorted(by_site.items()):
logger.info(f"\n{site}:")
logger.info(f" Total: {stats['total']}")
logger.info(f" Published: {stats['published']}")
logger.info(f" Drafts: {stats['draft']}")
logger.info(f"\n✓ Export complete!")
return self.export_to_csv()

View File

@@ -1,15 +1,19 @@
"""
Recategorizer Module - AI-powered post recategorization
Placeholder for future implementation.
"""
import sys
from pathlib import Path
import logging
# Import from scripts directory (parent of src)
scripts_dir = Path(__file__).parents[2] / 'scripts'
if str(scripts_dir) not in sys.path:
sys.path.insert(0, str(scripts_dir))
logger = logging.getLogger(__name__)
from ai_recategorize_posts import PostRecategorizer
__all__ = ['PostRecategorizer']
class PostRecategorizer:
    """Stub recategorizer; stores the input path but performs no work yet."""

    def __init__(self, csv_file):
        # Keep the path so a future implementation has its input ready.
        self.csv_file = csv_file
        logger.warning("PostRecategorizer is a placeholder. Implement full functionality as needed.")

    def run(self):
        """No-op until AI recategorization is ported into the package."""
        logger.info("Recategorization not yet implemented in integrated package.")

View File

@@ -1,15 +1,18 @@
"""
SEO Checker Module - SEO quality analysis
Placeholder for future implementation.
"""
import sys
from pathlib import Path
import logging
# Import from scripts directory (parent of src)
scripts_dir = Path(__file__).parents[2] / 'scripts'
if str(scripts_dir) not in sys.path:
sys.path.insert(0, str(scripts_dir))
logger = logging.getLogger(__name__)
from multi_site_seo_analyzer import MultiSiteSEOAnalyzer
__all__ = ['MultiSiteSEOAnalyzer']
class MultiSiteSEOAnalyzer:
    """Stub multi-site SEO analyzer; real checks are still pending."""

    def __init__(self):
        # Warn at construction so callers know run() is currently a no-op.
        logger.warning("MultiSiteSEOAnalyzer is a placeholder. Implement full functionality as needed.")

    def run(self, use_ai=True, top_n=10):
        """No-op until SEO checking is ported into the package."""
        logger.info("SEO check not yet implemented in integrated package.")