Refactor to single integrated package - Remove scripts folder
Major refactoring to create a unified, self-contained Python package: ### Architecture Changes: - Removed scripts/ directory completely - All functionality now in src/seo/ package - Single entry point: ./seo (imports from src/seo/cli) - No external dependencies on scripts folder ### New Package Structure: src/seo/ ├── __init__.py - Package exports (SEOApp, PostExporter, etc.) ├── cli.py - Command-line interface ├── app.py - Main application class ├── config.py - Configuration management ├── exporter.py - Post export functionality (self-contained) ├── analyzer.py - Enhanced analyzer with selective fields ├── category_proposer.py - AI category proposals (self-contained) ├── seo_checker.py - Placeholder for future implementation ├── categories.py - Placeholder for future implementation ├── approval.py - Placeholder for future implementation └── recategorizer.py - Placeholder for future implementation ### Features: - All modules are self-contained (no scripts dependencies) - EnhancedPostAnalyzer with selective field analysis - CategoryProposer for AI-powered category suggestions - Support for in-place CSV updates with backups - Clean, integrated codebase ### CLI Commands: - seo export - Export posts from WordPress - seo analyze - Analyze with AI (supports -f fields, -u update) - seo category_propose - Propose categories - seo status - Show output files - seo help - Show help ### Usage Examples: ./seo export ./seo analyze -f title categories ./seo analyze -u -f meta_description ./seo category_propose ./seo status ### Benefits: - Single source of truth - Easier to maintain and extend - Proper Python package structure - Can be installed with pip install -e . - Clean imports throughout - No path resolution issues Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
@@ -1,453 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
AI-Powered Post Analysis and Recommendation Script
|
||||
Analyzes exported posts CSV using Claude via OpenRouter and provides
|
||||
clear, automation-friendly recommendations for:
|
||||
- Which site to move posts to
|
||||
- Categories to set
|
||||
- Posts to consolidate
|
||||
- Posts to delete
|
||||
- Posts to optimize
|
||||
"""
|
||||
|
||||
import csv
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
import requests
|
||||
from datetime import datetime
|
||||
from config import Config
|
||||
|
||||
# Setup logging: module-level statements, so this runs when the module is
# imported or executed. basicConfig sets the level to INFO and uses a
# "timestamp - level - message" line format.
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(levelname)s - %(message)s'
|
||||
)
|
||||
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PostAnalyzer:
    """Analyze an exported posts CSV using Claude AI via OpenRouter.

    Typical flow (see ``run``): ``load_csv`` -> ``analyze_all_posts`` ->
    ``print_summary`` -> ``export_with_recommendations``.
    """

    # Columns that analyze_all_posts() adds to every post row.
    _RECOMMENDATION_FIELDS = (
        'decision',
        'recommended_category',
        'reason',
        'priority',
        'ai_notes',
    )

    def __init__(self, csv_file: str):
        """Initialize analyzer with CSV file.

        Args:
            csv_file: Path to the exported posts CSV.
        """
        self.csv_file = Path(csv_file)
        self.openrouter_api_key = Config.OPENROUTER_API_KEY
        self.posts = []            # rows loaded by load_csv()
        self.analyzed_posts = []   # rows annotated by analyze_all_posts()
        self.api_calls = 0         # number of successful API responses
        self.ai_cost = 0.0         # running cost estimate (logged with a "$")

    def load_csv(self) -> bool:
        """Load posts from CSV file into ``self.posts``.

        Returns:
            True when the file was read, False when it is missing or
            could not be read (the error is logged).
        """
        logger.info(f"Loading CSV: {self.csv_file}")

        if not self.csv_file.exists():
            logger.error(f"CSV file not found: {self.csv_file}")
            return False

        try:
            # newline='' is what the csv module requires so that newlines
            # embedded in quoted fields are preserved.
            with open(self.csv_file, 'r', newline='', encoding='utf-8') as f:
                self.posts = list(csv.DictReader(f))

            logger.info(f"✓ Loaded {len(self.posts)} posts from CSV")

            # Group by site for stats
            by_site = {}
            for post in self.posts:
                site = post.get('site', '')
                by_site[site] = by_site.get(site, 0) + 1

            for site, count in by_site.items():
                logger.info(f" {site}: {count} posts")

            return True

        except Exception as e:
            logger.error(f"Error loading CSV: {e}")
            return False

    def batch_posts_for_analysis(self, batch_size: int = 10) -> List[List[Dict]]:
        """Split ``self.posts`` into consecutive batches of ``batch_size``.

        The last batch may be shorter. Raises ValueError (from ``range``)
        when ``batch_size`` is 0.
        """
        return [
            self.posts[i:i + batch_size]
            for i in range(0, len(self.posts), batch_size)
        ]

    def format_batch_for_ai(self, batch: List[Dict]) -> str:
        """Format batch of posts as the text block embedded in the prompt.

        Raises:
            KeyError: if a post lacks one of the expected CSV columns.
        """
        parts = ["POSTS TO ANALYZE:\n\n"]

        for i, post in enumerate(batch, 1):
            parts.append(
                f"{i}. POST ID: {post['post_id']}\n"
                f" Site: {post['site']}\n"
                f" Title: {post['title']}\n"
                f" Status: {post['status']}\n"
                f" Word Count: {post['word_count']}\n"
                f" Content: {post['content_preview']}\n"
                f" Current Categories: {post['categories']}\n"
                f" Meta Description: {post['meta_description']}\n"
                "\n"
            )

        return "".join(parts)

    def get_ai_recommendations(self, batch: List[Dict]) -> Optional[str]:
        """Get AI recommendations for a batch of posts.

        Returns:
            The raw text of the model's reply (expected to contain a JSON
            array), or None when the API key is missing or the request /
            response handling fails (the error is logged).
        """
        if not self.openrouter_api_key:
            logger.error("OPENROUTER_API_KEY not set")
            return None

        batch_text = self.format_batch_for_ai(batch)

        prompt = f"""Analyze these blog posts and provide clear, actionable recommendations.

Website Strategy:
- mistergeek.net: High-value topics (VPN, Software, Gaming, General Tech, SEO, Content Marketing)
- webscroll.fr: Torrenting, File-Sharing, Tracker guides (niche audience)
- hellogeek.net: Low-traffic, experimental, off-brand, or niche content

{batch_text}

For EACH post, provide a JSON object with:
{{
"post_id": <id>,
"decision": "<ACTION>" where ACTION is ONE of:
- "Keep on mistergeek.net" (high-value, high-traffic)
- "Move to webscroll.fr" (torrenting/file-sharing content)
- "Move to hellogeek.net" (low-traffic or off-brand)
- "Delete" (spam, extremely low quality, zero traffic)
- "Consolidate with post_id:<id>" (similar content, duplicate)
"category": "<CATEGORY>" where category is ONE of:
- "VPN"
- "Software/Tools"
- "Gaming"
- "Streaming"
- "Torrenting"
- "File-Sharing"
- "SEO"
- "Content Marketing"
- "Other"
"reason": "<Brief reason for decision>",
"priority": "<High|Medium|Low>",
"notes": "<Any additional notes>"
}}

Return ONLY a JSON array. Example:
[
{{"post_id": 2845, "decision": "Keep on mistergeek.net", "category": "VPN", "reason": "High traffic, core topic", "priority": "High", "notes": "Already optimized"}},
{{"post_id": 1234, "decision": "Move to webscroll.fr", "category": "Torrenting", "reason": "Torrent tracker content", "priority": "Medium", "notes": "Good SEO potential on target site"}}
]

Analyze all posts and provide recommendations for EVERY post in the batch."""

        try:
            logger.info(" Sending batch to Claude for analysis...")

            response = requests.post(
                "https://openrouter.ai/api/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {self.openrouter_api_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": "anthropic/claude-3.5-sonnet",
                    "messages": [
                        {"role": "user", "content": prompt}
                    ],
                    "temperature": 0.3,  # Lower temp for more consistent recommendations
                },
                timeout=60
            )
            response.raise_for_status()

            result = response.json()
            self.api_calls += 1

            # Track cost. NOTE(review): the 3 / 15 per-million-token rates are
            # hard-coded; presumably the model's input/output pricing - confirm.
            usage = result.get('usage', {})
            input_tokens = usage.get('prompt_tokens', 0)
            output_tokens = usage.get('completion_tokens', 0)
            self.ai_cost += (input_tokens * 3 + output_tokens * 15) / 1_000_000

            recommendations_text = result['choices'][0]['message']['content'].strip()
            logger.info(f" ✓ Got recommendations (tokens: {input_tokens}+{output_tokens})")

            return recommendations_text

        except Exception as e:
            # Boundary handler: any network, HTTP or response-shape problem
            # fails this batch only; the caller carries on with the next one.
            logger.error(f"Error getting AI recommendations: {e}")
            return None

    def parse_recommendations(self, recommendations_json: str) -> List[Dict]:
        """Parse JSON recommendations from AI.

        The reply may wrap the array in extra text (for example a code
        fence), so only the span from the first '[' to the last ']' is
        parsed. Elements that are not JSON objects are dropped, because
        callers access every element with ``.get``.

        Returns:
            A list of dicts; empty when no valid JSON array is found.
        """
        # Try to extract JSON from response
        start_idx = recommendations_json.find('[')
        end_idx = recommendations_json.rfind(']') + 1

        if start_idx == -1 or end_idx == 0:
            logger.error("Could not find JSON array in response")
            return []

        try:
            parsed = json.loads(recommendations_json[start_idx:end_idx])
        except json.JSONDecodeError as e:
            logger.error(f"Error parsing JSON recommendations: {e}")
            logger.debug(f"Response was: {recommendations_json[:500]}")
            return []

        recommendations = [rec for rec in parsed if isinstance(rec, dict)]
        dropped = len(parsed) - len(recommendations)
        if dropped:
            logger.warning(f"Ignored {dropped} non-object entries in AI response")

        return recommendations

    def analyze_all_posts(self, batch_size: int = 10) -> bool:
        """Analyze all posts in batches and annotate each post in place.

        Every post receives the five recommendation columns. Posts without
        a matching recommendation are marked 'Pending'. Recommendations are
        matched to posts within the batch that produced them, so a post ID
        that also exists on another site in a different batch cannot pick
        up the wrong recommendation. ``self.analyzed_posts`` is rebuilt on
        every call.

        Args:
            batch_size: Number of posts sent to the AI per request.

        Returns:
            True when at least one post was processed.
        """
        logger.info("\n" + "="*70)
        logger.info("ANALYZING POSTS WITH AI")
        logger.info("="*70 + "\n")

        batches = self.batch_posts_for_analysis(batch_size=batch_size)
        logger.info(f"Processing {len(self.posts)} posts in {len(batches)} batches of {batch_size}...\n")

        # Start fresh so a second call does not duplicate rows.
        self.analyzed_posts = []
        total_recommendations = 0

        for batch_num, batch in enumerate(batches, 1):
            logger.info(f"Batch {batch_num}/{len(batches)}: Analyzing {len(batch)} posts...")

            recs_by_id = {}
            recommendations_json = self.get_ai_recommendations(batch)

            if not recommendations_json:
                logger.error(f" Failed to get recommendations for batch {batch_num}")
            else:
                recommendations = self.parse_recommendations(recommendations_json)
                # The model may echo the ID as int or str; normalise to str.
                recs_by_id = {
                    str(rec.get('post_id', '')): rec
                    for rec in recommendations
                    if isinstance(rec, dict)
                }
                total_recommendations += len(recs_by_id)
                logger.info(f" ✓ Got {len(recommendations)} recommendations")

            # Map recommendations to the posts of this batch only.
            for post in batch:
                rec = recs_by_id.get(str(post['post_id']))
                if rec is not None:
                    post['decision'] = rec.get('decision', 'No decision')
                    post['recommended_category'] = rec.get('category', 'Other')
                    post['reason'] = rec.get('reason', '')
                    post['priority'] = rec.get('priority', 'Medium')
                    post['ai_notes'] = rec.get('notes', '')
                else:
                    post['decision'] = 'Pending'
                    post['recommended_category'] = 'Other'
                    post['reason'] = 'No recommendation'
                    post['priority'] = 'Medium'
                    post['ai_notes'] = ''

                self.analyzed_posts.append(post)

        logger.info("\n✓ Analysis complete!")
        logger.info(f" Total recommendations: {total_recommendations}")
        logger.info(f" API calls: {self.api_calls}")
        logger.info(f" Estimated cost: ${self.ai_cost:.4f}")

        return len(self.analyzed_posts) > 0

    @staticmethod
    def _write_csv(path: Path, fieldnames: List[str], rows: List[Dict]) -> None:
        """Write ``rows`` to ``path`` as a UTF-8 CSV with a header row."""
        with open(path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(rows)

    def export_with_recommendations(
        self, output_dir: Optional[str] = None
    ) -> Tuple[str, Optional[str], Optional[str], Optional[str]]:
        """Export CSV with recommendations and create action-specific files.

        Args:
            output_dir: Directory for the CSV files. Defaults to the
                ``output`` directory next to this script's parent folder.

        Returns:
            ``(main_file, moves_file, consolidate_file, delete_file)`` as
            strings. Each of the last three is None when no post falls
            into that category (and no file is written for it).
        """
        if output_dir is None:
            output_dir = Path(__file__).parent.parent / 'output'
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

        # Main file with all recommendations
        main_file = output_dir / f'posts_with_ai_recommendations_{timestamp}.csv'

        # Action-specific files
        moves_file = output_dir / f'posts_to_move_{timestamp}.csv'
        consolidate_file = output_dir / f'posts_to_consolidate_{timestamp}.csv'
        delete_file = output_dir / f'posts_to_delete_{timestamp}.csv'

        # Analyzed posts already carry the recommendation columns, so merge
        # the names order-preservingly instead of appending them again
        # (appending produced duplicated header columns).
        first_post = self.analyzed_posts[0] if self.analyzed_posts else {}
        fieldnames = list(dict.fromkeys([*first_post, *self._RECOMMENDATION_FIELDS]))

        logger.info("\nExporting recommendations to CSV...")

        self._write_csv(main_file, fieldnames, self.analyzed_posts)
        logger.info(f"✓ Main file: {main_file}")

        # Export action-specific files. str() guards against a non-string
        # decision value coming back from the model.
        posts_to_move = [
            p for p in self.analyzed_posts if 'Move to' in str(p.get('decision', ''))
        ]
        posts_to_consolidate = [
            p for p in self.analyzed_posts if 'Consolidate' in str(p.get('decision', ''))
        ]
        posts_to_delete = [p for p in self.analyzed_posts if p.get('decision') == 'Delete']

        # Moves file
        if posts_to_move:
            self._write_csv(moves_file, fieldnames, posts_to_move)
            logger.info(f"✓ Moves file ({len(posts_to_move)} posts): {moves_file}")

        # Consolidate file
        if posts_to_consolidate:
            self._write_csv(consolidate_file, fieldnames, posts_to_consolidate)
            logger.info(f"✓ Consolidate file ({len(posts_to_consolidate)} posts): {consolidate_file}")

        # Delete file
        if posts_to_delete:
            self._write_csv(delete_file, fieldnames, posts_to_delete)
            logger.info(f"✓ Delete file ({len(posts_to_delete)} posts): {delete_file}")

        return (
            str(main_file),
            str(moves_file) if posts_to_move else None,
            str(consolidate_file) if posts_to_consolidate else None,
            str(delete_file) if posts_to_delete else None
        )

    def print_summary(self):
        """Log counts of decisions, categories and priorities, and decisions per site."""
        logger.info("\n" + "="*70)
        logger.info("ANALYSIS SUMMARY")
        logger.info("="*70 + "\n")

        # Count decisions
        decisions = {}
        for post in self.analyzed_posts:
            decision = post.get('decision', 'Unknown')
            decisions[decision] = decisions.get(decision, 0) + 1

        logger.info("DECISIONS:")
        for decision, count in sorted(decisions.items(), key=lambda x: x[1], reverse=True):
            logger.info(f" {decision}: {count} posts")

        # Count categories
        categories = {}
        for post in self.analyzed_posts:
            cat = post.get('recommended_category', 'Other')
            categories[cat] = categories.get(cat, 0) + 1

        logger.info("\nRECOMMENDED CATEGORIES:")
        for cat, count in sorted(categories.items(), key=lambda x: x[1], reverse=True):
            logger.info(f" {cat}: {count} posts")

        # Count priorities
        priorities = {}
        for post in self.analyzed_posts:
            priority = post.get('priority', 'Unknown')
            priorities[priority] = priorities.get(priority, 0) + 1

        logger.info("\nPRIORITY BREAKDOWN:")
        for priority in ['High', 'Medium', 'Low']:
            count = priorities.get(priority, 0)
            logger.info(f" {priority}: {count} posts")

        # By site
        logger.info("\nBY SITE:")
        by_site = {}
        for post in self.analyzed_posts:
            site = post.get('site', 'Unknown')
            by_site.setdefault(site, []).append(post.get('decision', 'Unknown'))

        for site in sorted(by_site.keys()):
            logger.info(f"\n {site}:")
            decisions_for_site = {}
            for decision in by_site[site]:
                decisions_for_site[decision] = decisions_for_site.get(decision, 0) + 1

            for decision, count in sorted(decisions_for_site.items()):
                logger.info(f" {decision}: {count}")

    def run(self):
        """Run complete analysis: load, analyze, summarize, export.

        Exits the process with status 1 when loading or analysis fails.
        """
        logger.info("="*70)
        logger.info("AI-POWERED POST ANALYSIS AND RECOMMENDATIONS")
        logger.info("="*70)

        # Load CSV
        if not self.load_csv():
            sys.exit(1)

        # Analyze posts
        if not self.analyze_all_posts():
            logger.error("Failed to analyze posts")
            sys.exit(1)

        # Print summary
        self.print_summary()

        # Export results
        logger.info("\n" + "="*70)
        logger.info("EXPORTING RESULTS")
        logger.info("="*70)

        main_file, moves_file, consol_file, delete_file = self.export_with_recommendations()

        logger.info("\n" + "="*70)
        logger.info("NEXT STEPS")
        logger.info("="*70)
        logger.info("\n1. Review main file with all recommendations:")
        logger.info(f" {main_file}")
        logger.info("\n2. Execute moves (automate with script):")
        if moves_file:
            logger.info(f" {moves_file}")
        else:
            logger.info(" No posts to move")

        logger.info("\n3. Consolidate duplicates:")
        if consol_file:
            logger.info(f" {consol_file}")
        else:
            logger.info(" No posts to consolidate")

        logger.info("\n4. Delete low-quality posts:")
        if delete_file:
            logger.info(f" {delete_file}")
        else:
            logger.info(" No posts to delete")

        logger.info("\n✓ Analysis complete!")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: read the CSV path argument and run the analyzer."""
    from argparse import ArgumentParser

    cli = ArgumentParser(
        description='Analyze exported posts CSV using Claude AI and provide recommendations'
    )
    cli.add_argument('csv_file', help='Path to exported posts CSV file')
    csv_path = cli.parse_args().csv_file

    # PostAnalyzer.run() drives load -> analyze -> summary -> export.
    PostAnalyzer(csv_path).run()


if __name__ == '__main__':
    main()
|
||||
@@ -1,382 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
AI-Powered Post Re-categorization
|
||||
Analyzes exported posts using Claude AI via OpenRouter and provides
|
||||
category recommendations for better content organization.
|
||||
"""
|
||||
|
||||
import csv
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
import requests
|
||||
from datetime import datetime
|
||||
from config import Config
|
||||
|
||||
# Setup logging: module-level statements, so this runs when the module is
# imported or executed. basicConfig sets the level to INFO and uses a
# "timestamp - level - message" line format.
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(levelname)s - %(message)s'
|
||||
)
|
||||
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PostRecategorizer:
    """Re-categorize posts from an exported CSV using Claude AI via OpenRouter.

    Typical flow (see ``run``): ``load_csv`` -> ``analyze_all_posts`` ->
    ``print_summary`` -> ``export_with_recommendations``.
    """

    # Columns that analyze_all_posts() adds to every post row.
    _RECOMMENDATION_FIELDS = (
        'recommended_categories',
        'recategorization_reason',
        'recategorization_confidence',
    )

    def __init__(self, csv_file: str):
        """Initialize recategorizer with CSV file.

        Args:
            csv_file: Path to the exported posts CSV.
        """
        self.csv_file = Path(csv_file)
        self.openrouter_api_key = Config.OPENROUTER_API_KEY
        self.posts = []                 # rows loaded by load_csv()
        self.recategorized_posts = []   # rows annotated by analyze_all_posts()
        self.api_calls = 0              # number of successful API responses
        self.ai_cost = 0.0              # running cost estimate (logged with a "$")

    def load_csv(self) -> bool:
        """Load posts from CSV file into ``self.posts``.

        Returns:
            True when the file was read, False when it is missing or
            could not be read (the error is logged).
        """
        logger.info(f"Loading CSV: {self.csv_file}")

        if not self.csv_file.exists():
            logger.error(f"CSV file not found: {self.csv_file}")
            return False

        try:
            # newline='' is what the csv module requires so that newlines
            # embedded in quoted fields are preserved.
            with open(self.csv_file, 'r', newline='', encoding='utf-8') as f:
                self.posts = list(csv.DictReader(f))

            logger.info(f"✓ Loaded {len(self.posts)} posts from CSV")

            # Group by site for stats
            by_site = {}
            for post in self.posts:
                site = post.get('site', '')
                by_site[site] = by_site.get(site, 0) + 1

            for site, count in by_site.items():
                logger.info(f" {site}: {count} posts")

            return True

        except Exception as e:
            logger.error(f"Error loading CSV: {e}")
            return False

    def batch_posts_for_analysis(self, batch_size: int = 10) -> List[List[Dict]]:
        """Split ``self.posts`` into consecutive batches of ``batch_size``.

        The last batch may be shorter. Raises ValueError (from ``range``)
        when ``batch_size`` is 0.
        """
        return [
            self.posts[i:i + batch_size]
            for i in range(0, len(self.posts), batch_size)
        ]

    def format_batch_for_ai(self, batch: List[Dict]) -> str:
        """Format batch of posts as the text block embedded in the prompt.

        ``post_id``, ``site`` and ``title`` are required (KeyError when
        missing); the remaining columns fall back to defaults.
        """
        parts = ["POSTS TO RECATEGORIZE:\n\n"]

        for i, post in enumerate(batch, 1):
            parts.append(
                f"{i}. POST ID: {post['post_id']}\n"
                f" Site: {post['site']}\n"
                f" Title: {post['title']}\n"
                f" Current Categories: {post.get('categories', 'None')}\n"
                f" Content: {post.get('content_preview', '')}...\n"
                f" Word Count: {post.get('word_count', '0')}\n"
                "\n"
            )

        return "".join(parts)

    def get_ai_recommendations(self, batch: List[Dict]) -> Optional[str]:
        """Get AI category recommendations for a batch of posts.

        Returns:
            The raw text of the model's reply (expected to contain a JSON
            array), or None when the API key is missing or the request /
            response handling fails (the error is logged).
        """
        if not self.openrouter_api_key:
            logger.error("OPENROUTER_API_KEY not set")
            return None

        batch_text = self.format_batch_for_ai(batch)

        prompt = f"""Analyze these blog posts and recommend optimal categories.

Website Strategy:
- mistergeek.net: VPN, Software/Tools, Gaming, General Tech, SEO, Content Marketing
- webscroll.fr: Torrenting, File-Sharing, Tracker Guides
- hellogeek.net: Experimental, Low-traffic, Off-brand content

{batch_text}

For EACH post, provide a JSON object with:
{{
"post_id": <id>,
"current_categories": "<current>",
"recommended_categories": "<comma-separated categories>",
"reason": "<Brief reason for recommendation>",
"confidence": "High|Medium|Low"
}}

Return ONLY a JSON array. Example:
[
{{"post_id": 2845, "current_categories": "VPN", "recommended_categories": "VPN, Security", "reason": "Add security angle", "confidence": "High"}},
{{"post_id": 1234, "current_categories": "Other", "recommended_categories": "Torrenting, Guides", "reason": "Torrent-specific content", "confidence": "Medium"}}
]

Analyze all posts and provide recommendations for EVERY post in the batch."""

        try:
            logger.info(" Sending batch to Claude for recategorization...")

            response = requests.post(
                "https://openrouter.ai/api/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {self.openrouter_api_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": "anthropic/claude-3.5-sonnet",
                    "messages": [
                        {"role": "user", "content": prompt}
                    ],
                    "temperature": 0.3,
                },
                timeout=60
            )
            response.raise_for_status()

            result = response.json()
            self.api_calls += 1

            # Track cost. NOTE(review): the 3 / 15 per-million-token rates are
            # hard-coded; presumably the model's input/output pricing - confirm.
            usage = result.get('usage', {})
            input_tokens = usage.get('prompt_tokens', 0)
            output_tokens = usage.get('completion_tokens', 0)
            self.ai_cost += (input_tokens * 3 + output_tokens * 15) / 1_000_000

            recommendations_text = result['choices'][0]['message']['content'].strip()
            logger.info(f" ✓ Got recommendations (tokens: {input_tokens}+{output_tokens})")

            return recommendations_text

        except Exception as e:
            # Boundary handler: any network, HTTP or response-shape problem
            # fails this batch only; the caller carries on with the next one.
            logger.error(f"Error getting AI recommendations: {e}")
            return None

    def parse_recommendations(self, recommendations_json: str) -> List[Dict]:
        """Parse JSON recommendations from AI.

        The reply may wrap the array in extra text (for example a code
        fence), so only the span from the first '[' to the last ']' is
        parsed. Elements that are not JSON objects are dropped, because
        callers access every element with ``.get``.

        Returns:
            A list of dicts; empty when no valid JSON array is found.
        """
        # Try to extract JSON from response
        start_idx = recommendations_json.find('[')
        end_idx = recommendations_json.rfind(']') + 1

        if start_idx == -1 or end_idx == 0:
            logger.error("Could not find JSON array in response")
            return []

        try:
            parsed = json.loads(recommendations_json[start_idx:end_idx])
        except json.JSONDecodeError as e:
            logger.error(f"Error parsing JSON recommendations: {e}")
            logger.debug(f"Response was: {recommendations_json[:500]}")
            return []

        recommendations = [rec for rec in parsed if isinstance(rec, dict)]
        dropped = len(parsed) - len(recommendations)
        if dropped:
            logger.warning(f"Ignored {dropped} non-object entries in AI response")

        return recommendations

    def analyze_all_posts(self, batch_size: int = 10) -> bool:
        """Analyze all posts in batches and annotate each post in place.

        Every post receives the three recategorization columns. Posts
        without a matching recommendation keep their current categories.
        Recommendations are matched to posts within the batch that
        produced them, so a post ID that also exists on another site in a
        different batch cannot pick up the wrong recommendation.
        ``self.recategorized_posts`` is rebuilt on every call.

        Args:
            batch_size: Number of posts sent to the AI per request.

        Returns:
            True when at least one post was processed.
        """
        logger.info("\n" + "="*70)
        logger.info("RECATEGORIZING POSTS WITH AI")
        logger.info("="*70 + "\n")

        batches = self.batch_posts_for_analysis(batch_size=batch_size)
        logger.info(f"Processing {len(self.posts)} posts in {len(batches)} batches of {batch_size}...\n")

        # Start fresh so a second call does not duplicate rows.
        self.recategorized_posts = []
        total_recommendations = 0

        for batch_num, batch in enumerate(batches, 1):
            logger.info(f"Batch {batch_num}/{len(batches)}: Analyzing {len(batch)} posts...")

            recs_by_id = {}
            recommendations_json = self.get_ai_recommendations(batch)

            if not recommendations_json:
                logger.error(f" Failed to get recommendations for batch {batch_num}")
            else:
                recommendations = self.parse_recommendations(recommendations_json)
                # The model may echo the ID as int or str; normalise to str.
                recs_by_id = {
                    str(rec.get('post_id', '')): rec
                    for rec in recommendations
                    if isinstance(rec, dict)
                }
                total_recommendations += len(recs_by_id)
                logger.info(f" ✓ Got {len(recommendations)} recommendations")

            # Map recommendations to the posts of this batch only.
            for post in batch:
                current = post.get('categories', '')
                rec = recs_by_id.get(str(post['post_id']))
                if rec is not None:
                    post['recommended_categories'] = rec.get('recommended_categories', current)
                    post['recategorization_reason'] = rec.get('reason', '')
                    post['recategorization_confidence'] = rec.get('confidence', 'Medium')
                else:
                    post['recommended_categories'] = current
                    post['recategorization_reason'] = 'No recommendation'
                    post['recategorization_confidence'] = 'Unknown'

                self.recategorized_posts.append(post)

        logger.info("\n✓ Analysis complete!")
        logger.info(f" Total recommendations: {total_recommendations}")
        logger.info(f" API calls: {self.api_calls}")
        logger.info(f" Estimated cost: ${self.ai_cost:.4f}")

        return len(self.recategorized_posts) > 0

    @staticmethod
    def _write_csv(path: Path, fieldnames: List[str], rows: List[Dict]) -> None:
        """Write ``rows`` to ``path`` as a UTF-8 CSV with a header row."""
        with open(path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(rows)

    def export_with_recommendations(
        self, output_dir: Optional[str] = None
    ) -> Tuple[str, Optional[str]]:
        """Export CSV with recategorization recommendations.

        Args:
            output_dir: Directory for the CSV files. Defaults to the
                ``output`` directory next to this script's parent folder.

        Returns:
            ``(main_file, changes_file)`` as strings; ``changes_file`` is
            None when no post has a different recommended category (and no
            changes file is written).
        """
        if output_dir is None:
            output_dir = Path(__file__).parent.parent / 'output'
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

        # Main file with all recommendations
        main_file = output_dir / f'posts_with_recategorization_{timestamp}.csv'

        # Differences file (only posts with different recommendations)
        changes_file = output_dir / f'category_changes_only_{timestamp}.csv'

        # Recategorized posts already carry the recommendation columns, so
        # merge the names order-preservingly instead of appending them again
        # (appending produced duplicated header columns).
        first_post = self.recategorized_posts[0] if self.recategorized_posts else {}
        fieldnames = list(dict.fromkeys([*first_post, *self._RECOMMENDATION_FIELDS]))

        logger.info("\nExporting recategorization recommendations to CSV...")

        # Export main file with all posts
        self._write_csv(main_file, fieldnames, self.recategorized_posts)
        logger.info(f"✓ Main file: {main_file}")

        # Export changes file (only posts where category changed)
        posts_with_changes = [
            p for p in self.recategorized_posts
            if p.get('categories', '') != p.get('recommended_categories', '')
        ]

        if posts_with_changes:
            self._write_csv(changes_file, fieldnames, posts_with_changes)
            logger.info(f"✓ Changes file ({len(posts_with_changes)} posts): {changes_file}")
        else:
            logger.info("ℹ No category changes recommended")

        return (str(main_file), str(changes_file) if posts_with_changes else None)

    def print_summary(self):
        """Log per-site change counts, the overall change rate and a confidence breakdown."""
        logger.info("\n" + "="*70)
        logger.info("RECATEGORIZATION SUMMARY")
        logger.info("="*70 + "\n")

        total_posts = len(self.recategorized_posts)

        def percent(count):
            # Guard against division by zero when nothing was analyzed.
            return count / total_posts * 100 if total_posts else 0.0

        # Count changes by site
        by_site = {}
        total_changes = 0

        for post in self.recategorized_posts:
            stats = by_site.setdefault(post.get('site', 'Unknown'), {'total': 0, 'changed': 0})
            stats['total'] += 1

            if post.get('categories', '') != post.get('recommended_categories', ''):
                stats['changed'] += 1
                total_changes += 1

        logger.info("CHANGES BY SITE:")
        for site in sorted(by_site.keys()):
            stats = by_site[site]
            logger.info(f" {site}: {stats['changed']} changes out of {stats['total']} posts")

        logger.info(f"\nTOTAL CHANGES: {total_changes} out of {total_posts} posts")
        logger.info(f" ({percent(total_changes):.1f}% of posts)")

        # Confidence breakdown
        logger.info("\nRECOMMENDATION CONFIDENCE:")
        confidence_counts = {}
        for post in self.recategorized_posts:
            conf = post.get('recategorization_confidence', 'Unknown')
            confidence_counts[conf] = confidence_counts.get(conf, 0) + 1

        for conf in ['High', 'Medium', 'Low', 'Unknown']:
            count = confidence_counts.get(conf, 0)
            if count > 0:
                logger.info(f" {conf}: {count} posts ({percent(count):.1f}%)")

    def run(self):
        """Run complete recategorization analysis: load, analyze, summarize, export.

        Exits the process with status 1 when loading or analysis fails.
        """
        logger.info("="*70)
        logger.info("AI-POWERED POST RECATEGORIZATION")
        logger.info("="*70)

        # Load CSV
        if not self.load_csv():
            sys.exit(1)

        # Analyze posts
        if not self.analyze_all_posts():
            logger.error("Failed to analyze posts")
            sys.exit(1)

        # Print summary
        self.print_summary()

        # Export results
        logger.info("\n" + "="*70)
        logger.info("EXPORTING RESULTS")
        logger.info("="*70)

        main_file, changes_file = self.export_with_recommendations()

        logger.info("\n" + "="*70)
        logger.info("NEXT STEPS")
        logger.info("="*70)
        logger.info("\n1. Review recategorization recommendations:")
        logger.info(f" {main_file}")
        logger.info("\n2. Review only posts with category changes:")
        if changes_file:
            logger.info(f" {changes_file}")
        else:
            logger.info(" No changes recommended")
        logger.info("\n3. Apply recommendations:")
        logger.info(" Use categorization automation script (coming soon)")
        logger.info(" Or manually update categories in WordPress")

        logger.info("\n✓ Recategorization analysis complete!")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: read the CSV path argument and run the recategorizer."""
    from argparse import ArgumentParser

    cli = ArgumentParser(
        description='Re-categorize posts using Claude AI for better organization'
    )
    cli.add_argument('csv_file', help='Path to exported posts CSV file')
    csv_path = cli.parse_args().csv_file

    # PostRecategorizer.run() drives load -> analyze -> summary -> export.
    PostRecategorizer(csv_path).run()


if __name__ == '__main__':
    main()
|
||||
@@ -1,427 +0,0 @@
|
||||
"""
|
||||
Analytics data importer for SEO analysis.
|
||||
Merges Google Analytics and Search Console data with WordPress posts.
|
||||
"""
|
||||
|
||||
import csv
|
||||
import json
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse, parse_qs
|
||||
from collections import defaultdict
|
||||
from config import Config
|
||||
|
||||
|
||||
class AnalyticsImporter:
    """Import and consolidate analytics data with WordPress posts.

    Reads a GA4 export, a Search Console export and a posts CSV, attaches the
    analytics rows to the posts by normalized URL (with a slug fallback for
    Search Console), and writes an enriched CSV plus a text report.
    """

    # Posts-CSV columns that load_posts_csv() folds into canonical keys and
    # therefore does not copy through verbatim.
    _FOLDED_POST_COLUMNS = frozenset({
        'ID', 'post_id', 'Title', 'post_title', 'URL', 'Post URL', 'post_url',
        'Post Slug', 'Slug', 'post_slug', 'SEO Title', 'proposed_seo_title',
        'current_seo_title', 'Meta Description', 'proposed_meta_description',
        'current_meta_description',
    })

    # Leading export columns paired with the post key that feeds each of them.
    _CANONICAL_EXPORT_COLUMNS = (
        ('Title', 'title'),
        ('URL', 'url'),
        ('SEO Title', 'seo_title'),
        ('Meta Description', 'meta_description'),
    )

    # Internal bookkeeping keys that must never become CSV columns.
    _HIDDEN_EXPORT_KEYS = frozenset({'ga_data', 'gsc_data', 'normalized_url', 'slug'})

    def __init__(self):
        """Initialize importer."""
        self.config = Config
        self.output_dir = self.config.OUTPUT_DIR
        self.logs = []
        self.unmatched_urls = []

    def log(self, message):
        """Record *message* for the report file and echo it to stdout."""
        self.logs.append(message)
        print(message)

    @staticmethod
    def _first_value(row, columns, default=''):
        """Return the first non-empty cell of *row* among *columns*."""
        for column in columns:
            value = row.get(column)
            if value:
                return value
        return default

    @staticmethod
    def _parse_number(raw):
        """Parse a metric cell into a float, returning 0.0 when unusable.

        Accepts a decimal comma and a trailing percent sign. Empty, missing,
        non-numeric and non-finite values all yield 0.0 so that one bad cell
        never discards the other metrics of the same row.
        """
        if raw is None:
            return 0.0
        text = str(raw).strip().rstrip('%').strip().replace(',', '.')
        if not text:
            return 0.0
        try:
            value = float(text)
        except ValueError:
            return 0.0
        # NaN and infinities cannot be converted to int by the callers.
        if value != value or value in (float('inf'), float('-inf')):
            return 0.0
        return value

    def normalize_url(self, url):
        """Normalize URL for matching.

        Strips the trailing slash, reduces absolute http(s) URLs to their
        path, removes any ``www.`` substring and lowercases the result.
        Returns an empty string for empty input.
        """
        if not url:
            return ""
        url = url.rstrip('/')
        if url.startswith('http'):
            url = urlparse(url).path
        url = url.replace('www.', '')
        return url.lower()

    def extract_post_slug_from_url(self, url):
        """Return the last path segment of *url*, or None when there is none."""
        # Posts without a URL reach this method with None or an empty string.
        if not url:
            return None
        segments = [part for part in urlparse(url).path.rstrip('/').split('/') if part]
        return segments[-1] if segments else None

    def load_ga4_data(self, ga4_csv):
        """Load Google Analytics 4 data.

        Args:
            ga4_csv: Path of the GA4 export (English or French headers).

        Returns:
            Dict mapping normalized URL to traffic, users, bounce_rate,
            avg_session_duration and the original ga_url. Empty when the file
            is missing; partial when reading fails midway.
        """
        ga_data = {}
        if not ga4_csv.exists():
            self.log(f"⚠️ GA4 file not found: {ga4_csv}")
            return ga_data

        try:
            # utf-8-sig also reads plain UTF-8 and drops a leading BOM, which
            # would otherwise hide a first-line '#' comment or the first header.
            with open(ga4_csv, 'r', encoding='utf-8-sig') as f:
                lines = [line for line in f if not line.startswith('#')]

            for row in csv.DictReader(lines):
                url = self._first_value(row, (
                    'Page path and screen class',
                    'Chemin de la page et classe de l\'écran',
                    'Page path',
                    'Page',
                ))
                if not url:
                    continue

                # Each metric is parsed on its own (English and French names).
                ga_data[self.normalize_url(url)] = {
                    'traffic': int(self._parse_number(
                        self._first_value(row, ('Screened Views', 'Views', 'Vues')))),
                    'users': int(self._parse_number(
                        self._first_value(row, ('Users', 'Utilisateurs actifs')))),
                    'bounce_rate': self._parse_number(
                        self._first_value(row, ('Bounce rate', 'Taux de rebond'))),
                    'avg_session_duration': self._parse_number(
                        self._first_value(row, (
                            'Average session duration',
                            'Durée d\'engagement moyenne par utilisateur actif',
                        ))),
                    'ga_url': url,
                }
            self.log(f"✓ Loaded {len(ga_data)} GA4 entries")
        except Exception as e:
            # Best effort: report the problem and return what was read so far.
            self.log(f"❌ Error reading GA4 file: {e}")

        return ga_data

    def load_gsc_data(self, gsc_csv):
        """Load Google Search Console data (Page-level or Query-level).

        Rows sharing a normalized URL are accumulated: clicks and impressions
        are summed, positions are averaged, queries are collected once each
        and CTR is recomputed from the totals.

        Args:
            gsc_csv: Path of the Search Console export.

        Returns:
            Dict mapping normalized URL to impressions, clicks, avg_position,
            ctr, keywords, keywords_count and the original gsc_url.
        """
        gsc_data = {}
        if not gsc_csv.exists():
            self.log(f"⚠️ GSC file not found: {gsc_csv}")
            return gsc_data

        positions_by_url = defaultdict(list)
        read_ok = True
        try:
            with open(gsc_csv, 'r', encoding='utf-8-sig') as f:
                for row in csv.DictReader(f):
                    url = self._first_value(
                        row, ('Page', 'Pages les plus populaires', 'URL'))
                    # Skip rows without URLs (query-only data)
                    if not url:
                        continue

                    # Short rows yield None cells, hence the `or ''`.
                    query = (row.get('Query')
                             or row.get('Requêtes les plus fréquentes')
                             or '').strip()
                    clicks = int(self._parse_number(
                        self._first_value(row, ('Clics', 'Clicks'))))
                    impressions = int(self._parse_number(row.get('Impressions')))
                    position = self._parse_number(row.get('Position'))

                    normalized = self.normalize_url(url)
                    if normalized not in gsc_data:
                        gsc_data[normalized] = {
                            'impressions': 0,
                            'clicks': 0,
                            'avg_position': 0,
                            'ctr': 0,
                            'keywords': [],
                            'gsc_url': url,
                        }
                    entry = gsc_data[normalized]
                    entry['impressions'] += impressions
                    entry['clicks'] += clicks
                    if position > 0:
                        positions_by_url[normalized].append(position)
                    if query and query not in entry['keywords']:
                        entry['keywords'].append(query)
        except Exception as e:
            read_ok = False
            self.log(f"❌ Error reading GSC file: {e}")

        # Finalize even after a failed read so partial entries stay consistent.
        for normalized, entry in gsc_data.items():
            positions = positions_by_url.get(normalized)
            if positions:
                entry['avg_position'] = sum(positions) / len(positions)
            if entry['impressions'] > 0:
                entry['ctr'] = entry['clicks'] / entry['impressions']
            entry['keywords_count'] = len(entry['keywords'])

        if read_ok:
            self.log(f"✓ Loaded {len(gsc_data)} GSC entries")
        return gsc_data

    def load_posts_csv(self, posts_csv):
        """Load existing WordPress posts CSV.

        Args:
            posts_csv: Path of the posts CSV; several header spellings are
                accepted for ID, URL, slug, title and the SEO fields.

        Returns:
            Dict mapping post ID (string) to the canonical keys title, url,
            slug, normalized_url, seo_title and meta_description, plus every
            other column of the row unchanged. Rows without an ID are skipped.
        """
        posts = {}
        if not posts_csv.exists():
            self.log(f"⚠️ Posts file not found: {posts_csv}")
            return posts

        try:
            with open(posts_csv, 'r', encoding='utf-8-sig') as f:
                for row in csv.DictReader(f):
                    post_id = row.get('ID') or row.get('post_id')
                    if not post_id:
                        continue

                    post_url = row.get('URL') or row.get('Post URL') or row.get('post_url')
                    post_slug = row.get('Post Slug') or row.get('Slug') or row.get('post_slug')
                    post_title = row.get('Title') or row.get('post_title')
                    seo_title = (row.get('SEO Title') or
                                 row.get('proposed_seo_title') or
                                 row.get('current_seo_title') or '')
                    meta_desc = (row.get('Meta Description') or
                                 row.get('proposed_meta_description') or
                                 row.get('current_meta_description') or '')

                    # DictReader stores overflow cells under the key None;
                    # that key cannot be sorted or written as a column.
                    passthrough = {
                        key: value for key, value in row.items()
                        if key is not None and key not in self._FOLDED_POST_COLUMNS
                    }
                    posts[post_id] = {
                        'title': post_title or '',
                        'url': post_url,
                        'slug': post_slug,
                        'normalized_url': self.normalize_url(post_url) if post_url else "",
                        'seo_title': seo_title,
                        'meta_description': meta_desc,
                        **passthrough,
                    }

            self.log(f"✓ Loaded {len(posts)} posts from CSV")
        except Exception as e:
            self.log(f"❌ Error reading posts CSV: {e}")

        return posts

    def match_analytics_to_posts(self, posts, ga_data, gsc_data):
        """Match analytics data to posts with fuzzy matching.

        GA4 rows are matched on the normalized URL only. Search Console rows
        are matched on the normalized URL first and otherwise on the first
        entry whose URL contains the post slug. Search Console entries with
        impressions that no post claimed are appended to ``unmatched_urls``.

        Returns:
            The same *posts* dict, with ``ga_data`` and ``gsc_data`` set on
            every post (empty dicts when nothing matched).
        """
        self.log("\n📊 Matching analytics data to posts...")
        matched_gsc_keys = set()

        for post_info in posts.values():
            normalized_url = post_info.get('normalized_url') or ''
            slug = (post_info.get('slug')
                    or self.extract_post_slug_from_url(post_info.get('url')))

            # An empty normalized URL is also the key of the site root ("/"),
            # so posts without a URL must not be matched directly.
            post_info['ga_data'] = ga_data.get(normalized_url, {}) if normalized_url else {}

            gsc_key = None
            if normalized_url and normalized_url in gsc_data:
                gsc_key = normalized_url
            elif slug:
                # NOTE: substring match, as before; keys are lowercased, so
                # the slug is lowercased as well.
                needle = slug.lower()
                gsc_key = next((key for key in gsc_data if needle in key), None)

            if gsc_key is None:
                post_info['gsc_data'] = {}
            else:
                post_info['gsc_data'] = gsc_data[gsc_key]
                matched_gsc_keys.add(gsc_key)

        # Track unmatched GSC URLs
        for key, gsc_info in gsc_data.items():
            if key in matched_gsc_keys or gsc_info.get('impressions', 0) <= 0:
                continue
            self.unmatched_urls.append({
                'url': gsc_info.get('gsc_url', key),
                'impressions': gsc_info.get('impressions', 0),
                'clicks': gsc_info.get('clicks', 0),
                'avg_position': gsc_info.get('avg_position', 0),
            })

        self.log("✓ Matched data to posts")
        return posts

    def enrich_posts_data(self, posts):
        """Copy the matched GA4/GSC metrics onto each post as flat keys.

        Missing metrics default to 0; ``top_keywords`` is a comma-joined list
        of at most five queries.
        """
        for post_info in posts.values():
            ga = post_info.get('ga_data', {})
            gsc = post_info.get('gsc_data', {})

            # GA metrics
            for metric in ('traffic', 'users', 'bounce_rate', 'avg_session_duration'):
                post_info[metric] = ga.get(metric, 0)

            # GSC metrics
            for metric in ('impressions', 'clicks', 'avg_position', 'ctr', 'keywords_count'):
                post_info[metric] = gsc.get(metric, 0)
            post_info['top_keywords'] = ','.join(gsc.get('keywords', [])[:5])

        return posts

    def export_enriched_csv(self, posts, output_csv):
        """Export enriched posts data to CSV.

        The fixed leading columns are followed by every other key found on
        any post, sorted by name. The lowercase ``title``/``url``/
        ``seo_title``/``meta_description`` columns are still written so
        existing readers keep working.

        Args:
            posts: Dict of post ID to post data, as built by this class.
            output_csv: Destination path; missing parent folders are created.
        """
        if not posts:
            self.log("❌ No posts to export")
            return

        try:
            fieldnames = [
                'ID', 'Title', 'URL', 'SEO Title', 'Meta Description',
                'traffic', 'users', 'bounce_rate', 'avg_session_duration',
                'impressions', 'clicks', 'avg_position', 'ctr', 'keywords_count', 'top_keywords'
            ]

            # Add any extra fields from original posts
            all_keys = set()
            for post in posts.values():
                all_keys.update(key for key in post if key is not None)
            fixed = set(fieldnames)
            fieldnames.extend(
                key for key in sorted(all_keys)
                if key not in fixed and key not in self._HIDDEN_EXPORT_KEYS
            )

            Path(output_csv).parent.mkdir(parents=True, exist_ok=True)
            with open(output_csv, 'w', newline='', encoding='utf-8') as f:
                # extrasaction='ignore' drops the nested ga_data/gsc_data dicts.
                writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='ignore')
                writer.writeheader()

                for post_id, post_info in sorted(posts.items()):
                    row = {'ID': post_id}
                    row.update(post_info)
                    # Posts carry lowercase keys; fill the declared header
                    # columns from them instead of leaving them blank.
                    for column, key in self._CANONICAL_EXPORT_COLUMNS:
                        if not row.get(column):
                            row[column] = post_info.get(key) or ''
                    writer.writerow(row)

            self.log(f"✓ Exported {len(posts)} posts to {output_csv}")
        except Exception as e:
            self.log(f"❌ Error exporting CSV: {e}")

    def export_log(self, log_file):
        """Write the collected log messages and the unmatched URLs to *log_file*."""
        try:
            with open(log_file, 'w', encoding='utf-8') as f:
                f.write("SEO Analytics Import Report\n")
                f.write("=" * 60 + "\n\n")

                f.write("Import Log:\n")
                f.write("-" * 60 + "\n")
                for log_msg in self.logs:
                    f.write(log_msg + "\n")

                f.write("\n" + "=" * 60 + "\n")
                f.write(f"Unmatched URLs ({len(self.unmatched_urls)} total):\n")
                f.write("-" * 60 + "\n")

                if self.unmatched_urls:
                    # Sort by impressions descending
                    ranked = sorted(self.unmatched_urls,
                                    key=lambda item: item['impressions'],
                                    reverse=True)
                    for url_data in ranked:
                        f.write(f"\nURL: {url_data['url']}\n")
                        f.write(f" Impressions: {url_data['impressions']}\n")
                        f.write(f" Clicks: {url_data['clicks']}\n")
                        f.write(f" Avg Position: {url_data['avg_position']:.1f}\n")
                else:
                    f.write("✓ All URLs matched successfully!\n")

            self.log(f"✓ Exported log to {log_file}")
        except Exception as e:
            self.log(f"❌ Error exporting log: {e}")

    def run(self, ga_csv, gsc_csv, posts_csv, output_csv):
        """Run complete import workflow.

        Args:
            ga_csv: Path of the GA4 export.
            gsc_csv: Path of the Search Console export.
            posts_csv: Path of the posts CSV.
            output_csv: Path of the enriched CSV to write.
        """
        self.log("Starting analytics import...")
        self.log(f"GA4 CSV: {ga_csv}")
        self.log(f"GSC CSV: {gsc_csv}")
        self.log(f"Posts CSV: {posts_csv}\n")

        # Load data
        ga_data = self.load_ga4_data(ga_csv)
        gsc_data = self.load_gsc_data(gsc_csv)
        posts = self.load_posts_csv(posts_csv)

        if not posts:
            self.log("❌ No posts found. Cannot proceed.")
            return

        # Match and merge
        posts = self.match_analytics_to_posts(posts, ga_data, gsc_data)
        posts = self.enrich_posts_data(posts)

        # Export
        self.export_enriched_csv(posts, output_csv)

        # Export log; parents=True because the output folder may not exist yet.
        log_dir = self.output_dir / 'logs'
        log_dir.mkdir(parents=True, exist_ok=True)
        self.export_log(log_dir / 'import_log.txt')

        self.log("\n✓ Analytics import complete!")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse the four path options and run the importer."""
    parser = argparse.ArgumentParser(description='Import and merge analytics data')

    # (flag, default path, help text) for every Path-typed option.
    path_options = (
        ('--ga-export', 'input/analytics/ga4_export.csv', 'GA4 export CSV path'),
        ('--gsc-export', 'input/analytics/gsc/Pages.csv',
         'Search Console export CSV path (Pages data)'),
        ('--posts-csv', 'input/new-propositions.csv', 'Posts CSV path'),
        ('--output', 'output/results/posts_with_analytics.csv', 'Output CSV path'),
    )
    for flag, default_path, help_text in path_options:
        parser.add_argument(flag, type=Path, default=Path(default_path), help=help_text)

    args = parser.parse_args()

    AnalyticsImporter().run(args.ga_export, args.gsc_export, args.posts_csv, args.output)


if __name__ == '__main__':
    main()
|
||||
@@ -1,614 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
WordPress Category Management Script
|
||||
Fetches all categories from WordPress sites, proposes new categories,
|
||||
and allows assigning posts to categories or websites using AI recommendations.
|
||||
"""
|
||||
|
||||
import csv
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
import requests
|
||||
from requests.auth import HTTPBasicAuth
|
||||
import time
|
||||
from datetime import datetime
|
||||
from config import Config
|
||||
|
||||
# Setup logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(levelname)s - %(message)s'
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class AICategoryAdvisor:
    """AI-powered advisor for category and site recommendations."""

    def __init__(self):
        """Read the API key and model from Config and reset usage counters."""
        self.openrouter_api_key = Config.OPENROUTER_API_KEY
        self.ai_model = Config.AI_MODEL
        self.api_calls = 0
        self.ai_cost = 0.0

    def get_ai_category_recommendations(self, posts_batch: List[Dict]) -> Optional[List[Dict]]:
        """
        Get AI recommendations for category assignments.

        Args:
            posts_batch: List of posts to analyze; each needs an 'id' and may
                carry 'title'/'content' dicts with a 'rendered' entry and a
                'categories' list.

        Returns:
            List of recommendation dicts (possibly empty when the reply holds
            no usable JSON array), or None when the API key is missing or the
            request fails.
        """
        if not self.openrouter_api_key:
            logger.error("OPENROUTER_API_KEY not set")
            return None

        # Format posts for AI analysis
        formatted_posts = []
        for i, post in enumerate(posts_batch, 1):
            title = post.get('title', {}).get('rendered', 'Untitled')
            content = post.get('content', {}).get('rendered', '')[:500]  # First 500 chars
            current_categories = post.get('categories', [])

            formatted_posts.append(
                f"{i}. POST ID: {post['id']}\n"
                f" Title: {title}\n"
                f" Content Preview: {content}...\n"
                f" Current Categories: {current_categories}\n"
            )

        posts_text = "\n".join(formatted_posts)

        prompt = f"""Analyze these blog posts and provide category recommendations.

Website Strategy:
- mistergeek.net: High-value topics (VPN, Software, Gaming, General Tech, SEO, Content Marketing)
- webscroll.fr: Torrenting, File-Sharing, Tracker guides (niche audience)
- hellogeek.net: Low-traffic, experimental, off-brand, or niche content

{posts_text}

For EACH post, provide a JSON object with:
{{
"post_id": <id>,
"recommended_category": "<SUGGESTED_CATEGORY>",
"recommended_site": "<SITE_NAME>",
"reason": "<Brief reason for recommendation>",
"confidence": "<High|Medium|Low>"
}}

Return ONLY a JSON array. Example:
[
{{"post_id": 2845, "recommended_category": "VPN", "recommended_site": "mistergeek.net", "reason": "Core VPN topic", "confidence": "High"}},
{{"post_id": 1234, "recommended_category": "Torrenting", "recommended_site": "webscroll.fr", "reason": "Torrent tracker content", "confidence": "High"}}
]

Analyze all posts and provide recommendations for EVERY post in the batch."""

        try:
            logger.info(" Sending batch to AI for category recommendations...")

            response = requests.post(
                "https://openrouter.ai/api/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {self.openrouter_api_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": self.ai_model,
                    "messages": [
                        {"role": "user", "content": prompt}
                    ],
                    "temperature": 0.3,  # Lower temp for more consistent recommendations
                },
                timeout=60
            )
            response.raise_for_status()

            result = response.json()
            self.api_calls += 1

            # Track cost
            usage = result.get('usage', {})
            input_tokens = usage.get('prompt_tokens', 0)
            output_tokens = usage.get('completion_tokens', 0)
            # Using Claude 3.5 Sonnet pricing: $3/$15 per 1M tokens
            self.ai_cost += (input_tokens * 3 + output_tokens * 15) / 1_000_000

            recommendations_text = result['choices'][0]['message']['content'].strip()
            logger.info(f" ✓ Got recommendations (tokens: {input_tokens}+{output_tokens})")

            return self._parse_recommendations(recommendations_text)

        except Exception as e:
            # Best effort: callers treat None as "AI unavailable" and fall back.
            logger.error(f"Error getting AI recommendations: {e}")
            return None

    def _parse_recommendations(self, recommendations_json: str) -> List[Dict]:
        """Parse JSON recommendations from AI.

        Every '[' in the reply is tried as the start of a JSON array, and the
        first array holding at least one JSON object wins. This tolerates
        brackets in the surrounding prose and trailing text after the array.
        Non-object items are dropped so callers can use ``rec.get`` on every
        entry.

        Returns:
            List of recommendation dicts, or [] when none could be extracted.
        """
        start = recommendations_json.find('[')
        if start == -1:
            logger.error("Could not find JSON array in response")
            return []

        decoder = json.JSONDecoder()
        first_error = None
        parsed_any = False
        while start != -1:
            try:
                candidate, _ = decoder.raw_decode(recommendations_json, start)
            except json.JSONDecodeError as e:
                if first_error is None:
                    first_error = e
            else:
                parsed_any = True
                records = [item for item in candidate if isinstance(item, dict)]
                if records:
                    return records
            start = recommendations_json.find('[', start + 1)

        # A valid but empty/unusable array is not a parse error.
        if not parsed_any:
            logger.error(f"Error parsing JSON recommendations: {first_error}")
            logger.debug(f"Response was: {recommendations_json[:500]}")
        return []
|
||||
|
||||
|
||||
class CategoryManager:
|
||||
"""Manage WordPress categories across multiple sites."""
|
||||
|
||||
def __init__(self):
    """Initialize the category manager with sites from Config."""
    # Site registry comes straight from the shared configuration.
    self.sites = Config.WORDPRESS_SITES

    # Per-site caches, filled by analyze_categories().
    self.categories_by_site, self.posts_by_site = {}, {}

    # Results produced by the proposal steps.
    self.proposed_categories = {}
    self.category_assignments = []

    self.ai_advisor = AICategoryAdvisor()
|
||||
|
||||
def fetch_categories_from_site(self, site_name: str, site_config: Dict) -> List[Dict]:
    """
    Fetch all categories from a WordPress site.

    Pages through ``/wp-json/wp/v2/categories`` 100 items at a time until a
    page is empty or the response carries no ``rel="next"`` link. A 401 or
    403 stops paging and keeps what was collected so far.

    Args:
        site_name: Website name (used in log messages only)
        site_config: Site configuration dict with 'url', 'username', 'password'

    Returns:
        List of categories with metadata; [] when a request error occurs
    """
    logger.info(f"Fetching categories from {site_name}...")

    collected = []
    endpoint = f"{site_config['url'].rstrip('/')}/wp-json/wp/v2/categories"
    credentials = HTTPBasicAuth(site_config['username'], site_config['password'])
    auth_failures = {
        401: f"Unauthorized access to {site_name}. Check credentials.",
        403: f"Forbidden access to {site_name}. Check permissions.",
    }

    try:
        current_page = 1
        while True:
            response = requests.get(
                endpoint,
                params={'page': current_page, 'per_page': 100},
                auth=credentials,
                timeout=10,
            )

            failure = auth_failures.get(response.status_code)
            if failure is not None:
                logger.error(failure)
                break

            response.raise_for_status()

            chunk = response.json()
            if not chunk:
                break

            collected.extend(chunk)
            logger.info(f" Page {current_page}: Got {len(chunk)} categories")

            # The Link header advertises whether another page exists.
            if 'rel="next"' not in response.headers.get('Link', ''):
                break

            current_page += 1
            time.sleep(0.5)

        logger.info(f"✓ Total categories from {site_name}: {len(collected)}")

    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching categories from {site_name}: {e}")
        return []

    return collected
|
||||
|
||||
def fetch_posts_from_site(self, site_name: str, site_config: Dict) -> List[Dict]:
    """
    Fetch posts from a WordPress site to see current category assignments.

    Pages through ``/wp-json/wp/v2/posts`` (published posts only) 100 items
    at a time until a page is empty or the response carries no
    ``rel="next"`` link. A 401 or 403 stops paging and keeps what was
    collected so far.

    Args:
        site_name: Website name (used in log messages only)
        site_config: Site configuration dict with 'url', 'username', 'password'

    Returns:
        List of posts with category information; [] when a request error occurs
    """
    logger.info(f"Fetching posts from {site_name} to analyze category assignments...")

    collected = []
    endpoint = f"{site_config['url'].rstrip('/')}/wp-json/wp/v2/posts"
    credentials = HTTPBasicAuth(site_config['username'], site_config['password'])
    auth_failures = {
        401: f"Unauthorized access to {site_name}. Check credentials.",
        403: f"Forbidden access to {site_name}. Check permissions.",
    }

    try:
        current_page = 1
        while True:
            response = requests.get(
                endpoint,
                params={'page': current_page, 'per_page': 100, 'status': 'publish'},
                auth=credentials,
                timeout=10,
            )

            failure = auth_failures.get(response.status_code)
            if failure is not None:
                logger.error(failure)
                break

            response.raise_for_status()

            chunk = response.json()
            if not chunk:
                break

            collected.extend(chunk)
            logger.info(f" Page {current_page}: Got {len(chunk)} posts")

            # The Link header advertises whether another page exists.
            if 'rel="next"' not in response.headers.get('Link', ''):
                break

            current_page += 1
            time.sleep(0.5)

        logger.info(f"✓ Total posts from {site_name}: {len(collected)}")

    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching posts from {site_name}: {e}")
        return []

    return collected
|
||||
|
||||
def analyze_categories(self):
    """Fetch categories and posts for every site and log an overview.

    Results are cached in ``categories_by_site`` and ``posts_by_site``; for
    each site with categories, the ten largest by post count are logged.
    """
    banner = "=" * 70
    logger.info("\n" + banner)
    logger.info("ANALYZING CURRENT CATEGORIES")
    logger.info(banner)

    for site_name, config in self.sites.items():
        categories = self.fetch_categories_from_site(site_name, config)
        posts = self.fetch_posts_from_site(site_name, config)

        self.categories_by_site[site_name] = categories
        self.posts_by_site[site_name] = posts

        logger.info(f"\n{site_name}:")
        logger.info(f" Categories: {len(categories)}")
        logger.info(f" Posts: {len(posts)}")

        if not categories:
            continue

        logger.info(" Top 10 categories by post count:")
        # Most populated categories first.
        ranked = sorted(categories, key=lambda entry: entry.get('count', 0), reverse=True)
        for rank, cat in enumerate(ranked[:10], start=1):
            logger.info(f" {rank}. {cat['name']} ({cat['count']} posts)")
|
||||
|
||||
def propose_new_categories(self):
    """Record and log the predefined category proposals for each known site.

    Only sites present in ``self.sites`` receive an entry in
    ``proposed_categories``.
    """
    def proposal(name, description):
        # Every proposal is a top-level category (parent 0).
        return {'name': name, 'description': description, 'parent': 0}

    banner = "=" * 70
    logger.info("\n" + banner)
    logger.info("PROPOSING NEW CATEGORIES")
    logger.info(banner)

    # Define category proposals based on content analysis
    category_proposals = {
        'mistergeek.net': [
            proposal('VPN Reviews', 'Reviews of VPN services'),
            proposal('Software Tutorials', 'Step-by-step software guides'),
            proposal('Tech News', 'Latest technology news'),
            proposal('Cybersecurity', 'Security tips and tools'),
        ],
        'webscroll.fr': [
            proposal('Torrent Clients', 'Reviews of torrent clients'),
            proposal('Privacy Tools', 'Privacy-focused tools and services'),
            proposal('File Sharing Guide', 'Guides on file sharing methods'),
        ],
        'hellogeek.net': [
            proposal('Experimental Tech', 'New and experimental tech'),
            proposal('Random Thoughts', 'Opinion and commentary posts'),
            proposal('Testing Zone', 'Posts for testing purposes'),
        ],
    }

    for site_name in self.sites:
        if site_name not in category_proposals:
            continue
        site_proposals = category_proposals[site_name]
        self.proposed_categories[site_name] = site_proposals
        logger.info(f"\n{site_name} - Proposed categories:")
        for cat in site_proposals:
            logger.info(f" - {cat['name']}: {cat['description']}")
|
||||
|
||||
def create_category_assignment_proposals(self):
    """Create proposals for assigning posts to categories or websites.

    Posts of every site are sent to the AI advisor in batches of 10. Each
    post gets one entry appended to ``category_assignments``: the AI
    recommendation when one matches the post ID, otherwise a low-confidence
    keyword-based suggestion for the post's current site.
    """
    def clip(text, limit):
        # Shorten long text for display, marking the cut with an ellipsis.
        return text[:limit] + "..." if len(text) > limit else text

    def id_key(value):
        # Models may echo the post ID as 2845, "2845" or 2845.0; compare
        # all of them through one canonical string form.
        try:
            return str(int(value))
        except (TypeError, ValueError, OverflowError):
            return str(value)

    logger.info("\n" + "="*70)
    logger.info("CREATING CATEGORY ASSIGNMENT PROPOSALS")
    logger.info("="*70)

    batch_size = 10
    for site_name, posts in self.posts_by_site.items():
        logger.info(f"\nAnalyzing posts from {site_name} for category assignments...")

        for start in range(0, len(posts), batch_size):
            batch = posts[start:start + batch_size]

            # None or [] means the AI is unavailable for this batch.
            ai_recommendations = self.ai_advisor.get_ai_category_recommendations(batch)

            # Index once per batch; the first recommendation per post wins.
            recommendations_by_id = {}
            for rec in ai_recommendations or []:
                if isinstance(rec, dict):
                    recommendations_by_id.setdefault(id_key(rec.get('post_id')), rec)

            for post in batch:
                title = post.get('title', {}).get('rendered', 'Untitled')
                content = post.get('content', {}).get('rendered', '')[:200]  # First 200 chars

                ai_rec = recommendations_by_id.get(id_key(post['id']))
                if ai_rec:
                    proposed_category = ai_rec.get('recommended_category', 'Uncategorized')
                    proposed_site = ai_rec.get('recommended_site', site_name)
                    reason = ai_rec.get('reason', '')
                    confidence = ai_rec.get('confidence', 'Low')
                else:
                    # Fallback to keyword-based suggestion if no AI recommendation
                    proposed_category = self._suggest_category_by_content(
                        title + " " + content, site_name)
                    proposed_site = site_name
                    reason = 'Keyword-based suggestion'
                    confidence = 'Low'

                self.category_assignments.append({
                    'site': site_name,
                    'post_id': post['id'],
                    'post_title': clip(title, 50),
                    'current_categories': post.get('categories', []),
                    'proposed_category': proposed_category,
                    'proposed_site': proposed_site,
                    'reason': reason,
                    'confidence': confidence,
                    'content_preview': clip(content, 100),
                    'status': 'pending_approval',
                })

    logger.info(f"Created {len(self.category_assignments)} category assignment proposals")
|
||||
|
||||
def _suggest_category_by_content(self, content: str, site_name: str) -> str:
|
||||
"""Suggest a category based on content keywords."""
|
||||
content_lower = content.lower()
|
||||
|
||||
# Site-specific category mappings
|
||||
category_keywords = {
|
||||
'mistergeek.net': {
|
||||
'VPN': ['vpn', 'proxy', 'privacy', 'secure', 'encryption'],
|
||||
'Software': ['software', 'app', 'tool', 'download', 'install'],
|
||||
'Gaming': ['game', 'gaming', 'console', 'steam', 'playstation'],
|
||||
'Tech News': ['news', 'update', 'release', 'announced'],
|
||||
'Cybersecurity': ['security', 'malware', 'antivirus', 'hacking', 'breach']
|
||||
},
|
||||
'webscroll.fr': {
|
||||
'Torrent': ['torrent', 'download', 'upload', 'client', 'tracker'],
|
||||
'Privacy': ['privacy', 'anonymous', 'tor', 'vpn'],
|
||||
'File Sharing': ['share', 'sharing', 'ddl', 'upload']
|
||||
},
|
||||
'hellogeek.net': {
|
||||
'Opinion': ['think', 'believe', 'opinion', 'view', 'perspective'],
|
||||
'Tutorial': ['how to', 'guide', 'tutorial', 'steps', 'instructions'],
|
||||
'Review': ['review', 'rating', 'comparison', 'test']
|
||||
}
|
||||
}
|
||||
|
||||
site_categories = category_keywords.get(site_name, {})
|
||||
|
||||
for category, keywords in site_categories.items():
|
||||
for keyword in keywords:
|
||||
if keyword in content_lower:
|
||||
return category
|
||||
|
||||
return 'Uncategorized'
|
||||
|
||||
def export_categories_csv(self) -> str:
    """Write every category in ``self.categories_by_site`` to a timestamped CSV.

    The file is created in the ``output`` directory two levels above this
    module (the directory is created when missing).

    Returns:
        The path of the written CSV file, as a string.
    """
    columns = ['site', 'category_id', 'name', 'slug', 'description', 'post_count', 'parent_id']

    target_dir = Path(__file__).parent.parent / 'output'
    target_dir.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    csv_file = target_dir / f'current_categories_{stamp}.csv'

    # One output row per (site, category) pair; missing fields fall back to
    # an empty string or 0.
    rows = (
        {
            'site': site_name,
            'category_id': cat.get('id', ''),
            'name': cat.get('name', ''),
            'slug': cat.get('slug', ''),
            'description': cat.get('description', ''),
            'post_count': cat.get('count', 0),
            'parent_id': cat.get('parent', 0),
        }
        for site_name, categories in self.categories_by_site.items()
        for cat in categories
    )

    with open(csv_file, 'w', newline='', encoding='utf-8') as handle:
        out = csv.DictWriter(handle, fieldnames=columns)
        out.writeheader()
        out.writerows(rows)

    logger.info(f"✓ Current categories exported to: {csv_file}")
    return str(csv_file)
|
||||
|
||||
def export_proposed_categories_csv(self) -> str:
    """Write every entry of ``self.proposed_categories`` to a timestamped CSV.

    The file is created in the ``output`` directory two levels above this
    module (the directory is created when missing). Every row carries the
    same fixed ``reason`` text.

    Returns:
        The path of the written CSV file, as a string.
    """
    columns = ['site', 'proposed_category', 'description', 'parent_category', 'reason']

    target_dir = Path(__file__).parent.parent / 'output'
    target_dir.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    csv_file = target_dir / f'proposed_categories_{stamp}.csv'

    rows = (
        {
            'site': site_name,
            'proposed_category': cat.get('name', ''),
            'description': cat.get('description', ''),
            'parent_category': cat.get('parent', 0),
            'reason': 'Content analysis and organization improvement',
        }
        for site_name, categories in self.proposed_categories.items()
        for cat in categories
    )

    with open(csv_file, 'w', newline='', encoding='utf-8') as handle:
        out = csv.DictWriter(handle, fieldnames=columns)
        out.writeheader()
        out.writerows(rows)

    logger.info(f"✓ Proposed categories exported to: {csv_file}")
    return str(csv_file)
|
||||
|
||||
def export_category_assignments_csv(self) -> str:
    """Write ``self.category_assignments`` to a timestamped CSV.

    Each assignment dict is written as-is, so its keys must be among the
    CSV columns listed below. The file is created in the ``output``
    directory two levels above this module (created when missing).

    Returns:
        The path of the written CSV file, as a string.
    """
    columns = [
        'site', 'post_id', 'post_title', 'current_categories', 'proposed_category',
        'proposed_site', 'reason', 'confidence', 'content_preview', 'status',
    ]

    target_dir = Path(__file__).parent.parent / 'output'
    target_dir.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    csv_file = target_dir / f'category_assignments_{stamp}.csv'

    with open(csv_file, 'w', newline='', encoding='utf-8') as handle:
        out = csv.DictWriter(handle, fieldnames=columns)
        out.writeheader()
        out.writerows(self.category_assignments)

    logger.info(f"✓ Category assignments exported to: {csv_file}")
    return str(csv_file)
|
||||
|
||||
def run(self):
    """Run the complete category-management workflow and log a summary.

    Steps, in order: analyze the current categories, propose new ones,
    build per-post assignment proposals, export the three result sets to
    CSV, then log totals, AI usage figures and the exported file paths.
    """
    heavy_rule = "=" * 70
    light_rule = '─' * 70

    def banner(title, lead=""):
        """Log ``title`` framed by two heavy rules; ``lead`` prefixes the first."""
        logger.info(lead + heavy_rule)
        logger.info(title)
        logger.info(heavy_rule)

    banner("WORDPRESS CATEGORY MANAGEMENT")
    logger.info("Sites configured: " + ", ".join(self.sites.keys()))
    logger.info("")

    # Analysis pipeline: each step stores its results on the instance.
    self.analyze_categories()
    self.propose_new_categories()
    self.create_category_assignment_proposals()

    banner("EXPORTING RESULTS", lead="\n")
    categories_csv = self.export_categories_csv()
    proposed_csv = self.export_proposed_categories_csv()
    assignments_csv = self.export_category_assignments_csv()

    banner("CATEGORY MANAGEMENT SUMMARY", lead="\n")

    total_categories = sum(len(cats) for cats in self.categories_by_site.values())
    logger.info(f"Total current categories: {total_categories}")

    total_proposed = sum(len(props) for props in self.proposed_categories.values())
    logger.info(f"Total proposed categories: {total_proposed}")

    logger.info(f"Category assignment proposals: {len(self.category_assignments)}")

    # AI advisor usage statistics
    logger.info(f"AI API calls made: {self.ai_advisor.api_calls}")
    logger.info(f"AI cost: ${self.ai_advisor.ai_cost:.4f}")

    logger.info(f"\n{light_rule}")
    logger.info("Exported files:")
    logger.info(f" • Current categories: {categories_csv}")
    logger.info(f" • Proposed categories: {proposed_csv}")
    logger.info(f" • Category assignments: {assignments_csv}")
    logger.info(f"{light_rule}")

    for line in (
        "\n✓ Category management complete!",
        "\nNext steps:",
        " 1. Review proposed_categories.csv for new categories to add",
        " 2. Review category_assignments.csv for posts that need re-categorization",
        " 3. Manually approve or modify proposals before applying changes",
    ):
        logger.info(line)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: parse arguments and run the category manager."""
    import argparse

    # No options are defined; parsing is still performed so that --help works
    # and unexpected arguments are rejected. The parsed namespace is unused.
    argparse.ArgumentParser(
        description='Manage WordPress categories across multiple sites'
    ).parse_args()

    CategoryManager().run()


if __name__ == '__main__':
    main()
|
||||
@@ -1,110 +0,0 @@
|
||||
"""
|
||||
Configuration module for WordPress SEO automation.
|
||||
Loads and validates environment variables and YAML configuration.
|
||||
"""
|
||||
|
||||
import os
|
||||
import yaml
|
||||
from dotenv import load_dotenv
|
||||
from pathlib import Path
|
||||
|
||||
# Load environment variables from .env file
|
||||
load_dotenv()
|
||||
|
||||
def _yaml_section(config, *keys):
    """Return the nested mapping found at ``keys`` inside ``config``.

    Falls back to an empty dict when any level is missing or is not a
    mapping (for example a section left empty in the YAML file, which loads
    as None), so callers can call ``.get()`` on the result unconditionally.
    """
    node = config
    for key in keys:
        if not isinstance(node, dict):
            return {}
        node = node.get(key)
    return node if isinstance(node, dict) else {}


def _site_settings(config, domain, env_prefix, default_url):
    """Build the url/username/password settings for one WordPress site.

    Lookup order per field: the site-specific environment variable
    (``WORDPRESS_<env_prefix>_*``), then for credentials the shared
    ``WORDPRESS_USERNAME`` / ``WORDPRESS_APP_PASSWORD`` variables, then the
    ``wordpress_sites.<domain>`` YAML section, then the built-in default.
    """
    site_yaml = _yaml_section(config, 'wordpress_sites', domain)
    return {
        'url': os.getenv(
            f'WORDPRESS_{env_prefix}_URL',
            site_yaml.get('url', default_url),
        ),
        'username': os.getenv(
            f'WORDPRESS_{env_prefix}_USERNAME',
            os.getenv('WORDPRESS_USERNAME', site_yaml.get('username', '')),
        ),
        'password': os.getenv(
            f'WORDPRESS_{env_prefix}_PASSWORD',
            os.getenv('WORDPRESS_APP_PASSWORD', site_yaml.get('password', '')),
        ),
    }


class Config:
    """Configuration for WordPress SEO automation.

    All values are resolved once, when the class body executes: environment
    variables take precedence, then ``config.yaml`` (located two directories
    above this module), then the hard-coded defaults below.
    """

    # Load configuration from YAML file
    CONFIG_FILE = Path(__file__).parent.parent / 'config.yaml'

    if CONFIG_FILE.exists():
        with open(CONFIG_FILE, 'r', encoding='utf-8') as f:
            # safe_load returns None for an empty document; normalise to a
            # dict so the lookups below never operate on None.
            YAML_CONFIG = yaml.safe_load(f) or {}
    else:
        YAML_CONFIG = {}

    # WordPress Settings (Primary site)
    WORDPRESS_URL = (
        os.getenv('WORDPRESS_URL', _yaml_section(YAML_CONFIG, 'primary_site').get('url', '')) or ''
    ).rstrip('/')
    WORDPRESS_USERNAME = os.getenv(
        'WORDPRESS_USERNAME', _yaml_section(YAML_CONFIG, 'primary_site').get('username', ''))
    WORDPRESS_APP_PASSWORD = os.getenv(
        'WORDPRESS_APP_PASSWORD', _yaml_section(YAML_CONFIG, 'primary_site').get('password', ''))

    # Multi-site WordPress Configuration
    WORDPRESS_SITES = {
        'mistergeek.net': _site_settings(
            YAML_CONFIG, 'mistergeek.net', 'MISTERGEEK', 'https://www.mistergeek.net'),
        'webscroll.fr': _site_settings(
            YAML_CONFIG, 'webscroll.fr', 'WEBSCROLL', 'https://www.webscroll.fr'),
        'hellogeek.net': _site_settings(
            YAML_CONFIG, 'hellogeek.net', 'HELLOGEEK', 'https://www.hellogeek.net'),
    }

    # OpenRouter API Settings
    OPENROUTER_API_KEY = os.getenv(
        'OPENROUTER_API_KEY', _yaml_section(YAML_CONFIG, 'ai_model').get('api_key', ''))
    AI_MODEL = os.getenv(
        'AI_MODEL', _yaml_section(YAML_CONFIG, 'ai_model').get('name', 'anthropic/claude-3.5-sonnet'))

    # Script Settings (YAML values are stringified so env and YAML share one parse path)
    BATCH_SIZE = int(os.getenv(
        'BATCH_SIZE', str(_yaml_section(YAML_CONFIG, 'script_settings').get('batch_size', 100))))
    API_DELAY_SECONDS = float(os.getenv(
        'API_DELAY_SECONDS',
        str(_yaml_section(YAML_CONFIG, 'script_settings').get('api_delay_seconds', 0.5))))

    # Analysis Settings
    ANALYSIS_MIN_POSITION = int(os.getenv(
        'ANALYSIS_MIN_POSITION',
        str(_yaml_section(YAML_CONFIG, 'analysis_settings').get('min_position', 11))))
    ANALYSIS_MAX_POSITION = int(os.getenv(
        'ANALYSIS_MAX_POSITION',
        str(_yaml_section(YAML_CONFIG, 'analysis_settings').get('max_position', 30))))
    ANALYSIS_MIN_IMPRESSIONS = int(os.getenv(
        'ANALYSIS_MIN_IMPRESSIONS',
        str(_yaml_section(YAML_CONFIG, 'analysis_settings').get('min_impressions', 50))))
    ANALYSIS_TOP_N_POSTS = int(os.getenv(
        'ANALYSIS_TOP_N_POSTS',
        str(_yaml_section(YAML_CONFIG, 'analysis_settings').get('top_n_posts', 20))))

    # Output directory
    OUTPUT_DIR = Path(os.getenv(
        'OUTPUT_DIR', _yaml_section(YAML_CONFIG, 'output_settings').get('output_dir', './output')))

    @classmethod
    def validate(cls):
        """Check that all required settings are present and create OUTPUT_DIR.

        Returns:
            True when the configuration is complete.

        Raises:
            ValueError: listing every missing required setting.
        """
        errors = []

        if not cls.WORDPRESS_URL:
            errors.append("WORDPRESS_URL is required")

        if not cls.WORDPRESS_USERNAME:
            errors.append("WORDPRESS_USERNAME is required")

        if not cls.WORDPRESS_APP_PASSWORD:
            errors.append("WORDPRESS_APP_PASSWORD is required")

        if not cls.OPENROUTER_API_KEY:
            errors.append("OPENROUTER_API_KEY is required (get one from https://openrouter.ai/)")

        if errors:
            raise ValueError("Configuration errors:\n" + "\n".join(f" - {e}" for e in errors))

        # Create output directory if it doesn't exist; parents=True so a
        # nested OUTPUT_DIR (e.g. "build/seo/output") does not fail.
        cls.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

        return True

    @classmethod
    def get_wordpress_auth(cls):
        """Return the primary site's ``(username, app_password)`` tuple."""
        return (cls.WORDPRESS_USERNAME, cls.WORDPRESS_APP_PASSWORD)

    @classmethod
    def get_api_base_url(cls):
        """Return the primary site's WordPress REST API v2 base URL."""
        return f"{cls.WORDPRESS_URL}/wp-json/wp/v2"

    @classmethod
    def get_site_config(cls, site_name):
        """Return the settings dict for ``site_name``, or ``{}`` if unknown."""
        return cls.WORDPRESS_SITES.get(site_name, {})

    @classmethod
    def get_all_sites(cls):
        """Return a view of the names of all configured WordPress sites."""
        return cls.WORDPRESS_SITES.keys()
|
||||
@@ -1,348 +0,0 @@
|
||||
"""
|
||||
Content gap analyzer for SEO strategy.
|
||||
Identifies missing topics and content opportunities using AI analysis.
|
||||
"""
|
||||
|
||||
import csv
|
||||
import json
|
||||
import argparse
|
||||
import time
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
from openai import OpenAI
|
||||
from config import Config
|
||||
|
||||
|
||||
class ContentGapAnalyzer:
    """Identify content gaps and opportunities.

    Loads post and Search Console CSV exports, asks an OpenRouter-hosted
    model to cluster the post titles and suggest missing topics, and writes
    the results (CSV, JSON and a plain-text log) to disk.
    """

    # Sort rank for the priority labels requested from the model; any other
    # label sorts after "low".
    _PRIORITY_RANK = {'high': 0, 'medium': 1, 'low': 2}

    def __init__(self):
        """Initialize analyzer; the AI client is created only if an API key is set."""
        self.config = Config
        self.output_dir = self.config.OUTPUT_DIR
        self.logs = []
        self.client = None

        if self.config.OPENROUTER_API_KEY:
            self.client = OpenAI(
                base_url="https://openrouter.ai/api/v1",
                api_key=self.config.OPENROUTER_API_KEY,
            )

    def log(self, message):
        """Record ``message`` in ``self.logs`` and print it."""
        self.logs.append(message)
        print(message)

    @staticmethod
    def _extract_json_object(text):
        """Parse the outermost ``{...}`` span of ``text`` as JSON.

        Model replies often wrap the JSON in prose or code fences, so only
        the text between the first '{' and the last '}' is parsed.

        Returns:
            The decoded value, or None when there is no such span or it is
            not valid JSON.
        """
        if not text:
            return None
        start_idx = text.find('{')
        end_idx = text.rfind('}') + 1
        if start_idx < 0 or end_idx <= start_idx:
            return None
        try:
            return json.loads(text[start_idx:end_idx])
        except json.JSONDecodeError:
            return None

    @classmethod
    def _priority_rank(cls, gap):
        """Return the sort rank of a gap's 'priority' label (case-insensitive)."""
        label = str(gap.get('priority', '')).strip().lower()
        return cls._PRIORITY_RANK.get(label, len(cls._PRIORITY_RANK))

    def load_posts(self, posts_csv):
        """Load post titles and traffic data from ``posts_csv``.

        Rows whose ``traffic`` or ``impressions`` value is not an integer are
        skipped (and counted in the log) instead of aborting the whole load.

        Args:
            posts_csv: ``Path`` of the CSV; columns read are ID, Title, URL,
                traffic, impressions and top_keywords.

        Returns:
            A list of post dicts; empty if the file is missing or unreadable.
        """
        posts = []
        if not posts_csv.exists():
            self.log(f"❌ File not found: {posts_csv}")
            return posts

        skipped = 0
        try:
            # utf-8-sig strips a leading BOM (otherwise the first header
            # becomes '\ufeffID'); newline='' is what the csv module expects.
            with open(posts_csv, 'r', encoding='utf-8-sig', newline='') as f:
                for row in csv.DictReader(f):
                    try:
                        traffic = int(row.get('traffic', 0) or 0)
                        impressions = int(row.get('impressions', 0) or 0)
                    except (ValueError, TypeError):
                        skipped += 1
                        continue

                    posts.append({
                        'id': row.get('ID', ''),
                        'title': row.get('Title', ''),
                        'url': row.get('URL', ''),
                        'traffic': traffic,
                        'impressions': impressions,
                        'top_keywords': row.get('top_keywords', '')
                    })

            self.log(f"✓ Loaded {len(posts)} posts")
            if skipped:
                self.log(f"⚠️ Skipped {skipped} rows with invalid traffic/impressions values")
        except Exception as e:
            # Best effort: report the problem and return what was loaded.
            self.log(f"❌ Error reading posts: {e}")

        return posts

    def load_gsc_data(self, gsc_csv):
        """Load underperforming Search Console queries from ``gsc_csv``.

        Only queries with at least one impression and a click-through rate
        below 5% are kept; rows with non-numeric counts are skipped.

        Returns:
            A list of dicts with keys query, impressions, clicks and ctr.
        """
        queries = []
        if not gsc_csv.exists():
            self.log(f"⚠️ GSC file not found: {gsc_csv}")
            return queries

        try:
            with open(gsc_csv, 'r', encoding='utf-8-sig', newline='') as f:
                for row in csv.DictReader(f):
                    try:
                        query = row.get('Query', '').strip()
                        if not query:
                            continue

                        impressions = int(row.get('Impressions', 0) or 0)
                        clicks = int(row.get('Clicks', 0) or 0)
                    except (ValueError, TypeError, AttributeError):
                        continue

                    if impressions <= 0:
                        continue

                    # Only include queries with impressions but low clicks
                    ctr = clicks / impressions
                    if ctr < 0.05:
                        queries.append({
                            'query': query,
                            'impressions': impressions,
                            'clicks': clicks,
                            'ctr': ctr
                        })

            self.log(f"✓ Loaded {len(queries)} underperforming queries")
        except Exception as e:
            self.log(f"⚠️ Error reading GSC file: {e}")

        return queries

    def extract_topics(self, posts):
        """Extract topic clusters from post titles using AI.

        Only the first 100 titles are sent to the model.

        Returns:
            The decoded JSON object from the model's reply, or ``{}`` when no
            client is configured, there are no posts, the request fails or
            the reply contains no parseable JSON object.
        """
        if not self.client or len(posts) == 0:
            self.log("⚠️ Cannot extract topics without AI client or posts")
            return {}

        try:
            self.log("🤖 Extracting topic clusters from post titles...")

            titles = [p['title'] for p in posts][:100]  # Limit to first 100

            prompt = f"""Analyze these {len(titles)} blog post titles and identify topic clusters:

Titles:
{chr(10).join(f'{i+1}. {t}' for i, t in enumerate(titles))}

Extract for each post:
1. Primary topic category
2. Subtopics covered
3. Content type (guide, tutorial, review, comparison, etc.)

Then identify:
1. Top 10 topic clusters with post counts
2. Most common subtopics
3. Over/under-represented topics

Return JSON:
{{
  "post_topics": {{
    "1": {{"primary": "...", "subtopics": ["..."], "type": "..."}},
    ...
  }},
  "topic_clusters": [
    {{"cluster": "...", "post_count": 0, "importance": "high/medium/low"}}
  ],
  "coverage_gaps": ["topic 1", "topic 2", ...],
  "niche": "detected niche or industry"
}}"""

            response = self.client.chat.completions.create(
                model=self.config.AI_MODEL,
                messages=[{"role": "user", "content": prompt}],
                temperature=0.7,
                max_tokens=1500
            )

            parsed = self._extract_json_object(response.choices[0].message.content)
            if isinstance(parsed, dict):
                return parsed

            # Covers both invalid JSON and a reply without any JSON object,
            # so callers always receive a dict.
            self.log("⚠️ Could not parse topic extraction response")
            return {}

        except Exception as e:
            self.log(f"⚠️ Topic extraction failed: {e}")
            return {}

    def identify_content_gaps(self, topic_analysis, queries):
        """Use AI to identify content gaps and suggest new topics.

        Args:
            topic_analysis: Result of ``extract_topics`` (may be empty/None).
            queries: Query dicts as produced by ``load_gsc_data``; the 20
                with the most impressions are included in the prompt.

        Returns:
            The ``content_opportunities`` list from the model's reply, or
            ``[]`` when no client is configured, the request fails or the
            reply cannot be parsed.
        """
        if not self.client:
            return []

        try:
            self.log("🤖 Identifying content gaps and opportunities...")

            topic_analysis = topic_analysis or {}
            clusters = topic_analysis.get('topic_clusters', [])
            gaps = topic_analysis.get('coverage_gaps', [])
            niche = topic_analysis.get('niche', 'general')

            # Prepare query analysis
            top_queries = sorted(queries, key=lambda x: x['impressions'], reverse=True)[:20]
            queries_str = '\n'.join([f"- {q['query']} ({q['impressions']} impr, {q['ctr']:.1%} CTR)"
                                     for q in top_queries])

            prompt = f"""Based on content analysis and search demand, identify content gaps:

Existing Topics: {', '.join([c.get('cluster', '') for c in clusters[:10]])}
Coverage Gaps: {', '.join(str(g) for g in gaps[:5])}
Niche: {niche}

Top Underperforming Queries (low CTR despite impressions):
{queries_str}

Identify high-value missing topics that could:
1. Fill coverage gaps
2. Target underperforming queries (CTR improvement)
3. Capitalize on search demand
4. Complement existing content

For each suggestion:
- Topic title
- Why it's valuable (search demand + intent)
- Search volume estimate (high/medium/low)
- How it complements existing content
- Recommended content format
- Estimated traffic potential

Prioritize by traffic opportunity. Max 20 ideas.

Return JSON:
{{
  "content_opportunities": [
    {{
      "title": "...",
      "why_valuable": "...",
      "search_volume": "high/medium/low",
      "complements": "existing topic",
      "format": "guide/tutorial/comparison/review/list",
      "traffic_potential": number,
      "priority": "high/medium/low"
    }}
  ]
}}"""

            response = self.client.chat.completions.create(
                model=self.config.AI_MODEL,
                messages=[{"role": "user", "content": prompt}],
                temperature=0.7,
                max_tokens=2000
            )

            parsed = self._extract_json_object(response.choices[0].message.content)
            if isinstance(parsed, dict):
                opportunities = parsed.get('content_opportunities', [])
                return opportunities if isinstance(opportunities, list) else []

            self.log("⚠️ Could not parse gap analysis response")
            return []

        except Exception as e:
            self.log(f"⚠️ Gap analysis failed: {e}")
            return []

    def export_gaps_csv(self, gaps, output_csv):
        """Export content gaps to ``output_csv``, ordered high → medium → low.

        Keys outside the fixed column list are ignored; the destination
        directory is created when missing. Errors are logged, not raised.
        """
        if not gaps:
            self.log("⚠️ No gaps to export")
            return

        try:
            fieldnames = [
                'priority', 'title', 'why_valuable', 'search_volume',
                'complements', 'format', 'traffic_potential'
            ]

            Path(output_csv).parent.mkdir(parents=True, exist_ok=True)
            with open(output_csv, 'w', newline='', encoding='utf-8') as f:
                writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='ignore')
                writer.writeheader()

                # sorted() is stable, so gaps sharing a priority keep the
                # order in which the model returned them.
                for gap in sorted(gaps, key=self._priority_rank):
                    writer.writerow(gap)

            self.log(f"✓ Exported {len(gaps)} content gaps to {output_csv}")
        except Exception as e:
            self.log(f"❌ Error exporting CSV: {e}")

    def export_topic_clusters_json(self, topic_analysis, output_json):
        """Export topic analysis to ``output_json``; no-op when it is empty."""
        if not topic_analysis:
            return

        try:
            Path(output_json).parent.mkdir(parents=True, exist_ok=True)
            with open(output_json, 'w', encoding='utf-8') as f:
                # ensure_ascii=False keeps accented titles readable in the
                # UTF-8 file instead of \uXXXX escapes.
                json.dump(topic_analysis, f, indent=2, ensure_ascii=False)

            self.log(f"✓ Exported topic analysis to {output_json}")
        except Exception as e:
            self.log(f"❌ Error exporting JSON: {e}")

    def export_log(self, log_file):
        """Write every message logged so far to ``log_file`` under a header."""
        try:
            Path(log_file).parent.mkdir(parents=True, exist_ok=True)
            with open(log_file, 'w', encoding='utf-8') as f:
                f.write("Content Gap Analysis Report\n")
                f.write("=" * 60 + "\n\n")

                for msg in self.logs:
                    f.write(msg + "\n")

            self.log(f"✓ Exported log to {log_file}")
        except Exception as e:
            self.log(f"❌ Error exporting log: {e}")

    def run(self, posts_csv, gsc_csv, output_csv):
        """Run complete analysis workflow.

        Loads both CSVs, extracts topics, identifies gaps and exports the
        gap CSV, ``topic_clusters.json`` and a log file under
        ``self.output_dir``. Returns early when no posts could be loaded.
        """
        self.log("📊 Starting content gap analysis...")
        self.log(f"Posts: {posts_csv}")
        self.log(f"GSC queries: {gsc_csv}\n")

        # Load data
        posts = self.load_posts(posts_csv)
        queries = self.load_gsc_data(gsc_csv)

        if not posts:
            return

        # Extract topics
        topic_analysis = self.extract_topics(posts)
        if topic_analysis:
            self.log(f"✓ Identified {len(topic_analysis.get('topic_clusters', []))} topic clusters")

        # Identify gaps
        gaps = self.identify_content_gaps(topic_analysis, queries)
        if gaps:
            self.log(f"✓ Identified {len(gaps)} content opportunities")

        # Export
        self.log("\n📁 Exporting results...")
        self.export_gaps_csv(gaps, output_csv)

        topic_json = self.output_dir / 'topic_clusters.json'
        self.export_topic_clusters_json(topic_analysis, topic_json)

        # Export log
        log_dir = self.output_dir / 'logs'
        log_dir.mkdir(parents=True, exist_ok=True)
        log_file = log_dir / 'content_gap_analysis_log.txt'
        self.export_log(log_file)

        self.log("\n✓ Content gap analysis complete!")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse the three CSV path options and run the analyzer."""
    cli = argparse.ArgumentParser(description='Analyze content gaps')

    # (flag, default path, help text) for each path-valued option.
    path_options = (
        ('--posts-csv', 'output/results/posts_with_analytics.csv', 'Posts CSV'),
        ('--gsc-queries', 'input/analytics/gsc/Requêtes.csv', 'GSC queries CSV'),
        ('--output', 'output/results/content_gaps.csv', 'Output gaps CSV'),
    )
    for flag, default_path, help_text in path_options:
        cli.add_argument(flag, type=Path, default=Path(default_path), help=help_text)

    opts = cli.parse_args()

    ContentGapAnalyzer().run(opts.posts_csv, opts.gsc_queries, opts.output)


if __name__ == '__main__':
    main()
|
||||
@@ -1,466 +0,0 @@
|
||||
"""
|
||||
Multi-Site Content Strategy Analyzer
|
||||
Analyzes all content (published + drafts) across 3 websites.
|
||||
Recommends optimal distribution and consolidation strategy.
|
||||
"""
|
||||
|
||||
import csv
|
||||
import json
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class ContentStrategyAnalyzer:
|
||||
"""Analyze and optimize content distribution across multiple sites."""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize analyzer."""
|
||||
self.output_dir = Path('output')
|
||||
self.output_dir.mkdir(exist_ok=True)
|
||||
(self.output_dir / 'analysis').mkdir(exist_ok=True)
|
||||
(self.output_dir / 'reports').mkdir(exist_ok=True)
|
||||
(self.output_dir / 'logs').mkdir(exist_ok=True)
|
||||
|
||||
self.logs = []
|
||||
|
||||
def log(self, message):
|
||||
"""Log message."""
|
||||
self.logs.append(message)
|
||||
print(message)
|
||||
|
||||
def load_wordpress_posts(self, csv_path):
|
||||
"""Load published WordPress posts."""
|
||||
posts = {}
|
||||
if not csv_path.exists():
|
||||
self.log(f"⚠️ WordPress posts file not found: {csv_path}")
|
||||
return posts
|
||||
|
||||
try:
|
||||
with open(csv_path, 'r', encoding='utf-8') as f:
|
||||
reader = csv.DictReader(f)
|
||||
for row in reader:
|
||||
post_id = row.get('ID') or row.get('post_id')
|
||||
if not post_id:
|
||||
continue
|
||||
|
||||
posts[post_id] = {
|
||||
'source': 'wordpress',
|
||||
'status': 'published',
|
||||
'title': row.get('Title') or row.get('title') or row.get('post_title') or '',
|
||||
'url': row.get('URL') or row.get('url') or row.get('post_url') or '',
|
||||
'author': row.get('Author') or row.get('author') or 'Unknown',
|
||||
'traffic': int(row.get('traffic', 0) or 0),
|
||||
'impressions': int(row.get('impressions', 0) or 0),
|
||||
'position': float(row.get('avg_position', 0) or 0),
|
||||
'category': row.get('Category') or row.get('category') or '',
|
||||
}
|
||||
|
||||
self.log(f"✓ Loaded {len(posts)} published WordPress posts")
|
||||
except Exception as e:
|
||||
self.log(f"❌ Error reading WordPress posts: {e}")
|
||||
|
||||
return posts
|
||||
|
||||
def load_draft_posts(self, csv_path):
|
||||
"""Load draft/unpublished posts."""
|
||||
posts = {}
|
||||
if not csv_path.exists():
|
||||
self.log(f"⚠️ Draft posts file not found: {csv_path}")
|
||||
return posts
|
||||
|
||||
try:
|
||||
with open(csv_path, 'r', encoding='utf-8') as f:
|
||||
reader = csv.DictReader(f)
|
||||
for row in reader:
|
||||
post_id = row.get('ID') or row.get('post_id')
|
||||
if not post_id:
|
||||
continue
|
||||
|
||||
posts[post_id] = {
|
||||
'source': 'draft',
|
||||
'status': 'draft',
|
||||
'title': row.get('Title') or row.get('title') or row.get('post_title') or '',
|
||||
'url': row.get('URL') or row.get('url') or row.get('post_url') or '',
|
||||
'author': row.get('Author') or row.get('author') or 'Unknown',
|
||||
'traffic': 0, # Drafts have no traffic
|
||||
'impressions': 0,
|
||||
'position': 0,
|
||||
'category': row.get('Category') or row.get('category') or '',
|
||||
}
|
||||
|
||||
self.log(f"✓ Loaded {len(posts)} draft posts")
|
||||
except Exception as e:
|
||||
self.log(f"❌ Error reading draft posts: {e}")
|
||||
|
||||
return posts
|
||||
|
||||
def classify_post_topic(self, post):
    """Classify a post into a topic area from its title and category.

    Args:
        post: Dict with string values under 'title' and 'category'.

    Returns:
        The first topic (in table order) with a keyword that occurs as a
        substring of the lower-cased "title category" text, else 'other'.
    """
    haystack = f"{post['title'].lower()} {post['category'].lower()}"

    # Keyword fragments per topic (French and English); declaration order
    # decides which topic wins when several match.
    topic_keywords = {
        'torrent': ['torrent', 'ygg', 'ratio', 'tracker', 'magnet', 'seedbox', 'upload'],
        'streaming': ['stream', 'film', 'série', 'netflix', 'disney', 'platforma'],
        'vpn': ['vpn', 'proxy', 'anonyme', 'privacy', 'chiffr'],
        'software': ['software', 'tool', 'app', 'logiciel', 'outil', 'program'],
        'gaming': ['game', 'jeu', 'gaming', 'emula', 'console', 'retro'],
        'download': ['download', 'télécharge', 'ddl', 'upload'],
        'tech': ['tech', 'informatique', 'code', 'programming', 'developer'],
    }

    # 'other' has no keywords of its own; it is simply the fallback.
    return next(
        (
            topic
            for topic, fragments in topic_keywords.items()
            if any(fragment in haystack for fragment in fragments)
        ),
        'other',
    )
|
||||
|
||||
def classify_website(self, post):
|
||||
"""Determine which website this post should be on."""
|
||||
topic = self.classify_post_topic(post)
|
||||
author = post.get('author', '').strip()
|
||||
is_sponsored = author == 'Expert'
|
||||
|
||||
# Website assignment rules
|
||||
if topic == 'torrent' or topic == 'download':
|
||||
return {
|
||||
'site': 'webscroll.fr',
|
||||
'reason': f'Torrent/file-sharing content',
|
||||
'priority': 'HIGH' if post['traffic'] > 100 else 'MEDIUM'
|
||||
}
|
||||
|
||||
if topic in ['vpn', 'software', 'gaming', 'tech']:
|
||||
return {
|
||||
'site': 'mistergeek.net',
|
||||
'reason': f'{topic.capitalize()} - core content',
|
||||
'priority': 'HIGH' if post['traffic'] > 50 else 'MEDIUM'
|
||||
}
|
||||
|
||||
if topic == 'streaming' and post['traffic'] < 100:
|
||||
return {
|
||||
'site': 'hellogeek.net',
|
||||
'reason': 'Low-traffic streaming content',
|
||||
'priority': 'LOW'
|
||||
}
|
||||
|
||||
if topic == 'other' or post['traffic'] < 10:
|
||||
return {
|
||||
'site': 'hellogeek.net',
|
||||
'reason': 'Off-brand or low-traffic content',
|
||||
'priority': 'LOW'
|
||||
}
|
||||
|
||||
# Default to main site
|
||||
return {
|
||||
'site': 'mistergeek.net',
|
||||
'reason': 'Core content',
|
||||
'priority': 'MEDIUM'
|
||||
}
|
||||
|
||||
def classify_content_action(self, post):
    """Determine what action to take with a post.

    Args:
        post: Post dict; 'traffic', 'impressions' and 'position' default to
            0 and 'status' to 'published' when absent.

    Returns:
        One of 'REVIEW_PUBLISH_OR_DELETE', 'REPUBLISH',
        'DELETE_OR_CONSOLIDATE', 'KEEP_OPTIMIZE', 'MOVE_TO_OTHER_SITE' or
        'KEEP_MONITOR'. Rules are evaluated top to bottom; first match wins.
    """
    traffic = post.get('traffic', 0)
    impressions = post.get('impressions', 0)
    position = post.get('position', 0)
    status = post.get('status', 'published')

    if status == 'draft':
        if traffic == 0:
            return 'REVIEW_PUBLISH_OR_DELETE'  # Unpublished draft
        return 'REPUBLISH'  # Was published, now draft

    if traffic < 5 and impressions < 20:
        return 'DELETE_OR_CONSOLIDATE'

    # Already ranking in the top positions and receiving traffic
    # (position 0 means "no ranking data").
    if traffic > 0 and 0 < position < 11:
        return 'KEEP_OPTIMIZE'

    # Positions 11 through 30 inclusive; the bounds are inclusive so posts
    # sitting exactly at 11 or 30 are not dropped between the rules.
    if 11 <= position <= 30:
        return 'KEEP_OPTIMIZE'

    if position > 30 or traffic < 10:
        return 'MOVE_TO_OTHER_SITE'

    return 'KEEP_MONITOR'
|
||||
|
||||
def analyze_all_content(self, posts):
    """Classify every post and aggregate counts and traffic.

    Args:
        posts: Mapping of post id to post dict. Each post must provide
            'title' and 'traffic'; 'author' and 'status' are optional.

    Returns:
        Dict with 'total_posts', the per-key groupings 'by_site',
        'by_topic' and 'by_action' (each bucket holds 'count', 'traffic'
        and 'posts'), plus the 'sponsored_posts' and 'draft_posts'
        summaries.
    """
    def new_bucket():
        return {'count': 0, 'traffic': 0, 'posts': []}

    summary = {
        'total_posts': len(posts),
        'by_site': defaultdict(new_bucket),
        'by_topic': defaultdict(new_bucket),
        'by_action': defaultdict(new_bucket),
        'sponsored_posts': new_bucket(),
        'draft_posts': {'count': 0, 'posts': []},
    }
    sponsored = summary['sponsored_posts']
    drafts = summary['draft_posts']

    for pid, entry in posts.items():
        topic_name = self.classify_post_topic(entry)
        placement = self.classify_website(entry)
        action_name = self.classify_content_action(entry)
        # Posts written by the author named 'Expert' are counted as sponsored.
        sponsored_flag = entry.get('author', '').strip() == 'Expert'
        draft_flag = entry.get('status') == 'draft'

        visits = entry['traffic']
        heading = entry['title']

        # Only the site buckets collect per-post records.
        site_bucket = summary['by_site'][placement['site']]
        site_bucket['count'] += 1
        site_bucket['traffic'] += visits
        site_bucket['posts'].append({
            'id': pid,
            'title': heading,
            'traffic': visits,
            'reason': placement['reason']
        })

        for group, key in (('by_topic', topic_name), ('by_action', action_name)):
            bucket = summary[group][key]
            bucket['count'] += 1
            bucket['traffic'] += visits

        if sponsored_flag:
            sponsored['count'] += 1
            sponsored['traffic'] += visits
            sponsored['posts'].append({'id': pid, 'title': heading, 'traffic': visits})

        if draft_flag:
            drafts['count'] += 1
            drafts['posts'].append({'id': pid, 'title': heading, 'status': 'draft'})

    return summary
|
||||
|
||||
def generate_content_distribution_csv(self, posts, output_path):
    """Write the per-post distribution plan to a CSV file.

    One row is produced per post, sorted by traffic in descending order.
    Any exception raised while building or writing the rows is reported
    through self.log instead of being propagated.

    Args:
        posts: Mapping of post id to post dict.
        output_path: Destination path of the CSV file.
    """
    columns = [
        'post_id', 'title', 'topic', 'status', 'author',
        'traffic', 'impressions', 'position',
        'recommended_site', 'reason', 'action',
        'priority', 'notes'
    ]

    def build_row(pid, entry):
        topic_name = self.classify_post_topic(entry)
        placement = self.classify_website(entry)
        action_name = self.classify_content_action(entry)
        author_name = entry.get('author', '').strip()
        return {
            'post_id': pid,
            'title': entry['title'][:80],  # titles are truncated to 80 characters
            'topic': topic_name,
            'status': entry.get('status', 'published'),
            'author': author_name,
            'traffic': entry.get('traffic', 0),
            'impressions': entry.get('impressions', 0),
            'position': entry.get('position', 0),
            'recommended_site': placement['site'],
            'reason': placement['reason'],
            'action': action_name,
            'priority': placement['priority'],
            'notes': 'SPONSORED' if author_name == 'Expert' else ''
        }

    try:
        records = [build_row(pid, entry) for pid, entry in posts.items()]
        records.sort(key=lambda record: record['traffic'], reverse=True)

        with open(output_path, 'w', newline='', encoding='utf-8') as handle:
            sheet = csv.DictWriter(handle, fieldnames=columns)
            sheet.writeheader()
            sheet.writerows(records)

        self.log(f"✓ Exported {len(records)} posts to {output_path}")
    except Exception as e:
        self.log(f"❌ Error exporting CSV: {e}")
|
||||
|
||||
def generate_strategy_report(self, analysis, output_path):
    """Render the analysis as a Markdown strategy report and write it to disk.

    Any exception raised while rendering or writing is reported through
    self.log instead of being propagated.

    Args:
        analysis: Dict providing 'total_posts', 'by_site', 'by_topic',
            'by_action', 'sponsored_posts' and 'draft_posts'.
        output_path: Destination path of the Markdown file.
    """
    def visits(record):
        return record['traffic']

    def ranked(groups, metric):
        # Largest value of the chosen metric first.
        return sorted(groups.items(), key=lambda pair: pair[1][metric], reverse=True)

    try:
        total = analysis['total_posts']
        drafts = analysis['draft_posts']
        sponsored = analysis['sponsored_posts']

        lines = [
            "# Multi-Site Content Strategy Report\n",
            f"*Generated: {datetime.now().strftime('%Y-%m-%d %H:%M')}*\n\n",
            # Executive summary
            "## Executive Summary\n\n",
            f"**Total Content Analyzed:** {total} posts\n",
            f"- Published: {total - drafts['count']}\n",
            f"- Drafts: {drafts['count']}\n",
            f"- Sponsored: {sponsored['count']}\n\n",
            # Site distribution
            "## Recommended Site Distribution\n\n",
        ]

        for site_name, bucket in ranked(analysis['by_site'], 'traffic'):
            lines += [
                f"### {site_name}\n",
                f"- Posts: {bucket['count']}\n",
                f"- Total Traffic: {bucket['traffic']:,} visits/month\n",
                "- Top Posts:\n",
            ]
            top_five = sorted(bucket['posts'], key=visits, reverse=True)[:5]
            lines += [
                f" - {entry['title'][:60]} ({entry['traffic']} visits)\n"
                for entry in top_five
            ]
            lines.append("\n")

        lines.append("## Content by Topic\n\n")
        lines += [
            f"- **{topic_name.title()}:** {bucket['count']} posts ({bucket['traffic']:,} visits)\n"
            for topic_name, bucket in ranked(analysis['by_topic'], 'traffic')
        ]
        lines.append("\n")

        lines.append("## Required Actions\n\n")
        lines += [
            f"- **{action_name}:** {bucket['count']} posts ({bucket['traffic']:,} visits)\n"
            for action_name, bucket in ranked(analysis['by_action'], 'count')
        ]
        lines.append("\n")

        if sponsored['count'] > 0:
            lines += [
                "## Sponsored Content (by 'Expert')\n\n",
                f"Total: {sponsored['count']} posts\n",
                f"Traffic: {sponsored['traffic']:,} visits/month\n\n",
            ]
            top_ten = sorted(sponsored['posts'], key=visits, reverse=True)[:10]
            lines += [
                f"- {entry['title'][:70]} ({entry['traffic']} visits)\n"
                for entry in top_ten
            ]
            lines.append("\n")

        if drafts['count'] > 0:
            lines += [
                "## Draft Posts (Unpublished)\n\n",
                f"Total: {drafts['count']} posts\n",
                "*Decision needed: Publish, delete, or move to other site?*\n\n",
            ]
            lines += [f"- {entry['title'][:70]}\n" for entry in drafts['posts'][:15]]
            lines.append("\n")

        # Static recommendations section
        lines += [
            "## Strategic Recommendations\n\n",
            "1. **Consolidate on mistergeek.net:**\n",
            " - Keep only VPN, software, gaming, tech content\n",
            " - Focus on high-traffic posts (>50 visits/month)\n\n",
            "2. **Move to webscroll.fr:**\n",
            " - All torrent/file-sharing content\n",
            " - File-specific guides\n\n",
            "3. **Move to hellogeek.net:**\n",
            " - Low-traffic content (<50 visits)\n",
            " - Off-brand content\n",
            " - Experimental/niche posts\n\n",
            "4. **Delete:**\n",
            " - Posts with <5 visits and <20 impressions\n",
            " - Duplicates/thin content\n\n",
        ]

        with open(output_path, 'w', encoding='utf-8') as handle:
            handle.write(''.join(lines))

        self.log(f"✓ Generated strategy report: {output_path}")
    except Exception as e:
        self.log(f"❌ Error generating report: {e}")
|
||||
|
||||
def run(self, wordpress_csv, drafts_csv):
    """Run the whole pipeline: load posts, analyze them, write the outputs.

    Three files are written below self.output_dir: the distribution CSV,
    the Markdown strategy report and a JSON summary. A failure while
    writing the JSON summary is logged and does not stop the run.

    Args:
        wordpress_csv: Path to the WordPress posts CSV.
        drafts_csv: Path to the draft posts CSV.
    """
    rule = "=" * 70

    def totals_only(groups):
        # Keep the aggregate numbers and drop the per-post lists.
        return {
            name: {'count': bucket['count'], 'traffic': bucket['traffic']}
            for name, bucket in groups.items()
        }

    self.log("\n" + rule)
    self.log("Multi-Site Content Strategy Analyzer")
    self.log(rule + "\n")

    self.log("📚 Loading content...\n")
    published = self.load_wordpress_posts(wordpress_csv)
    unpublished = self.load_draft_posts(drafts_csv)

    # On an id collision the draft entry replaces the published one.
    combined = {**published, **unpublished}
    self.log(f"Total posts: {len(combined)}\n")

    self.log("🔍 Analyzing content distribution...\n")
    analysis = self.analyze_all_content(combined)

    self.log("📊 Generating outputs...\n")

    output_csv = self.output_dir / 'analysis' / 'content_distribution.csv'
    self.generate_content_distribution_csv(combined, output_csv)

    output_md = self.output_dir / 'reports' / 'content_strategy_report.md'
    self.generate_strategy_report(analysis, output_md)

    analysis_json = self.output_dir / 'analysis' / 'analysis_summary.json'
    try:
        with open(analysis_json, 'w', encoding='utf-8') as handle:
            # defaultdicts are converted to plain dicts for serialization.
            summary = {
                'total_posts': analysis['total_posts'],
                'by_site': dict(analysis['by_site']),
                'by_topic': totals_only(analysis['by_topic']),
                'by_action': totals_only(analysis['by_action']),
                'sponsored_posts': {
                    'count': analysis['sponsored_posts']['count'],
                    'traffic': analysis['sponsored_posts']['traffic']
                },
                'draft_posts': {
                    'count': analysis['draft_posts']['count']
                }
            }
            json.dump(summary, handle, indent=2, ensure_ascii=False)
        self.log(f"✓ Exported analysis JSON: {analysis_json}\n")
    except Exception as e:
        self.log(f"❌ Error exporting JSON: {e}\n")

    self.log("\n" + rule)
    self.log("ANALYSIS COMPLETE")
    self.log(rule)
    self.log("\nOutputs:")
    self.log(f" Distribution: {output_csv}")
    self.log(f" Strategy: {output_md}")
    self.log(f" Summary: {analysis_json}\n")

    for message in (
        "Next steps:",
        " 1. Review content_strategy_report.md",
        " 2. Review content_distribution.csv",
        " 3. Decide: which posts go to which site?",
        " 4. Plan content consolidation",
    ):
        self.log(message)
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse the two CSV paths and run the analyzer."""
    cli = argparse.ArgumentParser(description='Analyze content across multiple sites')
    csv_options = (
        ('--wordpress-csv', 'input/wordpress/new-propositions.csv', 'WordPress posts CSV'),
        ('--drafts-csv', 'input/drafts/drafts.csv', 'Draft posts CSV'),
    )
    for flag, default_path, description in csv_options:
        cli.add_argument(flag, type=Path, default=Path(default_path), help=description)

    options = cli.parse_args()

    ContentStrategyAnalyzer().run(options.wordpress_csv, options.drafts_csv)


if __name__ == '__main__':
    main()
|
||||
@@ -1,375 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Enhanced AI Analyzer - Selective analysis with in-place updates
|
||||
Analyzes posts and updates CSV with AI recommendations for:
|
||||
- Title optimization
|
||||
- Meta description optimization
|
||||
- Category suggestions
|
||||
- Site placement recommendations
|
||||
"""
|
||||
|
||||
import csv
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
import requests
|
||||
from datetime import datetime
|
||||
from config import Config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EnhancedPostAnalyzer:
    """Enhanced analyzer with selective column analysis and in-place updates.

    Posts are read from a CSV file, sent in batches to the OpenRouter chat
    completions endpoint, and the returned recommendations are written
    into extra columns of each row.
    """

    # Analyzable field -> the two CSV columns it adds (proposal, reason).
    FIELD_COLUMNS = {
        'title': ('proposed_title', 'title_reason'),
        'meta_description': ('proposed_meta_description', 'meta_reason'),
        'categories': ('proposed_category', 'category_reason'),
        'site': ('proposed_site', 'site_reason'),
    }

    API_URL = "https://openrouter.ai/api/v1/chat/completions"

    # Per-million-token rates used for the cost estimate that gets logged.
    INPUT_COST_PER_MILLION = 3
    OUTPUT_COST_PER_MILLION = 15

    def __init__(self, csv_file: str, analyze_fields: Optional[List[str]] = None):
        """
        Initialize analyzer.

        Args:
            csv_file: Path to input CSV
            analyze_fields: List of fields to analyze ['title', 'meta_description', 'categories', 'site']
                            If None, analyzes all fields
        """
        self.csv_file = Path(csv_file)
        self.openrouter_api_key = Config.OPENROUTER_API_KEY
        self.ai_model = Config.AI_MODEL
        self.posts = []
        self.analyzed_posts = []
        self.api_calls = 0
        self.ai_cost = 0.0

        # Default: analyze all fields
        if analyze_fields is None:
            self.analyze_fields = ['title', 'meta_description', 'categories', 'site']
        else:
            self.analyze_fields = analyze_fields

        logger.info(f"Fields to analyze: {', '.join(self.analyze_fields)}")

    def load_csv(self) -> bool:
        """Load posts from the CSV file into self.posts.

        Returns:
            True when the file was read, False when it is missing or
            unreadable (the error is logged).
        """
        logger.info(f"Loading CSV: {self.csv_file}")

        if not self.csv_file.exists():
            logger.error(f"CSV file not found: {self.csv_file}")
            return False

        try:
            with open(self.csv_file, 'r', encoding='utf-8') as f:
                self.posts = list(csv.DictReader(f))
        except Exception as e:
            logger.error(f"Error loading CSV: {e}")
            return False

        logger.info(f"✓ Loaded {len(self.posts)} posts from CSV")
        return True

    def get_ai_recommendations(self, batch: List[Dict], fields: List[str]) -> Optional[str]:
        """Ask the AI model for recommendations on the given fields.

        Args:
            batch: Post rows to analyze in a single request.
            fields: Fields to include in the prompt and to request
                proposals for.

        Returns:
            The raw text of the model's reply, or None when the API key is
            missing or the request fails (the error is logged).
        """
        if not self.openrouter_api_key:
            logger.error("OPENROUTER_API_KEY not set")
            return None

        # Format posts for AI
        formatted_posts = []
        for i, post in enumerate(batch, 1):
            post_text = f"{i}. POST ID: {post.get('post_id', '')}\n"
            post_text += f" Site: {post.get('site', '')}\n"

            if 'title' in fields:
                post_text += f" Title: {post.get('title', '')}\n"

            if 'meta_description' in fields:
                post_text += f" Meta Description: {post.get('meta_description', '')}\n"

            if 'categories' in fields:
                post_text += f" Categories: {post.get('categories', '')}\n"

            if 'content_preview' in post:
                # "or ''" guards against a None value in the row.
                post_text += f" Content Preview: {(post.get('content_preview') or '')[:300]}...\n"

            formatted_posts.append(post_text)

        posts_text = "\n".join(formatted_posts)

        # Build prompt based on requested fields
        prompt_parts = ["Analyze these blog posts and provide recommendations.\n\n"]

        if 'site' in fields:
            prompt_parts.append("""Website Strategy:
- mistergeek.net: High-value topics (VPN, Software, Gaming, General Tech, SEO, Content Marketing)
- webscroll.fr: Torrenting, File-Sharing, Tracker guides
- hellogeek.net: Low-traffic, experimental, off-brand content

""")

        prompt_parts.append(posts_text)
        prompt_parts.append("\nFor EACH post, provide a JSON object with:\n{\n")

        # The reply is matched back to the rows by post_id, so the model
        # must be asked to echo it.
        prompt_parts.append(' "post_id": "<POST ID copied exactly from the post>",\n')

        if 'title' in fields:
            prompt_parts.append(' "proposed_title": "<Improved SEO title>",\n')
            prompt_parts.append(' "title_reason": "<Reason for title change>",\n')

        if 'meta_description' in fields:
            prompt_parts.append(' "proposed_meta_description": "<Improved meta description (120-160 chars)>",\n')
            prompt_parts.append(' "meta_reason": "<Reason for meta description change>",\n')

        if 'categories' in fields:
            prompt_parts.append(' "proposed_category": "<Best category>",\n')
            prompt_parts.append(' "category_reason": "<Reason for category change>",\n')

        if 'site' in fields:
            prompt_parts.append(' "proposed_site": "<Best site for this post>",\n')
            prompt_parts.append(' "site_reason": "<Reason for site recommendation>",\n')

        prompt_parts.append(' "confidence": "<High|Medium|Low>",\n')
        prompt_parts.append(' "priority": "<High|Medium|Low>"\n}')

        prompt_parts.append(
            "\nReturn ONLY a JSON array of objects, one per post, in the same order as the posts above."
        )

        prompt = "".join(prompt_parts)

        try:
            logger.info(" Sending batch to AI for analysis...")

            response = requests.post(
                self.API_URL,
                headers={
                    "Authorization": f"Bearer {self.openrouter_api_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": self.ai_model,
                    "messages": [{"role": "user", "content": prompt}],
                    "temperature": 0.3,
                },
                timeout=60
            )
            response.raise_for_status()

            result = response.json()
            self.api_calls += 1

            # Track cost
            usage = result.get('usage', {})
            input_tokens = usage.get('prompt_tokens', 0)
            output_tokens = usage.get('completion_tokens', 0)
            self.ai_cost += (
                input_tokens * self.INPUT_COST_PER_MILLION
                + output_tokens * self.OUTPUT_COST_PER_MILLION
            ) / 1_000_000

            recommendations_text = result['choices'][0]['message']['content'].strip()
            logger.info(f" ✓ Got recommendations (tokens: {input_tokens}+{output_tokens})")

            return recommendations_text

        except Exception as e:
            logger.error(f"Error getting AI recommendations: {e}")
            return None

    def parse_recommendations(self, recommendations_json: str) -> List[Dict]:
        """Parse the JSON array of recommendations out of the AI reply.

        Text before the first '[' and after the last ']' is ignored.

        Returns:
            The decoded recommendation objects. Items that are not JSON
            objects are dropped. An empty list is returned when no array
            is found or the JSON is invalid.
        """
        try:
            start_idx = recommendations_json.find('[')
            end_idx = recommendations_json.rfind(']') + 1

            if start_idx == -1 or end_idx == 0:
                logger.error("Could not find JSON array in response")
                return []

            decoded = json.loads(recommendations_json[start_idx:end_idx])

        except json.JSONDecodeError as e:
            logger.error(f"Error parsing JSON recommendations: {e}")
            return []

        if not isinstance(decoded, list):
            logger.error("AI response is not a JSON array")
            return []

        # Later code calls .get() on every item, so keep objects only.
        recommendations = [item for item in decoded if isinstance(item, dict)]
        if len(recommendations) != len(decoded):
            logger.warning(
                f" Ignored {len(decoded) - len(recommendations)} non-object items in AI response"
            )
        return recommendations

    @staticmethod
    def _match_recommendations(batch: List[Dict], recommendations: List[Dict]) -> Dict[int, Dict]:
        """Pair each recommendation with the index of its post inside the batch.

        A recommendation is matched by its 'post_id' when that id occurs
        exactly once in the batch. Otherwise it is matched by position,
        but only when the reply holds exactly one item per post.
        """
        batch_ids = [str(post.get('post_id', '')) for post in batch]
        positional_ok = len(recommendations) == len(batch)

        matched: Dict[int, Dict] = {}
        for position, rec in enumerate(recommendations):
            rec_id = str(rec.get('post_id', '')).strip()
            if rec_id and batch_ids.count(rec_id) == 1:
                slot = batch_ids.index(rec_id)
            elif positional_ok:
                slot = position
            else:
                continue
            matched.setdefault(slot, rec)
        return matched

    def _apply_recommendation(self, post: Dict, rec: Optional[Dict]) -> None:
        """Write the proposal columns for the requested fields onto one row.

        With no recommendation, the current value is kept as the proposal
        and the reason is 'No AI recommendation'.
        """
        for field, (proposed_col, reason_col) in self.FIELD_COLUMNS.items():
            if field not in self.analyze_fields:
                continue
            current = post.get(field, '')
            if rec is None:
                post[proposed_col] = current
                post[reason_col] = 'No AI recommendation'
            else:
                post[proposed_col] = rec.get(proposed_col, current)
                post[reason_col] = rec.get(reason_col, '')

        if rec is None:
            post['ai_confidence'] = 'Unknown'
            post['ai_priority'] = 'Medium'
        else:
            post['ai_confidence'] = rec.get('confidence', 'Medium')
            post['ai_priority'] = rec.get('priority', 'Medium')

    def analyze_posts(self, batch_size: int = 10) -> bool:
        """Analyze all posts in batches and fill self.analyzed_posts.

        Recommendations are matched to rows within their own batch, so
        rows that share a post_id (for example rows from different sites)
        do not overwrite each other.

        Returns:
            True when at least one post was processed.
        """
        logger.info("\n" + "="*70)
        logger.info("ANALYZING POSTS WITH AI")
        logger.info("="*70 + "\n")

        batch_starts = range(0, len(self.posts), batch_size)
        total_batches = len(batch_starts)
        logger.info(f"Processing {len(self.posts)} posts in {total_batches} batches...\n")

        # Index into self.posts -> recommendation for that row.
        assigned: Dict[int, Dict] = {}

        for batch_num, start in enumerate(batch_starts, 1):
            batch = self.posts[start:start + batch_size]
            logger.info(f"Batch {batch_num}/{total_batches}: Analyzing {len(batch)} posts...")

            recommendations_json = self.get_ai_recommendations(batch, self.analyze_fields)

            if not recommendations_json:
                logger.error(f" Failed to get recommendations for batch {batch_num}")
                continue

            recommendations = self.parse_recommendations(recommendations_json)
            matched = self._match_recommendations(batch, recommendations)
            for offset, rec in matched.items():
                assigned[start + offset] = rec

            if len(matched) < len(recommendations):
                logger.warning(
                    f" Could not match {len(recommendations) - len(matched)} recommendations to posts"
                )
            logger.info(f" ✓ Got {len(recommendations)} recommendations")

        logger.info("\n✓ Analysis complete!")
        logger.info(f" Total recommendations: {len(assigned)}")
        logger.info(f" API calls: {self.api_calls}")
        logger.info(f" Estimated cost: ${self.ai_cost:.4f}")

        # Rebuild from scratch so a second call does not duplicate rows.
        self.analyzed_posts = []
        for index, post in enumerate(self.posts):
            self._apply_recommendation(post, assigned.get(index))
            self.analyzed_posts.append(post)

        return len(self.analyzed_posts) > 0

    def export_results(self, output_file: Optional[str] = None, update_input: bool = False) -> str:
        """
        Export results to CSV.

        Args:
            output_file: Custom output path
            update_input: If True, update the input CSV file (creates backup)

        Returns:
            Path to exported file, or an empty string when there is
            nothing to export
        """
        # Checked first so that no backup or directory is created for nothing.
        if not self.analyzed_posts:
            logger.error("No analyzed posts to export")
            return ""

        if update_input:
            # Create backup of original file
            backup_file = self.csv_file.parent / f"{self.csv_file.stem}_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
            import shutil
            shutil.copy2(self.csv_file, backup_file)
            logger.info(f"✓ Created backup: {backup_file}")

            output_file = self.csv_file
        elif not output_file:
            output_dir = Path(__file__).parent.parent / 'output'
            output_dir.mkdir(parents=True, exist_ok=True)
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            output_file = output_dir / f'analyzed_posts_{timestamp}.csv'

        output_file = Path(output_file)
        output_file.parent.mkdir(parents=True, exist_ok=True)

        new_fields = []
        for field, columns in self.FIELD_COLUMNS.items():
            if field in self.analyze_fields:
                new_fields.extend(columns)
        new_fields.extend(['ai_confidence', 'ai_priority'])

        # The analyzed rows already carry the new columns, so the combined
        # list is de-duplicated (order preserved) to avoid repeated headers.
        fieldnames = list(dict.fromkeys(list(self.analyzed_posts[0].keys()) + new_fields))

        logger.info(f"\nExporting results to: {output_file}")

        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(self.analyzed_posts)

        logger.info(f"✓ Exported {len(self.analyzed_posts)} posts")
        return str(output_file)

    def run(self, output_file: Optional[str] = None, update_input: bool = False, batch_size: int = 10) -> str:
        """Run complete analysis: load the CSV, analyze, export.

        Exits the process with status 1 when loading or analysis fails.

        Returns:
            Path to the exported file.
        """
        if not self.load_csv():
            sys.exit(1)

        if not self.analyze_posts(batch_size=batch_size):
            logger.error("Failed to analyze posts")
            sys.exit(1)

        return self.export_results(output_file=output_file, update_input=update_input)
|
||||
|
||||
|
||||
def main():
    """Main entry point with argument parsing.

    Parses the command line, runs EnhancedPostAnalyzer on the given CSV
    and logs the path of the exported file.
    """
    import argparse

    # This script reports its progress through logger.info. Without a
    # configured handler those messages are dropped, so configure logging
    # here. basicConfig does nothing if the root logger already has handlers.
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s'
    )

    parser = argparse.ArgumentParser(
        description='Enhanced AI analyzer with selective field analysis'
    )
    parser.add_argument('csv_file', help='Input CSV file')
    parser.add_argument('--output', '-o', help='Output CSV file (default: creates new file in output/)')
    parser.add_argument('--update', '-u', action='store_true', help='Update input CSV file (creates backup)')
    parser.add_argument('--fields', '-f', nargs='+',
                        choices=['title', 'meta_description', 'categories', 'site'],
                        help='Fields to analyze (default: all fields)')
    parser.add_argument('--batch-size', type=int, default=10, help='Batch size for AI analysis')

    args = parser.parse_args()

    analyzer = EnhancedPostAnalyzer(args.csv_file, analyze_fields=args.fields)
    output_file = analyzer.run(
        output_file=args.output,
        update_input=args.update,
        batch_size=args.batch_size
    )

    logger.info(f"\n✓ Analysis complete! Results saved to: {output_file}")


if __name__ == '__main__':
    main()
|
||||
@@ -1,378 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Export All Posts to CSV for AI Decision Making
|
||||
Fetches complete post data from all 3 WordPress sites and exports to CSV
|
||||
for AI-powered categorization and movement recommendations.
|
||||
Uses credentials from .env file for secure authentication.
|
||||
"""
|
||||
|
||||
import csv
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
import requests
|
||||
from requests.auth import HTTPBasicAuth
|
||||
import time
|
||||
from datetime import datetime
|
||||
import re
|
||||
from config import Config
|
||||
|
||||
# Setup logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(levelname)s - %(message)s'
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class PostExporter:
|
||||
"""Export posts from WordPress sites to CSV for AI analysis."""
|
||||
|
||||
def __init__(self):
    """Create empty result and cache containers and read the sites from Config."""
    self.category_cache = {}  # category lookups, keyed by site name
    self.all_posts = []
    self.sites = Config.WORDPRESS_SITES
|
||||
|
||||
def fetch_posts_from_site(self, site_name: str, site_config: Dict) -> List[Dict]:
    """
    Download every published and draft post of one site.

    Pages of 100 posts are requested until an empty page or an error is
    returned. An error stops paging for the current status only.

    Args:
        site_name: Website name (used in log messages)
        site_config: Site configuration dict with 'url', 'username' and 'password'

    Returns:
        List of post objects as returned by the posts endpoint
    """
    logger.info(f"\nFetching posts from {site_name}...")

    root = site_config['url'].rstrip('/')
    endpoint = f"{root}/wp-json/wp/v2/posts"
    credentials = HTTPBasicAuth(site_config['username'], site_config['password'])
    collected = []

    for status in ('publish', 'draft'):
        page_number = 1
        fetched_for_status = 0

        while True:
            query = {'page': page_number, 'per_page': 100, 'status': status}

            try:
                logger.info(f" Fetching page {page_number} ({status} posts)...")
                response = requests.get(endpoint, params=query, auth=credentials, timeout=10)
                response.raise_for_status()

                chunk = response.json()
                if not chunk:
                    break

                collected.extend(chunk)
                fetched_for_status += len(chunk)
                logger.info(f" ✓ Got {len(chunk)} posts (total: {len(collected)})")

                page_number += 1
                time.sleep(0.5)  # pause between page requests

            except requests.exceptions.HTTPError as e:
                # A 400 response is treated as the end of the listing.
                if response.status_code == 400:
                    logger.info(f" ℹ API limit reached (got {fetched_for_status} {status} posts)")
                else:
                    logger.error(f"Error on page {page_number}: {e}")
                break

            except requests.exceptions.RequestException as e:
                logger.error(f"Error fetching from {site_name}: {e}")
                break

        if fetched_for_status > 0:
            logger.info(f" ✓ Total {status} posts: {fetched_for_status}")

    logger.info(f"✓ Total posts from {site_name}: {len(collected)}\n")
    return collected
|
||||
|
||||
def fetch_category_names(self, site_name: str, site_config: Dict) -> Dict[int, Dict[str, str]]:
    """
    Fetch category names and slugs from a WordPress site.

    Results are cached per site name in self.category_cache. A failed
    fetch is cached too, with whatever was collected before the error.

    Args:
        site_name: Website name
        site_config: Site configuration dict with 'url', 'username' and 'password'

    Returns:
        Dict mapping category IDs to {'name': ..., 'slug': ...} dicts
    """
    if site_name in self.category_cache:
        return self.category_cache[site_name]

    logger.info(f" Fetching categories from {site_name}...")
    categories = {}
    base_url = site_config['url'].rstrip('/')
    api_url = f"{base_url}/wp-json/wp/v2/categories"
    auth = HTTPBasicAuth(site_config['username'], site_config['password'])

    try:
        # One request returns at most 100 categories, so keep requesting
        # pages until the page count reported in the response is reached.
        page = 1
        while True:
            params = {'per_page': 100, 'page': page}
            response = requests.get(api_url, params=params, auth=auth, timeout=10)
            response.raise_for_status()

            for cat in response.json():
                categories[cat['id']] = {
                    'name': cat.get('name', ''),
                    'slug': cat.get('slug', ''),
                }

            # With no X-WP-TotalPages header, stop after the first page.
            total_pages = int(response.headers.get('X-WP-TotalPages', 1))
            if page >= total_pages:
                break
            page += 1

        logger.info(f" ✓ Fetched {len(categories)} categories")
    except Exception as e:
        logger.warning(f" Could not fetch categories from {site_name}: {e}")

    self.category_cache[site_name] = categories
    return categories
|
||||
|
||||
def extract_post_details(self, post: Dict, site_name: str, category_map: Dict[int, Dict]) -> Dict:
    """
    Extract all relevant details from a post for AI analysis.

    Args:
        post: WordPress post object
        site_name: Website name
        category_map: Dict mapping category IDs to {'name': ..., 'slug': ...} dicts

    Returns:
        Dict with extracted post details. 'content_preview' holds the
        first 500 characters of the tag-stripped content, while
        'word_count' counts the words of the whole tag-stripped content.
    """
    # Title
    title = post.get('title', {})
    if isinstance(title, dict):
        title = title.get('rendered', '')

    # Content: strip HTML tags, keep the full text for the word count and
    # the first 500 chars as context for the AI.
    content = post.get('content', {})
    if isinstance(content, dict):
        content = content.get('rendered', '')
    content_full = re.sub('<[^<]+?>', '', content)
    content_text = content_full[:500]

    # Excerpt
    excerpt = post.get('excerpt', {})
    if isinstance(excerpt, dict):
        excerpt = excerpt.get('rendered', '')
    excerpt_text = re.sub('<[^<]+?>', '', excerpt)

    # Meta descriptions and SEO data ('meta' is ignored unless it is a dict)
    meta_dict = post.get('meta', {}) if isinstance(post.get('meta'), dict) else {}

    rank_math_title = meta_dict.get('rank_math_title', '')
    rank_math_description = meta_dict.get('rank_math_description', '')
    rank_math_keyword = meta_dict.get('rank_math_focus_keyword', '')
    yoast_description = meta_dict.get('_yoast_wpseo_metadesc', '')

    # The Rank Math description wins over the Yoast one.
    meta_description = rank_math_description or yoast_description or ''

    # Categories - convert IDs to names using category_map; unknown IDs
    # are written as the numeric ID.
    category_ids = post.get('categories', [])
    category_names = ', '.join([
        category_map.get(cat_id, {}).get('name', str(cat_id))
        for cat_id in category_ids
    ]) if category_ids else ''

    # Tags (written as their raw values)
    tags = post.get('tags', [])
    tag_names = ', '.join([str(t) for t in tags]) if tags else ''

    return {
        'site': site_name,
        'post_id': post['id'],
        'status': post.get('status', 'publish'),
        'title': title.strip(),
        'slug': post.get('slug', ''),
        'url': post.get('link', ''),
        'author_id': post.get('author', ''),
        'date_published': post.get('date', ''),
        'date_modified': post.get('modified', ''),
        'categories': category_names,
        'tags': tag_names,
        'excerpt': excerpt_text.strip(),
        'content_preview': content_text.strip(),
        'seo_title': rank_math_title,
        'meta_description': meta_description,
        'focus_keyword': rank_math_keyword,
        # Counted on the full text; counting the preview capped it at 500 chars.
        'word_count': len(content_full.split()),
    }
|
||||
|
||||
def export_to_csv(self, output_file: Optional[str] = None) -> Optional[str]:
    """
    Export all collected posts to a CSV file.

    Args:
        output_file: Optional custom output path. When omitted, the file is
            written as ``all_posts_<YYYY-MM-DD>.csv`` inside an ``output``
            directory next to this module's parent directory.

    Returns:
        Path of the written CSV file as a string, or None when there are
        no posts to export (an error is logged in that case).
    """
    # Check for data first so an empty export does not create directories.
    if not self.all_posts:
        logger.error("No posts to export")
        return None

    if output_file:
        output_path = Path(output_file)
    else:
        date_str = datetime.now().strftime('%Y-%m-%d')
        output_path = Path(__file__).parent.parent / 'output' / f'all_posts_{date_str}.csv'
    output_path.parent.mkdir(parents=True, exist_ok=True)

    fieldnames = [
        'site',
        'post_id',
        'status',
        'title',
        'slug',
        'url',
        'author_id',
        'date_published',
        'date_modified',
        'categories',
        'tags',
        'excerpt',
        'content_preview',
        'seo_title',
        'meta_description',
        'focus_keyword',
        'word_count',
    ]

    logger.info(f"Exporting {len(self.all_posts)} posts to CSV...")

    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        # Restrict every row to the declared columns; missing keys become ''.
        writer.writerows(
            {field: post.get(field, '') for field in fieldnames}
            for post in self.all_posts
        )

    logger.info(f"✓ CSV exported to: {output_path}")
    return str(output_path)
|
||||
|
||||
def run(self):
    """
    Run the complete export process.

    For every configured site this fetches the category names and the
    posts, flattens each post with ``extract_post_details`` and collects the
    rows in ``self.all_posts``. The rows are then sorted by site and post
    ID, written to the default CSV location and summarised in the log.

    Exits the process with status 1 when no posts were collected.
    """
    logger.info("="*70)
    logger.info("EXPORTING ALL POSTS FOR AI DECISION MAKING")
    logger.info("="*70)
    logger.info("Sites configured: " + ", ".join(self.sites.keys()))
    logger.info("")

    # Fetch from all sites
    for site_name, config in self.sites.items():
        categories = self.fetch_category_names(site_name, config)
        posts = self.fetch_posts_from_site(site_name, config)

        # A failed fetch may yield an empty (or missing) result; skip it.
        for post in posts or []:
            self.all_posts.append(
                self.extract_post_details(post, site_name, categories)
            )

    if not self.all_posts:
        logger.error("No posts found on any site")
        sys.exit(1)

    # Sort by site then by post_id
    self.all_posts.sort(key=lambda x: (x['site'], x['post_id']))

    # Export to CSV
    csv_file = self.export_to_csv()

    # Print summary
    logger.info("\n" + "="*70)
    logger.info("EXPORT SUMMARY")
    logger.info("="*70)

    # Count drafts explicitly so the per-site figures use the same rule as
    # the grand total (other statuses only contribute to 'total').
    by_site = {}
    for post in self.all_posts:
        stats = by_site.setdefault(
            post['site'], {'total': 0, 'published': 0, 'draft': 0}
        )
        stats['total'] += 1
        if post['status'] == 'publish':
            stats['published'] += 1
        elif post['status'] == 'draft':
            stats['draft'] += 1

    for site, stats in sorted(by_site.items()):
        logger.info(f"\n{site}:")
        logger.info(f" Total: {stats['total']}")
        logger.info(f" Published: {stats['published']}")
        logger.info(f" Drafts: {stats['draft']}")

    total_posts = len(self.all_posts)
    total_published = sum(stats['published'] for stats in by_site.values())
    total_drafts = sum(stats['draft'] for stats in by_site.values())

    logger.info(f"\n{'─'*70}")
    logger.info(f"Total across all sites: {total_posts} posts")
    logger.info(f" Published: {total_published}")
    logger.info(f" Drafts: {total_drafts}")
    logger.info(f"{'─'*70}")

    logger.info("\n✓ Export complete!")
    logger.info(f"✓ CSV file: {csv_file}")
    logger.info("\nCSV includes:")
    logger.info(" • Site, Post ID, Status, Title, URL")
    logger.info(" • Publication dates, Categories, Tags")
    logger.info(" • Content preview (500 chars)")
    logger.info(" • SEO title, Meta description, Focus keyword")
    logger.info(" • Word count")
    logger.info("\nNext step: Upload CSV to Claude or other AI for:")
    logger.info(" 1. Categorize by topic (VPN, software, gaming, torrenting, etc.)")
    logger.info(" 2. Recommend which site each post should be on")
    logger.info(" 3. Identify duplicates for consolidation")
    logger.info(" 4. Flag posts for deletion (low-traffic, thin content)")
|
||||
|
||||
|
||||
def main():
    """
    Command-line entry point for the post exporter.

    Parses ``--output`` and runs the full export. ``PostExporter.run()``
    always writes the CSV to its default location; when ``--output`` is
    given the collected posts are additionally written to that path, so the
    flag is no longer silently ignored.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='Export all posts from WordPress sites for AI decision making'
    )
    parser.add_argument(
        '--output',
        help='Custom output CSV file path'
    )

    args = parser.parse_args()

    exporter = PostExporter()
    exporter.run()

    # run() exits the process when nothing was fetched, so posts exist here.
    if args.output:
        exporter.export_to_csv(args.output)
|
||||
|
||||
|
||||
# Run the exporter only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
@@ -1,778 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Multi-Site WordPress SEO Analyzer
|
||||
Fetches posts from 3 WordPress sites, analyzes titles and meta descriptions,
|
||||
and provides AI-powered optimization recommendations.
|
||||
"""
|
||||
|
||||
import os
|
||||
import csv
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
import requests
|
||||
from requests.auth import HTTPBasicAuth
|
||||
import time
|
||||
from config import Config
|
||||
import sys
|
||||
|
||||
# Setup logging
|
||||
# Root logging configuration: INFO level with timestamp and level prefix.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)

# Module-level logger used by everything in this file.
logger = logging.getLogger(__name__)
|
||||
|
||||
class MultiSiteSEOAnalyzer:
|
||||
"""Analyzes titles and meta descriptions across multiple WordPress sites."""
|
||||
|
||||
def __init__(self, progressive_csv: bool = True):
    """
    Set up an analyzer with empty result state.

    Args:
        progressive_csv: If True, write CSV progressively as posts are analyzed
    """
    # Settings read from the shared Config object.
    self.sites_config = Config.WORDPRESS_SITES
    self.openrouter_api_key = Config.OPENROUTER_API_KEY

    # Data collected while running.
    self.posts_data = {}
    self.analysis_results = []

    # AI usage bookkeeping.
    self.api_calls = 0
    self.ai_cost = 0.0

    # Progressive CSV state; the handle and writer are created later.
    self.progressive_csv = progressive_csv
    self.csv_file = None
    self.csv_writer = None
|
||||
|
||||
def fetch_posts_from_site(self, site_name: str, site_config: Dict,
                          include_drafts: bool = False) -> List[Dict]:
    """
    Fetch posts from a WordPress site using the REST API.

    Each status is requested separately and paginated 100 posts at a time.
    Pagination stops as soon as the ``X-WP-TotalPages`` response header says
    the last page was reached; if the header is missing the loop falls back
    to stopping on an empty page or an HTTP 400.

    Args:
        site_name: Name of the site (domain)
        site_config: Configuration dict with url, username, password
        include_drafts: If True, fetch both published and draft posts

    Returns:
        List of posts with metadata (possibly partial if a request failed;
        failures are logged, not raised)
    """
    logger.info(f"Fetching posts from {site_name}...")

    posts = []
    base_url = site_config['url'].rstrip('/')
    api_url = f"{base_url}/wp-json/wp/v2/posts"
    auth = HTTPBasicAuth(site_config['username'], site_config['password'])

    # Determine which statuses to fetch
    statuses = ['publish', 'draft'] if include_drafts else ['publish']
    status_str = ', '.join(statuses).replace('publish', 'published').replace('draft', 'drafts')

    # Fetch each status separately to avoid 400 Bad Request on pagination
    for status in statuses:
        page = 1
        status_count = 0
        use_fields = True  # Try with _fields first, fallback without if 400

        while True:
            params = {
                'page': page,
                'per_page': 100,
                'status': status,  # Single status per request
            }

            # Add _fields only if not getting 400 errors
            if use_fields:
                params['_fields'] = 'id,title,slug,link,meta,status'

            try:
                response = requests.get(api_url, params=params, auth=auth, timeout=10)
                response.raise_for_status()

                page_posts = response.json()
                if not page_posts:
                    break

                posts.extend(page_posts)
                status_count += len(page_posts)
                logger.info(f" ✓ Fetched {len(page_posts)} {status} posts (page {page})")

                # Stop on the last page instead of requesting one page too
                # many and waiting for the server to answer with a 400.
                try:
                    total_pages = int(response.headers.get('X-WP-TotalPages', 0))
                except (TypeError, ValueError):
                    total_pages = 0
                if total_pages and page >= total_pages:
                    break

                page += 1
                time.sleep(Config.API_DELAY_SECONDS)

            except requests.exceptions.HTTPError as e:
                # Read the status from the exception itself; `is not None`
                # is required because a failed Response is falsy.
                status_code = e.response.status_code if e.response is not None else None

                if status_code == 400 and use_fields and page == 1:
                    # Retry page 1 without _fields parameter
                    logger.info(f" ⓘ Retrying without _fields parameter...")
                    use_fields = False
                    continue
                elif status_code == 400:
                    # Pagination or API limit reached
                    logger.info(f" ⓘ API limit reached (fetched {status_count} {status} posts)")
                    break
                else:
                    logger.error(f"Error fetching page {page} from {site_name}: {e}")
                    break

            except requests.exceptions.RequestException as e:
                logger.error(f"Error fetching from {site_name}: {e}")
                break

        if status_count > 0:
            logger.info(f" ✓ Total {status} posts: {status_count}")

    logger.info(f"✓ Total posts from {site_name} ({status_str}): {len(posts)}")
    return posts
|
||||
|
||||
def extract_seo_data(self, post: Dict, site_name: str) -> Dict:
    """
    Extract SEO-relevant data from a post.

    Args:
        post: Post data from WordPress API. Must contain ``id``; ``title``
            may be a plain value or a dict with a ``rendered`` entry.
        site_name: Name of the site

    Returns:
        Dict with keys site, post_id, title, slug, url, meta_description
        and status. Title and meta description are whitespace-stripped
        strings; a missing or null value becomes ''.
    """
    title = post.get('title', {})
    if isinstance(title, dict):
        title = title.get('rendered', '')

    # Meta keys used by different SEO plugins, most specific first. Both the
    # underscore-prefixed and the plain Rank Math key are checked.
    meta_keys = (
        '_yoast_wpseo_metadesc',   # Yoast SEO
        '_rank_math_description',  # Rank Math (private meta key)
        'rank_math_description',   # Rank Math (plain key)
        '_aioseo_description',     # All in One SEO
        'description',             # Standard field
        '_meta_description',       # Alternative
        'metadesc',                # Alternative
    )

    meta_desc = ''
    meta_dict = post.get('meta')
    if isinstance(meta_dict, dict):
        # First non-empty value wins; null/empty entries are skipped.
        meta_desc = next(
            (meta_dict[key] for key in meta_keys if meta_dict.get(key)),
            '',
        )

    return {
        'site': site_name,
        'post_id': post['id'],
        # str() guards against null or non-string values in the payload.
        'title': str(title or '').strip(),
        'slug': post.get('slug', ''),
        'url': post.get('link', ''),
        'meta_description': str(meta_desc).strip(),
        'status': post.get('status', 'publish'),
    }
|
||||
|
||||
def analyze_title(self, title: str) -> Dict:
    """
    Analyze title for SEO best practices.

    Starting from 100, the score is reduced for length outside the 50-70
    character range, for a missing power word, for a missing digit and for
    characters other than letters, digits, space, '-' and ':'. Letters and
    digits are recognised with ``str.isalnum`` so accented or non-Latin
    titles are not penalised.

    Args:
        title: Post title

    Returns:
        Dict with length, issues, recommendations, score (0-100),
        has_power_word and has_number
    """
    length = len(title)

    # SEO best practices
    issues = []
    recommendations = []
    score = 100

    if length < 30:
        issues.append(f"Too short ({length})")
        recommendations.append("Expand title to 50-60 characters")
        score -= 20
    elif length < 50:
        recommendations.append("Could be slightly longer (target 50-60)")
        score -= 5
    elif length > 70:
        issues.append(f"Too long ({length})")
        recommendations.append("Consider shortening to 50-70 characters")
        score -= 15

    # Check for power words (case-insensitive substring match)
    power_words = ('best', 'ultimate', 'complete', 'essential', 'proven',
                   'effective', 'powerful', 'expert', 'guide', 'tutorial',
                   'how to', 'step by step', 'top 10', 'ultimate guide')
    lowered_title = title.lower()
    has_power_word = any(word in lowered_title for word in power_words)
    if not has_power_word:
        recommendations.append("Consider adding a power word (best, complete, guide, etc.)")
        score -= 10

    # Check for numbers
    has_number = any(c.isdigit() for c in title)
    if not has_number:
        recommendations.append("Consider adding a number (e.g., 'Top 5', '2025')")
        score -= 5

    # Check for emojis or special chars that might break rendering.
    # Sorted so the recommendation text is identical on every run.
    special_chars = sorted(
        c for c in set(title) if not (c.isalnum() or c in ' -:')
    )
    if special_chars:
        listed = ', '.join(repr(c) for c in special_chars)
        recommendations.append(f"Check special characters: {{{listed}}}")
        score -= 5

    return {
        'length': length,
        'issues': issues,
        'recommendations': recommendations,
        'score': max(0, score),
        'has_power_word': has_power_word,
        'has_number': has_number,
    }
|
||||
|
||||
def analyze_meta_description(self, meta_desc: str) -> Dict:
    """
    Score a meta description against simple SEO heuristics.

    Args:
        meta_desc: Meta description text

    Returns:
        Dict with length, is_missing, issues, recommendations and a
        score between 0 and 100
    """
    length = len(meta_desc)

    # Guard clause: an empty description scores zero outright.
    if not meta_desc:
        return {
            'length': length,
            'is_missing': True,
            'issues': ["Missing meta description"],
            'recommendations': ["Write a 120-160 character meta description"],
            'score': 0,
        }

    problems = []
    advice = []
    points = 100

    # Length bands: <100 too short, 100-119 slightly short, >160 too long.
    if length > 160:
        problems.append(f"Too long ({length})")
        advice.append("Shorten to 120-160 characters")
        points -= 15
    elif length < 100:
        problems.append(f"Too short ({length})")
        advice.append("Expand to 120-160 characters")
        points -= 20
    elif length < 120:
        advice.append("Could be slightly longer (target 120-160)")
        points -= 5

    # Check for a call-to-action word (case-insensitive substring match)
    lowered = meta_desc.lower()
    cta_words = ('learn', 'discover', 'read', 'explore', 'find', 'get',
                 'download', 'check', 'see', 'watch', 'try', 'start')
    if all(word not in lowered for word in cta_words):
        advice.append("Consider adding a call-to-action")
        points -= 5

    return {
        'length': length,
        'is_missing': False,
        'issues': problems,
        'recommendations': advice,
        'score': max(0, points),
    }
|
||||
|
||||
def calculate_overall_score(self, title_analysis: Dict, meta_analysis: Dict) -> float:
    """Combine the title score (40%) and meta score (60%) into one value."""
    weighted_title = title_analysis['score'] * 0.4
    weighted_meta = meta_analysis['score'] * 0.6
    return weighted_title + weighted_meta
|
||||
|
||||
def generate_ai_recommendations(self, post_data: Dict, title_analysis: Dict,
                                meta_analysis: Dict) -> Optional[str]:
    """
    Ask the AI model (via OpenRouter) for optimization recommendations.

    Args:
        post_data: Post data (title, meta_description and url are used)
        title_analysis: Title analysis results (length and issues are used)
        meta_analysis: Meta description analysis (length and issues are used)

    Returns:
        The recommendation text, or None when no API key is configured or
        the request fails (failures are logged as warnings)
    """
    if not self.openrouter_api_key:
        return None

    prompt = f"""Analyze this blog post and provide specific SEO optimization recommendations:

Post Title: "{post_data['title']}"
Current Meta Description: "{post_data['meta_description'] or 'MISSING'}"
URL: {post_data['url']}

Title Analysis:
- Length: {title_analysis['length']} characters (target: 50-70)
- Issues: {', '.join(title_analysis['issues']) or 'None'}

Meta Description Analysis:
- Length: {meta_analysis['length']} characters (target: 120-160)
- Issues: {', '.join(meta_analysis['issues']) or 'None'}

Provide 2-3 specific, actionable recommendations to improve SEO. Focus on:
1. If title needs improvement: suggest a better title
2. If meta description is missing: write one
3. If both are weak: provide both improved versions

Format as:
- Recommendation 1: [specific action]
- Recommendation 2: [specific action]
etc.

Be concise and specific."""

    request_headers = {
        "Authorization": f"Bearer {self.openrouter_api_key}",
        "Content-Type": "application/json",
    }
    request_body = {
        "model": "anthropic/claude-3.5-sonnet",
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.7,
    }

    try:
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=request_headers,
            json=request_body,
            timeout=30
        )
        response.raise_for_status()

        payload = response.json()
        self.api_calls += 1

        # Track cost (Claude 3.5 Sonnet: $3/$15 per 1M tokens)
        token_usage = payload.get('usage', {})
        prompt_tokens = token_usage.get('prompt_tokens', 0)
        completion_tokens = token_usage.get('completion_tokens', 0)
        self.ai_cost += (prompt_tokens * 3 + completion_tokens * 15) / 1_000_000

        return payload['choices'][0]['message']['content'].strip()

    except Exception as e:
        logger.warning(f"AI recommendation failed: {e}")
        return None
|
||||
|
||||
def _setup_progressive_csv(self) -> Optional[Path]:
    """
    Create the CSV file used for progressive writing.

    Opens ``seo_analysis_<timestamp>.csv`` in the ``output`` directory,
    writes the header row and stores the open handle and writer on
    ``self.csv_file`` / ``self.csv_writer``.

    Returns:
        Path of the created CSV file, or None if progressive_csv is False
    """
    if not self.progressive_csv:
        return None

    output_dir = Path(__file__).parent.parent / 'output'
    output_dir.mkdir(parents=True, exist_ok=True)
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    csv_path = output_dir / f'seo_analysis_{timestamp}.csv'

    fieldnames = [
        'site', 'post_id', 'status', 'title', 'slug', 'url',
        'meta_description', 'title_score', 'title_issues',
        'title_recommendations', 'meta_score', 'meta_issues',
        'meta_recommendations', 'overall_score', 'ai_recommendations',
    ]

    # The handle stays open on purpose (rows are appended later), so it is
    # closed here only if writing the header fails.
    csv_file = open(csv_path, 'w', newline='', encoding='utf-8')
    try:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        csv_file.flush()
    except Exception:
        csv_file.close()
        raise

    logger.info(f"✓ CSV file created: {csv_path}")
    self.csv_file = csv_file
    self.csv_writer = writer

    return csv_path
|
||||
|
||||
def _write_result_to_csv(self, result: Dict) -> None:
|
||||
"""Write a single result row to CSV file."""
|
||||
if self.progressive_csv and self.csv_writer:
|
||||
self.csv_writer.writerow(result)
|
||||
self.csv_file.flush()
|
||||
|
||||
def analyze_all_sites(self, use_ai: bool = True, top_n: int = 10,
                      include_drafts: bool = False):
    """
    Analyze all configured sites.

    Fetches the posts of every site, scores each title and meta
    description, appends the rows to ``self.analysis_results`` (and to the
    progressive CSV when enabled), optionally requests AI recommendations
    for the lowest-scoring posts, and leaves the results sorted by overall
    score, lowest first. The progressive CSV handle is always closed, even
    when the analysis raises.

    Args:
        use_ai: Whether to use AI for recommendations
        top_n: Number of top priority posts to get AI recommendations for
        include_drafts: If True, include draft posts in analysis
    """
    logger.info(f"Starting analysis of {len(self.sites_config)} sites...")
    if include_drafts:
        logger.info("(Including draft posts)")
    logger.info("")

    all_posts = []

    # Fetch posts from all sites
    for site_name, config in self.sites_config.items():
        posts = self.fetch_posts_from_site(site_name, config, include_drafts=include_drafts)
        if posts:
            self.posts_data[site_name] = posts
            all_posts.extend(posts)

    if not all_posts:
        logger.error("No posts found on any site")
        return

    logger.info(f"\nAnalyzing {len(all_posts)} posts...\n")

    # Setup progressive CSV if enabled
    self._setup_progressive_csv()

    try:
        # Analyze each post
        for site_name, posts in self.posts_data.items():
            logger.info(f"Analyzing {len(posts)} posts from {site_name}...")

            for idx, post in enumerate(posts, 1):
                seo_data = self.extract_seo_data(post, site_name)
                title_analysis = self.analyze_title(seo_data['title'])
                meta_analysis = self.analyze_meta_description(seo_data['meta_description'])
                overall_score = self.calculate_overall_score(title_analysis, meta_analysis)

                result = {
                    **seo_data,
                    'title_score': title_analysis['score'],
                    'title_issues': '|'.join(title_analysis['issues']) or 'None',
                    'title_recommendations': '|'.join(title_analysis['recommendations']),
                    'meta_score': meta_analysis['score'],
                    'meta_issues': '|'.join(meta_analysis['issues']) or 'None',
                    'meta_recommendations': '|'.join(meta_analysis['recommendations']),
                    'overall_score': overall_score,
                    'ai_recommendations': '',
                }

                self.analysis_results.append(result)

                # Write to CSV progressively (before AI recommendations)
                if self.progressive_csv:
                    self._write_result_to_csv(result)
                    logger.debug(f" [{idx}/{len(posts)}] Written: {seo_data['title'][:40]}")

        # Sort by priority (lowest scores first) and get AI recommendations for top posts
        if use_ai:
            self.analysis_results.sort(key=lambda x: x['overall_score'])
            logger.info(f"\nGenerating AI recommendations for top {top_n} posts...\n")

            # The batch may be smaller than top_n; report its real size.
            ai_batch = self.analysis_results[:top_n]
            for idx, result in enumerate(ai_batch, 1):
                logger.info(f" [{idx}/{len(ai_batch)}] {result['title'][:50]}...")

                ai_recs = self.generate_ai_recommendations(
                    result,
                    {
                        'score': result['title_score'],
                        'issues': result['title_issues'].split('|'),
                        'length': len(result['title'])
                    },
                    {
                        'score': result['meta_score'],
                        'issues': result['meta_issues'].split('|'),
                        'length': len(result['meta_description'])
                    }
                )

                # Rows already written to the progressive CSV are not
                # patched here; the final export rewrites the file.
                result['ai_recommendations'] = ai_recs or ''

                time.sleep(0.5)  # Rate limiting

        # Sort by overall score for final export
        self.analysis_results.sort(key=lambda x: x['overall_score'])

    finally:
        # Close progressive CSV if open (will be re-written with final data including AI recs)
        if self.progressive_csv and self.csv_file:
            self.csv_file.close()
            self.csv_file = None
            self.csv_writer = None
|
||||
|
||||
def export_results(self, output_file: Optional[str] = None):
    """
    Write the analysis results to CSV and produce the markdown summary.

    Without an explicit path the file goes into the ``output`` directory:
    in progressive mode the last ``seo_analysis_*.csv`` in sorted order is
    overwritten (or a ``*_final.csv`` is created if none exists), otherwise
    a new timestamped file is created.

    Args:
        output_file: Output file path (optional)
    """
    if not output_file:
        default_dir = Path(__file__).parent.parent / 'output'
        default_dir.mkdir(parents=True, exist_ok=True)
        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')

        # Only progressive mode reuses an existing analysis file.
        candidates = sorted(default_dir.glob('seo_analysis_*.csv')) if self.progressive_csv else []
        if candidates:
            output_file = candidates[-1]
        elif self.progressive_csv:
            output_file = default_dir / f'seo_analysis_{stamp}_final.csv'
        else:
            output_file = default_dir / f'seo_analysis_{stamp}.csv'

    destination = Path(output_file)
    destination.parent.mkdir(parents=True, exist_ok=True)

    if not self.analysis_results:
        logger.error("No results to export")
        return

    columns = [
        'site', 'post_id', 'status', 'title', 'slug', 'url',
        'meta_description',
        'title_score', 'title_issues', 'title_recommendations',
        'meta_score', 'meta_issues', 'meta_recommendations',
        'overall_score', 'ai_recommendations',
    ]

    with open(destination, 'w', newline='', encoding='utf-8') as handle:
        writer = csv.DictWriter(handle, fieldnames=columns)
        writer.writeheader()
        for row in self.analysis_results:
            # Keep only the declared columns; absent keys become ''.
            writer.writerow({column: row.get(column, '') for column in columns})

    if self.progressive_csv:
        logger.info(f"\n✓ Final results saved to: {destination}")
    else:
        logger.info(f"\n✓ Results exported to: {destination}")

    # Also export as a summary report
    self.export_summary_report(destination)
|
||||
|
||||
def export_summary_report(self, csv_file: Path):
    """
    Write a markdown summary next to the given CSV file.

    The report is named ``<csv stem>_summary.md`` and contains overall
    statistics, priority issue counts, a section per site listing its first
    five results, and a legend.
    """
    report_file = csv_file.parent / f"{csv_file.stem}_summary.md"
    results = self.analysis_results

    # Group by site, keeping the order of the results
    by_site = {}
    for entry in results:
        by_site.setdefault(entry['site'], []).append(entry)

    with open(report_file, 'w', encoding='utf-8') as f:
        f.write("# Multi-Site SEO Analysis Report\n\n")
        f.write(f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")

        # Summary stats
        total_posts = len(results)
        published = len([r for r in results if r['status'] == 'publish'])
        drafts = len([r for r in results if r['status'] == 'draft'])
        if total_posts > 0:
            avg_score = sum(r['overall_score'] for r in results) / total_posts
        else:
            avg_score = 0

        f.write("## Summary\n\n")
        f.write(f"- **Total Posts:** {total_posts}\n")
        if published > 0:
            f.write(f" - Published: {published}\n")
        if drafts > 0:
            f.write(f" - Drafts: {drafts}\n")
        f.write(f"- **Average SEO Score:** {avg_score:.1f}/100\n")
        f.write(f"- **API Calls Made:** {self.api_calls}\n")
        f.write(f"- **AI Cost:** ${self.ai_cost:.4f}\n")
        f.write(f"- **Sites Analyzed:** {len(by_site)}\n\n")

        # Priority issues
        missing_meta = len([r for r in results if r['meta_score'] == 0])
        weak_titles = len([r for r in results if r['title_score'] < 50])
        weak_meta = len([r for r in results if 0 < r['meta_score'] < 50])

        f.write("## Priority Issues\n\n")
        f.write(f"- **Missing Meta Descriptions:** {missing_meta} posts\n")
        f.write(f"- **Weak Titles (Score < 50):** {weak_titles} posts\n")
        f.write(f"- **Weak Meta (Score < 50):** {weak_meta} posts\n\n")

        # By site
        for site_name, site_posts in by_site.items():
            site_avg = sum(p['overall_score'] for p in site_posts) / len(site_posts)
            site_missing = len([p for p in site_posts if p['meta_score'] == 0])

            f.write(f"## {site_name}\n\n")
            f.write(f"- **Posts:** {len(site_posts)}\n")
            f.write(f"- **Avg Score:** {site_avg:.1f}/100\n")
            f.write(f"- **Missing Meta:** {site_missing}\n\n")

            # Top 5 to optimize (first five entries in result order)
            f.write("### Top 5 Posts to Optimize\n\n")
            for rank, item in enumerate(site_posts[:5], 1):
                f.write(f"{rank}. **{item['title']}** (Score: {item['overall_score']:.0f})\n")
                f.write(f" - URL: {item['url']}\n")
                if item['meta_issues'] != 'None':
                    f.write(f" - Meta Issues: {item['meta_issues']}\n")
                if item['ai_recommendations']:
                    # Only the first line of the AI text is included.
                    first_line = item['ai_recommendations'].split('\n')[0]
                    f.write(f" - Recommendations: {first_line}\n")
                f.write("\n")

        f.write("\n## Legend\n\n")
        f.write("- **Title Score:** Evaluates length, power words, numbers, readability\n")
        f.write("- **Meta Score:** Evaluates presence, length, call-to-action\n")
        f.write("- **Overall Score:** 40% title + 60% meta description\n")
        f.write("- **Optimal Ranges:**\n")
        f.write(" - Title: 50-70 characters\n")
        f.write(" - Meta: 120-160 characters\n")

    logger.info(f"✓ Summary report: {report_file}")
|
||||
|
||||
def run(self, use_ai: bool = True, top_n: int = 10, include_drafts: bool = False):
    """
    Analyze all sites, export the results and log a completion summary.

    Any exception is logged with its traceback and the process exits with
    status 1.
    """
    try:
        self.analyze_all_sites(use_ai=use_ai, top_n=top_n, include_drafts=include_drafts)
        self.export_results()

        results = self.analysis_results
        separator = "="*60

        logger.info("\n" + separator)
        logger.info("ANALYSIS COMPLETE")
        logger.info(separator)
        logger.info(f"Total posts analyzed: {len(results)}")

        # Status breakdown; lines with a zero count are omitted.
        published = len([r for r in results if r['status'] == 'publish'])
        drafts = len([r for r in results if r['status'] == 'draft'])
        for label, count in (("Published", published), ("Drafts", drafts)):
            if count > 0:
                logger.info(f" - {label}: {count}")

        with_ai = len([r for r in results if r['ai_recommendations']])
        logger.info(f"AI recommendations: {with_ai}")
        logger.info(f"AI cost: ${self.ai_cost:.4f}")

    except Exception as e:
        logger.error(f"Analysis failed: {e}", exc_info=True)
        sys.exit(1)
|
||||
|
||||
|
||||
def check_meta_fields(site_url: str, username: str, password: str) -> None:
    """
    Log the meta fields exposed on the first published post of a site.

    Diagnostic helper: requests a single published post from the REST API
    and logs each meta key with a 60-character value preview, followed by
    the first 500 characters of the JSON-encoded meta object. Any error is
    logged rather than raised.

    Args:
        site_url: WordPress site URL
        username: WordPress username
        password: WordPress app password
    """
    rule = '=' * 60
    logger.info(f"\n{rule}")
    logger.info("META FIELD DIAGNOSTIC")
    logger.info(f"{rule}\n")
    logger.info(f"Site: {site_url}")
    logger.info("Checking available meta fields in first post...\n")

    endpoint = f"{site_url.rstrip('/')}/wp-json/wp/v2/posts"
    credentials = HTTPBasicAuth(username, password)
    query = {'per_page': 1, 'status': 'publish'}

    try:
        response = requests.get(endpoint, params=query, auth=credentials, timeout=10)
        response.raise_for_status()

        found = response.json()
        if not found:
            logger.error("No posts found")
            return

        post = found[0]
        logger.info(f"Post: {post.get('title', {}).get('rendered', 'N/A')}")
        logger.info(f"\nAvailable meta fields:")

        meta = post.get('meta')
        if not isinstance(meta, dict):
            logger.info(" (Meta is not a dictionary)")
        elif not meta:
            logger.info(" (No meta fields found)")
        else:
            for key, value in sorted(meta.items()):
                # Truncate long values so each field stays on one log line.
                preview = str(value)[:60]
                logger.info(f" • {key}: {preview}")

        logger.info(f"\nFull meta object:")
        meta_json = json.dumps(post.get('meta', {}), indent=2)
        logger.info(meta_json[:500])

    except Exception as e:
        logger.error(f"Error: {e}")
|
||||
|
||||
|
||||
def main():
    """
    Command-line entry point for the multi-site SEO analyzer.

    With ``--diagnose URL`` the meta-field diagnostic is run against that
    site using the credentials from Config and the process exits.
    Otherwise the full analysis runs. ``run()`` always exports to the
    default location; when ``--output`` is given the results (CSV and
    summary) are additionally written to that path, so the flag is no
    longer silently ignored.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='Analyze SEO across multiple WordPress sites'
    )
    parser.add_argument(
        '--no-ai',
        action='store_true',
        help='Skip AI recommendations to save cost'
    )
    parser.add_argument(
        '--top-n',
        type=int,
        default=10,
        help='Number of top posts to get AI recommendations for'
    )
    parser.add_argument(
        '--output',
        help='Output CSV file path'
    )
    parser.add_argument(
        '--include-drafts',
        action='store_true',
        help='Include draft posts in analysis (published + drafts)'
    )
    parser.add_argument(
        '--no-progressive',
        action='store_true',
        help='Disable real-time CSV writing (write only at end)'
    )
    parser.add_argument(
        '--diagnose',
        help='Diagnose meta fields for a site (URL). Example: --diagnose https://www.mistergeek.net'
    )

    args = parser.parse_args()

    # Diagnostic mode
    if args.diagnose:
        # Credentials come from Config only; there is no interactive prompt.
        username = Config.WORDPRESS_USERNAME
        password = Config.WORDPRESS_APP_PASSWORD

        if not username or not password:
            logger.error("WORDPRESS_USERNAME and WORDPRESS_APP_PASSWORD must be set in .env")
            sys.exit(1)

        check_meta_fields(args.diagnose, username, password)
        sys.exit(0)

    analyzer = MultiSiteSEOAnalyzer(progressive_csv=not args.no_progressive)
    analyzer.run(use_ai=not args.no_ai, top_n=args.top_n, include_drafts=args.include_drafts)

    # Skip the extra export when the analysis produced nothing.
    if args.output and analyzer.analysis_results:
        analyzer.export_results(args.output)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -1,347 +0,0 @@
|
||||
"""
|
||||
Keyword opportunity analyzer for SEO optimization.
|
||||
Identifies high-potential keywords ranking at positions 11-30.
|
||||
"""
|
||||
|
||||
import csv
|
||||
import json
|
||||
import argparse
|
||||
import time
|
||||
from pathlib import Path
|
||||
from openai import OpenAI
|
||||
from config import Config
|
||||
|
||||
|
||||
class OpportunityAnalyzer:
    """Analyze keyword opportunities for SEO optimization.

    Workflow (see ``run``): load posts with analytics from a CSV, keep the
    ones ranking inside a position window (or, when no position is known,
    the ones with traffic), score them, optionally ask an AI model for
    recommendations on the best ones, and export a CSV plus a text log.
    """

    # Expected click-through rate keyed by truncated search position.
    _EXPECTED_CTR = {
        11: 0.02, 12: 0.02, 13: 0.015, 14: 0.015, 15: 0.013,
        16: 0.012, 17: 0.011, 18: 0.01, 19: 0.009, 20: 0.008,
        21: 0.008, 22: 0.007, 23: 0.007, 24: 0.006, 25: 0.006,
        26: 0.006, 27: 0.005, 28: 0.005, 29: 0.005, 30: 0.004
    }
    # Used for positions missing from the table above.
    _DEFAULT_EXPECTED_CTR = 0.005

    def __init__(self):
        """Initialize analyzer; the AI client is created only with an API key."""
        self.config = Config
        self.output_dir = self.config.OUTPUT_DIR
        self.logs = []
        self.client = None

        if self.config.OPENROUTER_API_KEY:
            self.client = OpenAI(
                base_url="https://openrouter.ai/api/v1",
                api_key=self.config.OPENROUTER_API_KEY,
            )

    def log(self, message):
        """Record *message* in the in-memory log and echo it to stdout."""
        self.logs.append(message)
        print(message)

    @staticmethod
    def _to_int(value):
        """Parse a CSV cell as int; blank/None -> 0, '12.0' -> 12.

        Raises ValueError/TypeError/OverflowError for unparseable input.
        """
        if not value:
            return 0
        try:
            return int(value)
        except ValueError:
            # Spreadsheet exports often write integers as "12.0".
            return int(float(value))

    @staticmethod
    def _to_float(value):
        """Parse a CSV cell as float; blank/None -> 0.0."""
        return float(value or 0)

    @staticmethod
    def _join_recommendations(value):
        """Flatten an AI-provided recommendation field into one '; ' string.

        The model output is not guaranteed to follow the requested schema,
        so accept a list/tuple of anything, a bare string, or a scalar.
        """
        if value is None:
            return ''
        if isinstance(value, str):
            return value
        if isinstance(value, (list, tuple)):
            return '; '.join(str(item) for item in value)
        return str(value)

    def load_posts(self, posts_csv):
        """Load posts with analytics data.

        Args:
            posts_csv: ``Path`` of the input CSV (must support ``.exists()``).

        Returns:
            A list of post dicts with numeric fields converted. Rows whose
            numeric cells cannot be parsed are skipped and counted in the
            log. Returns an empty list if the file is missing.
        """
        posts = []
        if not posts_csv.exists():
            self.log(f"❌ File not found: {posts_csv}")
            return posts

        to_int = self._to_int
        to_float = self._to_float
        skipped = 0
        try:
            # newline='' lets the csv module handle embedded newlines itself.
            with open(posts_csv, 'r', encoding='utf-8', newline='') as f:
                for row in csv.DictReader(f):
                    try:
                        posts.append({
                            'id': row.get('ID', ''),
                            'title': row.get('Title', ''),
                            'url': row.get('URL', ''),
                            'impressions': to_int(row.get('impressions')),
                            'clicks': to_int(row.get('clicks')),
                            'avg_position': to_float(row.get('avg_position')),
                            'ctr': to_float(row.get('ctr')),
                            'traffic': to_int(row.get('traffic')),
                            'bounce_rate': to_float(row.get('bounce_rate')),
                            'keywords_count': to_int(row.get('keywords_count')),
                            'top_keywords': row.get('top_keywords', '')
                        })
                    except (ValueError, TypeError, OverflowError):
                        skipped += 1

            self.log(f"✓ Loaded {len(posts)} posts")
            if skipped:
                self.log(f"⚠️ Skipped {skipped} rows with unparseable numeric values")
        except Exception as e:
            self.log(f"❌ Error reading posts: {e}")

        return posts

    def filter_opportunities(self, posts, min_pos, max_pos, min_impressions):
        """Filter posts with keywords in opportunity range or high traffic for optimization.

        A post with a known position (> 0) is kept only when the position is
        within ``[min_pos, max_pos]`` and it has at least ``min_impressions``.
        A post without position data is kept when it has any traffic.
        """
        opportunities = []

        for post in posts:
            position = post.get('avg_position', 0)
            impressions = post.get('impressions', 0)
            traffic = post.get('traffic', 0)

            if position > 0:
                # Primary filter: position range (if data available)
                if min_pos <= position <= max_pos and impressions >= min_impressions:
                    opportunities.append(post)
            elif traffic > 0:
                # Fallback: no position data, include posts with any traffic
                opportunities.append(post)

        self.log(f"✓ Found {len(opportunities)} posts for optimization analysis")
        if opportunities:
            traffic_posts = [p for p in opportunities if p.get('traffic', 0) > 0]
            self.log(f" ({len(traffic_posts)} have traffic data, {len(opportunities) - len(traffic_posts)} selected for analysis)")
        return opportunities

    def calculate_opportunity_score(self, post):
        """Calculate opportunity score (0-100) for a post.

        Components: position (max 35), traffic potential from impressions
        (max 30), CTR gap versus the expected CTR (max 20) and content
        quality from traffic and bounce rate (max 15). Posts without
        position data (position <= 0) score 0 on the position and CTR
        components, and the total is clamped to the documented 0-100 range.
        """
        position = post.get('avg_position', 50)
        impressions = post.get('impressions', 0)
        ctr = post.get('ctr', 0)
        traffic = post.get('traffic', 0)

        if position > 0:
            # Position score (35%): closer to page 1 = higher, capped so
            # positions better than 11 cannot exceed the component weight.
            position_score = min(35, max(0, (30 - position) / 19 * 35))

            # CTR improvement potential (20%): gap between current and
            # expected CTR at this position.
            expected_ctr = self._EXPECTED_CTR.get(int(position), self._DEFAULT_EXPECTED_CTR)
            ctr_gap = max(0, expected_ctr - ctr)
            ctr_score = min(20, (ctr_gap / expected_ctr * 100 / 5) * 20)
        else:
            # No ranking data: nothing to say about position or CTR gap.
            position_score = 0
            ctr_score = 0

        # Traffic potential (30%): impressions normalized to 0-30
        traffic_potential = min(30, (impressions / 1000) * 30)

        # Content quality (15%): Existing traffic and engagement
        quality_score = min(15, (traffic / 100) * 7.5 +
                            (100 - post.get('bounce_rate', 50)) / 100 * 7.5)

        total = round(position_score + traffic_potential + ctr_score + quality_score, 1)
        return max(0, min(100, total))

    def estimate_traffic_gain(self, post):
        """Estimate potential traffic gain from optimization.

        Current traffic is ``impressions * ctr``; a one-position improvement
        is modelled as a 25% CTR increase. Returns the rounded gain as a
        float (0 when the position is 11 or better).
        """
        position = post.get('avg_position', 50)
        impressions = post.get('impressions', 0)
        ctr = post.get('ctr', 0)

        current_traffic = impressions * ctr
        # NOTE(review): position exactly 11 yields no gain although 11 is the
        # default lower bound of the opportunity window - confirm intent.
        if position > 11:
            improvement_factor = 1.25  # 25% improvement per position
            estimated_new_traffic = current_traffic * improvement_factor
            gain = estimated_new_traffic - current_traffic
        else:
            gain = 0

        return round(gain, 0)

    def generate_ai_recommendations(self, post):
        """Generate AI recommendations for top opportunities.

        Returns the parsed JSON object from the model reply, or ``None``
        when no client is configured, the request fails, or the reply does
        not contain parseable JSON.
        """
        if not self.client:
            return None

        try:
            keywords = post.get('top_keywords', '').split(',')[:5]
            keywords_str = ', '.join([k.strip() for k in keywords if k.strip()])

            prompt = f"""Analyze keyword optimization opportunities for this blog post:

Post Title: {post['title']}
Current Position: {post['avg_position']:.1f}
Monthly Impressions: {post['impressions']}
Current CTR: {post['ctr']:.2%}
Top Keywords: {keywords_str}

Provide 2-3 specific, actionable recommendations to:
1. Improve the SEO title to increase CTR
2. Enhance the meta description
3. Target structural improvements (headers, content gaps)

Focus on moving this post from positions 11-20 to page 1 (positions 1-10).
Be specific and practical.

Return as JSON:
{{
"title_recommendations": ["recommendation 1", "recommendation 2"],
"description_recommendations": ["recommendation 1", "recommendation 2"],
"content_recommendations": ["recommendation 1", "recommendation 2"],
"estimated_effort_hours": number,
"expected_position_improvement": number
}}"""

            response = self.client.chat.completions.create(
                model=self.config.AI_MODEL,
                messages=[{"role": "user", "content": prompt}],
                temperature=0.7,
                max_tokens=500
            )

            # The reply may wrap the JSON in prose; keep the outermost braces.
            result_text = response.choices[0].message.content or ''
            start_idx = result_text.find('{')
            end_idx = result_text.rfind('}') + 1
            if start_idx < 0 or end_idx <= start_idx:
                self.log(f"⚠️ Could not parse AI response for {post['title']}")
                return None

            try:
                return json.loads(result_text[start_idx:end_idx])
            except json.JSONDecodeError:
                self.log(f"⚠️ Could not parse AI response for {post['title']}")
                return None

        except Exception as e:
            self.log(f"⚠️ AI generation failed for {post['title']}: {e}")
            return None

    def export_opportunities_csv(self, opportunities, output_csv):
        """Export opportunities to CSV, highest opportunity score first.

        Each opportunity must already carry ``opportunity_score`` and
        ``estimated_traffic_gain``. Errors are logged, not raised.
        """
        if not opportunities:
            self.log("⚠️ No opportunities to export")
            return

        try:
            fieldnames = [
                'ID', 'Title', 'URL', 'avg_position', 'impressions', 'clicks',
                'ctr', 'traffic', 'bounce_rate', 'keywords_count', 'top_keywords',
                'opportunity_score', 'estimated_traffic_gain',
                'title_recommendations', 'description_recommendations',
                'content_recommendations', 'estimated_effort_hours',
                'expected_position_improvement'
            ]

            with open(output_csv, 'w', newline='', encoding='utf-8') as f:
                writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='ignore')
                writer.writeheader()

                for opp in sorted(opportunities, key=lambda x: x['opportunity_score'], reverse=True):
                    writer.writerow({
                        'ID': opp['id'],
                        'Title': opp['title'],
                        'URL': opp['url'],
                        'avg_position': opp['avg_position'],
                        'impressions': opp['impressions'],
                        'clicks': opp['clicks'],
                        'ctr': f"{opp['ctr']:.2%}",
                        'traffic': opp['traffic'],
                        'bounce_rate': opp['bounce_rate'],
                        'keywords_count': opp['keywords_count'],
                        'top_keywords': opp['top_keywords'],
                        'opportunity_score': opp['opportunity_score'],
                        'estimated_traffic_gain': opp['estimated_traffic_gain'],
                        'title_recommendations': opp.get('title_recommendations_str', ''),
                        'description_recommendations': opp.get('description_recommendations_str', ''),
                        'content_recommendations': opp.get('content_recommendations_str', ''),
                        'estimated_effort_hours': opp.get('estimated_effort_hours', ''),
                        'expected_position_improvement': opp.get('expected_position_improvement', '')
                    })

            self.log(f"✓ Exported {len(opportunities)} opportunities to {output_csv}")
        except Exception as e:
            self.log(f"❌ Error exporting CSV: {e}")

    def export_log(self, log_file):
        """Write every message logged so far to *log_file*."""
        try:
            with open(log_file, 'w', encoding='utf-8') as f:
                f.write("SEO Opportunity Analysis Report\n")
                f.write("=" * 60 + "\n\n")
                f.writelines(msg + "\n" for msg in self.logs)

            self.log(f"✓ Exported log to {log_file}")
        except Exception as e:
            self.log(f"❌ Error exporting log: {e}")

    def run(self, posts_csv, output_csv, min_position=11, max_position=30,
            min_impressions=50, top_n=20):
        """Run complete analysis workflow.

        Args:
            posts_csv: input CSV ``Path`` with posts and analytics.
            output_csv: destination of the opportunities CSV.
            min_position, max_position: inclusive position window.
            min_impressions: minimum impressions for ranked posts.
            top_n: how many top-scored posts get AI recommendations.
        """
        # A negative top_n would slice from the wrong end of the list.
        top_n = max(0, top_n)

        self.log("🔍 Starting keyword opportunity analysis...")
        self.log(f"Input: {posts_csv}")
        self.log(f"Position range: {min_position}-{max_position}")
        self.log(f"Min impressions: {min_impressions}")
        self.log(f"Top N for AI analysis: {top_n}\n")

        # Load posts
        posts = self.load_posts(posts_csv)
        if not posts:
            return

        # Filter opportunities
        opportunities = self.filter_opportunities(posts, min_position, max_position, min_impressions)
        if not opportunities:
            self.log("⚠️ No opportunities found in specified range")
            return

        # Calculate scores
        self.log("\n📊 Calculating opportunity scores...")
        for opp in opportunities:
            opp['opportunity_score'] = self.calculate_opportunity_score(opp)
            opp['estimated_traffic_gain'] = self.estimate_traffic_gain(opp)

        # Sort by score
        opportunities = sorted(opportunities, key=lambda x: x['opportunity_score'], reverse=True)

        # Get AI recommendations for top N
        ai_targets = opportunities[:top_n]
        if not self.client:
            # Without a client every request would return None; skip the
            # loop (and its per-item sleep) entirely.
            self.log("\n🤖 Skipping AI recommendations (AI client not configured)")
        else:
            self.log(f"\n🤖 Generating AI recommendations for top {len(ai_targets)} opportunities...")
            for i, opp in enumerate(ai_targets):
                self.log(f" [{i+1}/{len(ai_targets)}] {opp['title'][:50]}...")
                recommendations = self.generate_ai_recommendations(opp)

                if recommendations:
                    join = self._join_recommendations
                    opp['title_recommendations_str'] = join(recommendations.get('title_recommendations'))
                    opp['description_recommendations_str'] = join(recommendations.get('description_recommendations'))
                    opp['content_recommendations_str'] = join(recommendations.get('content_recommendations'))
                    opp['estimated_effort_hours'] = recommendations.get('estimated_effort_hours', '')
                    opp['expected_position_improvement'] = recommendations.get('expected_position_improvement', '')

                time.sleep(0.2)  # Rate limiting

        # Export
        self.log("\n📁 Exporting results...")
        self.export_opportunities_csv(opportunities, output_csv)

        # Export log
        log_dir = self.output_dir / 'logs'
        try:
            # parents=True: the output directory itself may not exist yet.
            log_dir.mkdir(parents=True, exist_ok=True)
        except OSError as e:
            self.log(f"❌ Error exporting log: {e}")
        else:
            self.export_log(log_dir / 'opportunity_analysis_log.txt')

        self.log(f"\n✓ Analysis complete! {len(opportunities)} opportunities identified.")
        self.log(f" Top opportunity: {opportunities[0]['title'][:50]}... (score: {opportunities[0]['opportunity_score']})")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse the options and run ``OpportunityAnalyzer``."""
    cli = argparse.ArgumentParser(description='Analyze keyword opportunities')

    # (flag, default path, help) for the file options.
    path_options = (
        ('--input', 'output/results/posts_with_analytics.csv', 'Input posts CSV'),
        ('--output', 'output/results/keyword_opportunities.csv', 'Output opportunities CSV'),
    )
    for flag, default_path, help_text in path_options:
        cli.add_argument(flag, type=Path, default=Path(default_path), help=help_text)

    # (flag, default value, help) for the integer thresholds.
    int_options = (
        ('--min-position', 11, 'Minimum position (start of range)'),
        ('--max-position', 30, 'Maximum position (end of range)'),
        ('--min-impressions', 50, 'Minimum impressions to consider'),
        ('--top-n', 20, 'Top N for AI recommendations'),
    )
    for flag, default_value, help_text in int_options:
        cli.add_argument(flag, type=int, default=default_value, help=help_text)

    opts = cli.parse_args()

    OpportunityAnalyzer().run(
        opts.input,
        opts.output,
        opts.min_position,
        opts.max_position,
        opts.min_impressions,
        opts.top_n,
    )


if __name__ == '__main__':
    main()
|
||||
@@ -1,436 +0,0 @@
|
||||
"""
|
||||
SEO optimization report generator.
|
||||
Consolidates all analysis into comprehensive markdown report and action plan.
|
||||
"""
|
||||
|
||||
import csv
|
||||
import json
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from config import Config
|
||||
|
||||
|
||||
class ReportGenerator:
    """Generate comprehensive SEO optimization report.

    Combines three CSV inputs (posts with analytics, keyword opportunities
    and content gaps) into a markdown report and a prioritized posts CSV.
    """

    # Expected click-through rate keyed by truncated search position.
    _EXPECTED_CTR = {
        1: 0.30, 2: 0.16, 3: 0.11, 4: 0.08, 5: 0.07,
        6: 0.06, 7: 0.05, 8: 0.05, 9: 0.04, 10: 0.04,
        11: 0.02, 12: 0.02, 13: 0.015, 14: 0.015, 15: 0.013,
        16: 0.012, 17: 0.011, 18: 0.01, 19: 0.009, 20: 0.008
    }
    # Used for ranked positions missing from the table above.
    _DEFAULT_EXPECTED_CTR = 0.005
    # Sum of the four per-phase estimates quoted in the 90-day action plan
    # headings (100 + 150 + 300 + 100 visits/month).
    _PLAN_ESTIMATED_GAIN = 650

    def __init__(self):
        """Initialize generator."""
        self.config = Config
        self.output_dir = self.config.OUTPUT_DIR
        self.logs = []

    def log(self, message):
        """Record *message* in the in-memory log and echo it to stdout."""
        self.logs.append(message)
        print(message)

    @staticmethod
    def _to_int(value):
        """Parse a CSV cell as int; blank/None -> 0, '12.0' -> 12.

        Raises ValueError/TypeError/OverflowError for unparseable input.
        """
        if not value:
            return 0
        try:
            return int(value)
        except ValueError:
            # Spreadsheet exports often write integers as "12.0".
            return int(float(value))

    @staticmethod
    def _to_float(value):
        """Parse a CSV cell as float; blank/None -> 0.0."""
        return float(value or 0)

    @staticmethod
    def _append_recommendations(report, heading, recommendations):
        """Append a bold *heading* and one bullet per ';'-separated item."""
        if not recommendations:
            return
        report.append(heading)
        for rec in recommendations.split(';'):
            rec = rec.strip()
            if rec:
                report.append(f"- {rec}\n")
        report.append("\n")

    def load_posts_with_analytics(self, csv_path):
        """Load posts with all analytics data.

        Returns a dict keyed by the ``ID`` column. Rows without an ID are
        ignored; rows with unparseable numeric cells are skipped and counted
        in the log instead of aborting the whole load.
        """
        posts = {}
        if not csv_path.exists():
            self.log(f"❌ File not found: {csv_path}")
            return posts

        to_int = self._to_int
        to_float = self._to_float
        skipped = 0
        try:
            # newline='' lets the csv module handle embedded newlines itself.
            with open(csv_path, 'r', encoding='utf-8', newline='') as f:
                for row in csv.DictReader(f):
                    post_id = row.get('ID')
                    if not post_id:
                        continue

                    # Handle different title column names
                    title = (row.get('Title') or
                             row.get('title') or
                             row.get('post_title') or '')

                    try:
                        posts[post_id] = {
                            'title': title,
                            'url': row.get('URL') or row.get('url') or row.get('post_url') or '',
                            'seo_title': row.get('SEO Title') or row.get('seo_title') or '',
                            'meta_description': row.get('Meta Description') or row.get('meta_description') or '',
                            'traffic': to_int(row.get('traffic')),
                            'users': to_int(row.get('users')),
                            'bounce_rate': to_float(row.get('bounce_rate')),
                            'impressions': to_int(row.get('impressions')),
                            'clicks': to_int(row.get('clicks')),
                            'avg_position': to_float(row.get('avg_position')),
                            'ctr': to_float(row.get('ctr')),
                            'keywords_count': to_int(row.get('keywords_count')),
                            'top_keywords': row.get('top_keywords', '')
                        }
                    except (ValueError, TypeError, OverflowError):
                        skipped += 1

            self.log(f"✓ Loaded {len(posts)} posts")
            if skipped:
                self.log(f"⚠️ Skipped {skipped} rows with unparseable numeric values")
        except Exception as e:
            self.log(f"❌ Error reading posts: {e}")

        return posts

    def load_opportunities(self, csv_path):
        """Load keyword opportunities keyed by post ID.

        A missing file is only a warning: the report can be produced
        without opportunities. Rows with parsing errors are skipped.
        """
        opportunities = {}
        if not csv_path.exists():
            self.log(f"⚠️ Opportunities file not found: {csv_path}")
            return opportunities

        try:
            with open(csv_path, 'r', encoding='utf-8', newline='') as f:
                for row in csv.DictReader(f):
                    post_id = row.get('ID')
                    if not post_id:
                        continue
                    try:
                        opportunities[post_id] = {
                            'opportunity_score': float(row.get('opportunity_score', 0) or 0),
                            # The gain is read via float first, e.g. "12.0".
                            'estimated_traffic_gain': int(float(row.get('estimated_traffic_gain', 0) or 0)),
                            'title_recommendations': row.get('title_recommendations', ''),
                            'description_recommendations': row.get('description_recommendations', ''),
                            'content_recommendations': row.get('content_recommendations', '')
                        }
                    except (ValueError, TypeError, OverflowError):
                        # Skip rows with parsing errors
                        continue

            self.log(f"✓ Loaded {len(opportunities)} opportunities")
        except Exception as e:
            self.log(f"⚠️ Error reading opportunities: {e}")

        return opportunities

    def load_content_gaps(self, csv_path):
        """Load content gap suggestions as a list of dicts.

        An unparseable ``traffic_potential`` is treated as 0 so that one bad
        cell does not discard the idea or the rest of the file.
        """
        gaps = []
        if not csv_path.exists():
            self.log(f"⚠️ Content gaps file not found: {csv_path}")
            return gaps

        try:
            with open(csv_path, 'r', encoding='utf-8', newline='') as f:
                for row in csv.DictReader(f):
                    try:
                        traffic_potential = self._to_int(row.get('traffic_potential'))
                    except (ValueError, TypeError, OverflowError):
                        traffic_potential = 0

                    gaps.append({
                        'title': row.get('title', ''),
                        'why_valuable': row.get('why_valuable', ''),
                        'search_volume': row.get('search_volume', ''),
                        'format': row.get('format', ''),
                        'traffic_potential': traffic_potential,
                        'priority': row.get('priority', 'medium')
                    })

            self.log(f"✓ Loaded {len(gaps)} content gap ideas")
        except Exception as e:
            self.log(f"⚠️ Error reading content gaps: {e}")

        return gaps

    def calculate_priority_score(self, post, opportunity=None):
        """Calculate comprehensive priority score (0-100).

        Components: position (max 35, only for positions in (0, 30]),
        traffic potential from impressions (max 30), CTR gap versus the
        expected CTR (max 20) and content quality from traffic and bounce
        rate (max 15). ``opportunity`` is accepted for interface
        compatibility and is not used in the computation.
        """
        position = post.get('avg_position', 50)
        impressions = post.get('impressions', 0)
        ctr = post.get('ctr', 0)
        traffic = post.get('traffic', 0)

        # Position score (35%): Closer to page 1 = higher
        if 0 < position <= 30:
            position_score = max(0, (30 - position) / 29 * 35)
        else:
            position_score = 0

        # Traffic potential (30%): Based on impressions
        traffic_potential = min(30, (impressions / 1000) * 30)

        # CTR improvement (20%): Gap vs expected; no ranking -> no CTR score
        if position > 0:
            expected_ctr = self._EXPECTED_CTR.get(int(position), self._DEFAULT_EXPECTED_CTR)
            ctr_gap = max(0, expected_ctr - ctr)
            ctr_score = min(20, (ctr_gap / expected_ctr * 100 / 5) * 20)
        else:
            ctr_score = 0

        # Content quality (15%): Existing traffic and engagement
        quality_score = min(15, (traffic / 100) * 7.5 +
                            (100 - post.get('bounce_rate', 50)) / 100 * 7.5)

        total = round(position_score + traffic_potential + ctr_score + quality_score, 1)
        return max(0, min(100, total))

    def generate_markdown_report(self, posts, opportunities, gaps, top_n=20):
        """Generate comprehensive markdown report.

        Args:
            posts: dict of post ID -> post dict (see load_posts_with_analytics).
            opportunities: dict of post ID -> opportunity dict.
            gaps: list of content gap dicts.
            top_n: number of top-priority posts to detail.

        Returns:
            The report as one string; fragments are joined with newlines.
        """
        report = []
        report.append("# SEO Optimization Strategy Report\n")
        report.append(f"*Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*\n\n")

        # Calculate metrics
        total_traffic = sum(p.get('traffic', 0) for p in posts.values())
        total_impressions = sum(p.get('impressions', 0) for p in posts.values())
        # Only posts with ranking data contribute to the average position.
        ranked_positions = [p.get('avg_position', 0) for p in posts.values()
                            if p.get('avg_position', 0) > 0]
        avg_position = sum(ranked_positions) / max(1, len(ranked_positions))

        # Executive Summary
        report.append("## Executive Summary\n")
        report.append(f"- **Total Posts Analyzed:** {len(posts)}\n")
        report.append(f"- **Current Monthly Traffic:** {total_traffic:,} visits\n")
        report.append(f"- **Total Impressions (90d):** {total_impressions:,}\n")
        report.append(f"- **Average Search Position:** {avg_position:.1f}\n")
        report.append(f"- **Optimization Opportunities:** {len(opportunities)}\n")
        report.append(f"- **Content Gap Ideas:** {len(gaps)}\n")
        report.append(f"- **Potential Traffic Gain (Phase 1):** +{sum(o.get('estimated_traffic_gain', 0) for o in opportunities.values()):,} visits/month\n\n")

        # Key Metrics
        report.append("### Quick Wins (Estimated Impact)\n\n")
        quick_wins = sorted(opportunities.values(),
                            key=lambda x: x.get('estimated_traffic_gain', 0),
                            reverse=True)[:5]
        total_quick_win_traffic = sum(w.get('estimated_traffic_gain', 0) for w in quick_wins)
        report.append(f"Top 5 opportunities could bring **+{total_quick_win_traffic:,} visits/month**\n\n")

        # Top N Posts to Optimize (heading follows the top_n argument)
        report.append(f"## Top {top_n} Posts to Optimize\n\n")
        report.append("Ranked by optimization potential (combination of position, traffic potential, and CTR improvement).\n\n")

        # Score all posts
        scored_posts = []
        for post_id, post in posts.items():
            opp = opportunities.get(post_id, {})
            score = self.calculate_priority_score(post, opp)
            scored_posts.append((post_id, post, opp, score))

        scored_posts = sorted(scored_posts, key=lambda x: x[3], reverse=True)
        top_posts = scored_posts[:top_n]

        for i, (post_id, post, opp, score) in enumerate(top_posts, 1):
            position = post.get('avg_position', 0)
            impressions = post.get('impressions', 0)
            traffic = post.get('traffic', 0)

            report.append(f"### {i}. {post['title']}\n\n")
            report.append(f"**Current Position:** {position:.1f} | **Impressions:** {impressions:,} | **Traffic:** {traffic} visits\n")
            report.append(f"**Priority Score:** {score:.1f}/100 | **Estimated Gain:** +{opp.get('estimated_traffic_gain', 0)} visits\n\n")

            if 0 < position <= 30:
                report.append(f"**Status:** Ranking on {'page 1' if position <= 10 else 'page 2-3'}\n\n")

            self._append_recommendations(report, "**Title Optimization:**\n",
                                         opp.get('title_recommendations'))
            self._append_recommendations(report, "**Meta Description:**\n",
                                         opp.get('description_recommendations'))
            self._append_recommendations(report, "**Content Improvements:**\n",
                                         opp.get('content_recommendations'))

            report.append("---\n\n")

        # Keyword Opportunities Summary
        report.append("## Keyword Opportunities Summary\n\n")
        top_ids = {entry[0] for entry in top_posts}
        page_2_count = 0
        page_3_count = 0

        for opp_id in opportunities:
            if opp_id not in top_ids:
                continue
            position = posts.get(opp_id, {}).get('avg_position', 0)

            # Average positions are floats: use half-open ranges so values
            # such as 15.5 are not lost between the two buckets.
            if 11 <= position < 16:
                page_2_count += 1
            elif 16 <= position <= 30:
                page_3_count += 1

        report.append(f"**Page 2 (Positions 11-15):** {page_2_count} keywords ready for quick wins\n")
        report.append(f"**Page 3+ (Positions 16-30):** {page_3_count} keywords with medium effort\n\n")

        # Content Gap Analysis
        report.append("## Content Gap Analysis\n\n")
        report.append(f"Identified **{len(gaps)} high-value content opportunities** not currently covered:\n\n")

        # High-priority ideas first; the stable sort keeps file order otherwise.
        for i, gap in enumerate(sorted(gaps, key=lambda x: x.get('priority') == 'high', reverse=True)[:15], 1):
            report.append(f"### {i}. {gap['title']}\n\n")
            report.append(f"**Priority:** {gap.get('priority', 'medium').upper()}\n")
            report.append(f"**Search Volume:** {gap.get('search_volume', 'medium')}\n")
            report.append(f"**Format:** {gap.get('format', 'guide')}\n")
            report.append(f"**Estimated Traffic Potential:** +{gap.get('traffic_potential', 50)} visits/month\n\n")

            if gap.get('why_valuable'):
                report.append(f"**Why valuable:** {gap['why_valuable']}\n\n")

        # 90-Day Action Plan
        report.append("## 90-Day Action Plan\n\n")
        report.append("### Week 1-2: Quick Wins (Estimated +100 visits/month)\n\n")
        report.append("Focus on posts with highest opportunity scores that are already ranking on page 2:\n\n")
        for i, (post_id, post, opp, score) in enumerate(top_posts[:5], 1):
            report.append(f"{i}. **{post['title'][:60]}**\n")
            report.append(" - Update SEO title and meta description\n")
            report.append(" - Estimated effort: 30-60 minutes\n")
            report.append(f" - Expected gain: +{opp.get('estimated_traffic_gain', 50)} visits\n\n")

        report.append("### Week 3-4: Core Content Optimization (Estimated +150 visits/month)\n\n")
        report.append("Improve content structure and internal linking:\n\n")
        for i, (post_id, post, opp, score) in enumerate(scored_posts[5:15][:5], 1):
            report.append(f"{i}. **{post['title'][:60]}**\n")
            report.append(" - Add missing content sections\n")
            report.append(" - Improve header structure\n")
            report.append(" - Estimated effort: 2-3 hours\n\n")

        report.append("### Week 5-8: New Content Creation (Estimated +300 visits/month)\n\n")
        report.append("Create 3-5 pieces of new content targeting high-value gaps:\n\n")
        for i, gap in enumerate(sorted(gaps, key=lambda x: x.get('traffic_potential', 0), reverse=True)[:4], 1):
            report.append(f"{i}. **{gap['title']}** ({gap.get('format', 'guide').title()})\n")
            report.append(" - Estimated effort: 4-6 hours\n")
            report.append(f" - Expected traffic: +{gap.get('traffic_potential', 50)} visits/month\n\n")

        report.append("### Week 9-12: Refinement & Analysis (Estimated +100 visits/month)\n\n")
        report.append("- Monitor ranking changes and CTR improvements\n")
        report.append("- Refine underperforming optimizations\n")
        report.append("- Re-run keyword analysis to identify new opportunities\n\n")

        # Express the plan total relative to the traffic actually loaded
        # instead of a fixed percentage; omit it when there is no baseline.
        plan_gain = self._PLAN_ESTIMATED_GAIN
        if total_traffic > 0:
            growth = plan_gain / total_traffic
            report.append(f"**Total Estimated 90-Day Impact: +{plan_gain} visits/month (+~{growth:.1%} growth)**\n\n")
        else:
            report.append(f"**Total Estimated 90-Day Impact: +{plan_gain} visits/month**\n\n")

        # Methodology
        report.append("## Methodology\n\n")
        report.append("### Priority Score Calculation\n\n")
        report.append("Each post is scored based on:\n")
        report.append("- **Position (35%):** Posts ranking 11-20 get highest scores (closest to page 1)\n")
        report.append("- **Traffic Potential (30%):** Based on search impressions\n")
        report.append("- **CTR Gap (20%):** Difference between current and expected CTR for position\n")
        report.append("- **Content Quality (15%):** Existing traffic and bounce rate\n\n")

        report.append("### Data Sources\n\n")
        report.append("- **Google Analytics:** Traffic metrics (90-day window)\n")
        report.append("- **Google Search Console:** Keyword data, impressions, clicks, positions\n")
        report.append("- **WordPress REST API:** Current SEO metadata and content structure\n\n")

        report.append("### Assumptions\n\n")
        report.append("- Traffic estimates are based on historical CTR and position data\n")
        report.append("- Moving one position up typically improves CTR by 20-30%\n")
        report.append("- Page 1 rankings (positions 1-10) receive ~20-30% of total impressions\n")
        report.append("- New content takes 4-8 weeks to gain significant traction\n\n")

        return "\n".join(report)

    def export_report(self, report_text, output_md):
        """Write the markdown report to *output_md*; errors are logged."""
        try:
            with open(output_md, 'w', encoding='utf-8') as f:
                f.write(report_text)

            self.log(f"✓ Exported report to {output_md}")
        except Exception as e:
            self.log(f"❌ Error exporting report: {e}")

    def export_prioritized_csv(self, posts, opportunities, output_csv):
        """Export all posts with priority scores, highest score first."""
        try:
            scored_posts = []
            for post_id, post in posts.items():
                opp = opportunities.get(post_id, {})
                score = self.calculate_priority_score(post, opp)

                scored_posts.append({
                    'ID': post_id,
                    'Title': post.get('title', ''),
                    'URL': post.get('url', ''),
                    'Priority_Score': score,
                    'Estimated_Traffic_Gain': opp.get('estimated_traffic_gain', 0),
                    'Current_Position': post.get('avg_position', 0),
                    'Impressions': post.get('impressions', 0),
                    'Traffic': post.get('traffic', 0),
                    'CTR': f"{post.get('ctr', 0):.2%}",
                    'Keywords_Count': post.get('keywords_count', 0)
                })

            scored_posts.sort(key=lambda x: x['Priority_Score'], reverse=True)

            fieldnames = ['ID', 'Title', 'URL', 'Priority_Score', 'Estimated_Traffic_Gain',
                          'Current_Position', 'Impressions', 'Traffic', 'CTR', 'Keywords_Count']

            with open(output_csv, 'w', newline='', encoding='utf-8') as f:
                writer = csv.DictWriter(f, fieldnames=fieldnames)
                writer.writeheader()
                writer.writerows(scored_posts)

            self.log(f"✓ Exported {len(scored_posts)} prioritized posts to {output_csv}")
        except Exception as e:
            self.log(f"❌ Error exporting prioritized CSV: {e}")

    def run(self, posts_csv, opportunities_csv, gaps_csv, output_md, output_prioritized_csv, top_n=20):
        """Run complete report generation workflow.

        Loads the three inputs, then writes the markdown report and the
        prioritized CSV. Stops early when no posts could be loaded;
        opportunities and content gaps are optional.
        """
        self.log("📊 Generating SEO optimization report...")
        self.log("Input files: posts_with_analytics, opportunities, content_gaps\n")

        # Load data
        posts = self.load_posts_with_analytics(posts_csv)
        opportunities = self.load_opportunities(opportunities_csv)
        gaps = self.load_content_gaps(gaps_csv)

        if not posts:
            self.log("❌ No posts loaded. Cannot generate report.")
            return

        # Generate report
        self.log("\n📝 Generating markdown report...")
        report_text = self.generate_markdown_report(posts, opportunities, gaps, top_n)

        # Export report
        self.log("\n📁 Exporting files...")
        self.export_report(report_text, output_md)
        self.export_prioritized_csv(posts, opportunities, output_prioritized_csv)

        self.log("\n✓ Report generation complete!")
|
||||
|
||||
|
||||
def main():
    """Parse command-line options and run the report generator.

    Every path option defaults to a file under ``output/results``; ``--top-n``
    defaults to 20. The parsed values are handed to ``ReportGenerator.run``.
    """
    results_dir = Path('output/results')

    # (flag, default file name inside results_dir, help text)
    path_options = (
        ('--posts-with-analytics', 'posts_with_analytics.csv', 'Posts with analytics CSV'),
        ('--keyword-opportunities', 'keyword_opportunities.csv', 'Keyword opportunities CSV'),
        ('--content-gaps', 'content_gaps.csv', 'Content gaps CSV'),
        ('--output-report', 'seo_optimization_report.md', 'Output markdown report'),
        ('--output-csv', 'posts_prioritized.csv', 'Output prioritized posts CSV'),
    )

    parser = argparse.ArgumentParser(description='Generate SEO optimization report')
    for flag, file_name, help_text in path_options:
        parser.add_argument(flag, type=Path, default=results_dir / file_name, help=help_text)
    parser.add_argument('--top-n', type=int, default=20,
                        help='Number of top posts to detail')

    options = parser.parse_args()

    ReportGenerator().run(
        options.posts_with_analytics,
        options.keyword_opportunities,
        options.content_gaps,
        options.output_report,
        options.output_csv,
        options.top_n,
    )
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -1,73 +0,0 @@
|
||||
#!/bin/bash
# SEO Analysis & Improvement System - full pipeline runner.
#
# Runs, in order: analytics_importer.py, opportunity_analyzer.py and
# report_generator.py with the interpreter from ./venv. Every path below is
# relative, so invoke this script from the directory that contains venv/,
# input/ and the Python scripts.
#
# -e: stop at the first failing step; -u: fail on unset variables;
# pipefail: a failing command inside a pipeline fails the pipeline.
set -euo pipefail

PYTHON="./venv/bin/python"

# Abort with "Missing <path>" plus a hint line when a required input is absent.
require_file() {
    local path="$1"
    local hint="$2"
    if [ ! -f "$path" ]; then
        echo "❌ Missing $path"
        echo "$hint"
        exit 1
    fi
}

echo "╔════════════════════════════════════════════════════════════╗"
echo "║ SEO Analysis & Improvement System - Full Pipeline ║"
echo "╚════════════════════════════════════════════════════════════╝"
echo ""

# Check if venv exists
if [ ! -d "venv" ]; then
    echo "❌ Virtual environment not found. Please run: python3 -m venv venv"
    exit 1
fi

# A venv directory without a usable interpreter would otherwise only fail
# later with a bare "No such file or directory" from the first step.
if [ ! -x "$PYTHON" ]; then
    echo "❌ $PYTHON is missing or not executable. Recreate it with: python3 -m venv venv"
    exit 1
fi

# Check if input files exist
require_file "input/new-propositions.csv" \
    "Please place your WordPress posts CSV in input/ directory"
require_file "input/analytics/ga4_export.csv" \
    "Please export GA4 data and place it in input/analytics/"

# Create output directories
mkdir -p output/results
mkdir -p output/logs

echo "📊 Step 1: Analytics Integration"
echo " Merging GA4, Search Console, and WordPress data..."
"$PYTHON" analytics_importer.py
echo ""

echo "🔍 Step 2: Keyword Opportunity Analysis"
echo " Identifying high-potential optimization opportunities..."
"$PYTHON" opportunity_analyzer.py \
    --input output/results/posts_with_analytics.csv \
    --output output/results/keyword_opportunities.csv \
    --min-position 11 \
    --max-position 30 \
    --min-impressions 50 \
    --top-n 20
echo ""

echo "📝 Step 3: Report Generation"
echo " Creating comprehensive SEO optimization report..."
"$PYTHON" report_generator.py
echo ""

echo "╔════════════════════════════════════════════════════════════╗"
echo "║ ✅ Analysis Complete! ║"
echo "╚════════════════════════════════════════════════════════════╝"
echo ""
echo "📂 Results Location:"
echo " └─ output/results/seo_optimization_report.md"
echo ""
echo "📊 Key Files:"
echo " ├─ posts_prioritized.csv (all posts ranked 0-100)"
# No fixed count here: the number of opportunities depends on the input data.
echo " ├─ keyword_opportunities.csv (keyword optimization opportunities)"
echo " └─ posts_with_analytics.csv (enriched dataset)"
echo ""
echo "📋 Logs:"
echo " └─ output/logs/"
echo ""
echo "🚀 Next Steps:"
echo " 1. Open: output/results/seo_optimization_report.md"
echo " 2. Review Top 20 Posts to Optimize"
echo " 3. Start with Quick Wins (positions 11-15)"
echo " 4. Follow 90-day action plan"
echo ""
|
||||
@@ -1,388 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
DEPRECATED: SEO Automation CLI
|
||||
|
||||
This script is deprecated. Please use the new unified CLI:
|
||||
- ./seo export
|
||||
- ./seo analyze
|
||||
- ./seo seo_check
|
||||
- ./seo categories
|
||||
- ./seo full_pipeline
|
||||
|
||||
To see all commands: ./seo help
|
||||
"""
|
||||
|
||||
import sys
|
||||
import subprocess
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from config import Config
|
||||
import os
|
||||
|
||||
class SEOCLI:
    """DEPRECATED: Main CLI orchestrator for SEO workflows. Use new ./seo CLI instead.

    Each workflow method launches one of the sibling scripts located next to
    this file as a child process (working directory: the project root) and
    returns ``True`` when the child exits with status 0, ``False`` otherwise.
    """

    def __init__(self):
        """Initialize CLI: print the deprecation notice and resolve directories."""
        print("⚠️ DEPRECATION WARNING: This CLI is deprecated. Use ./seo instead.")
        print(" Run './seo help' to see new commands.")
        self.scripts_dir = Path(__file__).parent
        self.project_dir = self.scripts_dir.parent
        self.output_dir = self.project_dir / 'output' / 'reports'

    def _script_command(self, script_name, *args):
        """Return an argv list running *script_name* with the current interpreter.

        An argv list (instead of a shell string) keeps script paths and file
        names containing spaces or quotes intact and avoids shell injection.
        """
        return [sys.executable, str(self.scripts_dir / script_name), *map(str, args)]

    def _resolve_posts_csv(self, csv_file, missing_message):
        """Return *csv_file*, or the latest export; print *missing_message* if none."""
        if not csv_file:
            csv_file = self.get_latest_file("all_posts_for_ai_decision_*.csv")
        if not csv_file:
            print(missing_message)
        return csv_file

    def run_command(self, command, description):
        """Run a command and show progress.

        Args:
            command: Preferably an argv list, executed without a shell. A plain
                string is still accepted and executed through the shell.
            description: Human-readable label printed around the run.

        Returns:
            True if the command exited with status 0, False on a non-zero
            status or when the process could not be started.
        """
        print(f"\n{'='*70}")
        print(f"▶ {description}")
        print(f"{'='*70}\n")

        try:
            result = subprocess.run(
                command,
                shell=isinstance(command, str),  # only legacy string commands need a shell
                cwd=self.project_dir,
            )
        except (OSError, ValueError, subprocess.SubprocessError) as e:
            print(f"\n❌ Error: {e}")
            return False

        if result.returncode != 0:
            print(f"\n❌ Error running: {description}")
            return False
        print(f"\n✓ {description} completed successfully")
        return True

    def get_latest_file(self, pattern):
        """Get most recent file matching pattern.

        Falls back to the newer ``all_posts_*.csv`` naming when *pattern*
        matches nothing. Returns the path as a string, or None if no file
        matches either pattern.
        """
        import glob

        # Support both old and new naming patterns
        files = glob.glob(str(self.output_dir / pattern))
        if not files:
            files = glob.glob(str(self.output_dir / "all_posts_*.csv"))
        if not files:
            return None
        return max(files, key=os.path.getctime)

    def export_posts(self):
        """Export all posts to CSV."""
        cmd = self._script_command("export_posts_for_ai_decision.py")
        return self.run_command(cmd, "STEP 1: Export All Posts")

    def analyze_with_ai(self, csv_file=None):
        """Analyze exported posts with AI (defaults to the latest export)."""
        csv_file = self._resolve_posts_csv(
            csv_file, "\n❌ No exported CSV found. Run 'seo-cli export' first.")
        if not csv_file:
            return False

        cmd = self._script_command("ai_analyze_posts_for_decisions.py", csv_file)
        return self.run_command(cmd, "STEP 2: Analyze with AI")

    def recategorize_with_ai(self, csv_file=None):
        """Recategorize posts using AI (defaults to the latest export)."""
        csv_file = self._resolve_posts_csv(
            csv_file, "\n❌ No exported CSV found. Run 'seo-cli export' first.")
        if not csv_file:
            return False

        cmd = self._script_command("ai_recategorize_posts.py", csv_file)
        return self.run_command(cmd, "Recategorizing Posts with AI")

    def seo_check(self, top_n=None):
        """Check SEO quality of titles and meta descriptions.

        ``--top-n`` is only passed to the script when *top_n* is truthy.
        """
        cmd = self._script_command("multi_site_seo_analyzer.py")
        if top_n:
            cmd += ["--top-n", str(top_n)]

        return self.run_command(cmd, f"SEO Quality Check (Top {top_n or 'All'} posts)")

    def import_analytics(self, ga_export, gsc_export, posts_csv=None):
        """Import analytics data, writing ``output/posts_with_analytics.csv``."""
        posts_csv = self._resolve_posts_csv(
            posts_csv, "\n❌ No posts CSV found. Run 'seo-cli export' first.")
        if not posts_csv:
            return False

        cmd = self._script_command(
            "analytics_importer.py",
            "--ga-export", ga_export,
            "--gsc-export", gsc_export,
            "--posts-csv", posts_csv,
            "--output", "output/posts_with_analytics.csv",
        )
        return self.run_command(cmd, "STEP: Import Analytics Data")

    def full_pipeline(self, analyze=True, seo=True):
        """Run complete pipeline: export → analyze → seo check.

        Stops at the first failing step and returns False; returns True when
        every selected step succeeded.
        """
        steps = [
            ("Export", self.export_posts),
        ]

        if analyze:
            steps.append(("Analyze", self.analyze_with_ai))

        if seo:
            steps.append(("SEO Check", self.seo_check))

        print("\n" + "="*70)
        print("🚀 STARTING FULL PIPELINE")
        print("="*70)
        print(f"\nSteps to run: {', '.join([s[0] for s in steps])}\n")

        completed = 0
        for name, func in steps:
            if not func():
                print(f"\n⚠️ Pipeline stopped at: {name}")
                return False
            completed += 1

        print("\n" + "="*70)
        print(f"✓ PIPELINE COMPLETE - All {completed} steps succeeded!")
        print("="*70)
        print("\nNext steps:")
        print("1. Review results in output/reports/")
        print("2. Check: posts_with_ai_recommendations_*.csv")
        print("3. Follow AI recommendations to optimize your content")
        return True

    def manage_categories(self):
        """Run category management with AI recommendations."""
        cmd = self._script_command("category_manager.py")
        return self.run_command(cmd, "Category Management with AI Recommendations")

    def approve_recommendations(self, csv_files=None):
        """Approve recommendations from CSV files.

        Returns False without running anything when *csv_files* is empty.
        """
        if not csv_files:
            print("\n❌ No CSV files provided for approval.")
            return False

        # Each file becomes its own argv entry, so no manual quoting is needed.
        cmd = self._script_command("user_approval.py", *csv_files)
        return self.run_command(cmd, f"Approving Recommendations from {len(csv_files)} files")

    def show_status(self):
        """Show the ten most recently modified entries of the output directory."""
        import glob
        from datetime import datetime

        print("\n" + "="*70)
        print("📊 OUTPUT FILES STATUS")
        print("="*70 + "\n")

        files = glob.glob(str(self.output_dir / "*"))

        if not files:
            print("No output files yet. Run 'seo-cli export' to get started.\n")
            return

        # Sort by the same timestamp that is displayed as "Modified" below.
        files.sort(key=os.path.getmtime, reverse=True)

        for file in files[:10]:  # Show last 10 files
            size = os.path.getsize(file) / 1024  # KB
            date = datetime.fromtimestamp(os.path.getmtime(file)).strftime('%Y-%m-%d %H:%M:%S')
            filename = os.path.basename(file)

            print(f" {filename}")
            print(f" Size: {size:.1f} KB | Modified: {date}")
            print()

    def list_workflows(self):
        """List available workflows with their command, duration and cost."""
        workflows = {
            'export': {
                'description': 'Export all posts from your 3 WordPress sites',
                'command': 'seo-cli export',
                'time': '5-10 min',
                'cost': 'Free',
            },
            'analyze': {
                'description': 'Analyze exported posts with Claude AI',
                'command': 'seo-cli analyze',
                'time': '5-15 min',
                'cost': '$1.50-2.00',
            },
            'recategorize': {
                'description': 'Re-categorize posts for better organization',
                'command': 'seo-cli recategorize',
                'time': '5-15 min',
                'cost': '$1.50-2.00',
            },
            'seo-check': {
                'description': 'Check SEO quality of titles and descriptions',
                'command': 'seo-cli seo-check [--top-n 50]',
                'time': '3-5 min',
                'cost': 'Free or $0.20-0.50',
            },
            'analytics': {
                'description': 'Combine Google Analytics & Search Console data',
                'command': 'seo-cli analytics GA4.csv GSC.csv',
                'time': '5 min',
                'cost': 'Free',
            },
            'full-pipeline': {
                'description': 'Run complete pipeline: export → analyze → seo-check',
                'command': 'seo-cli full-pipeline',
                'time': '15-30 min',
                'cost': '$1.50-2.50',
            },
            'categories': {
                'description': 'Manage categories across all sites with AI recommendations',
                'command': 'seo-cli categories',
                'time': '10-20 min',
                'cost': '$0.50-1.00',
            },
            'approve': {
                'description': 'Review and approve SEO recommendations',
                'command': 'seo-cli approve [csv_file1] [csv_file2]',
                'time': 'Variable',
                'cost': 'Free',
            },
        }

        print("\n" + "="*70)
        print("📋 AVAILABLE WORKFLOWS")
        print("="*70 + "\n")

        for name, info in workflows.items():
            print(f"🔹 {name.upper()}")
            print(f" {info['description']}")
            print(f" Command: {info['command']}")
            print(f" Time: {info['time']} | Cost: {info['cost']}")
            print()

    def show_help(self):
        """Show help message."""
        print("\n" + "="*70)
        print("🚀 SEO AUTOMATION CLI - Workflow Orchestrator")
        print("="*70 + "\n")

        print("QUICK START:")
        print(" seo-cli full-pipeline Run complete workflow")
        print(" seo-cli export Export all posts")
        print(" seo-cli analyze Analyze with AI")
        print(" seo-cli recategorize Re-categorize posts with AI")
        print(" seo-cli seo-check Check SEO quality")
        print()

        print("CHAINING WORKFLOWS:")
        print(" seo-cli export && seo-cli analyze && seo-cli seo-check")
        print()

        print("ADVANCED:")
        print(" seo-cli seo-check --top-n 50 Check top 50 posts")
        print(" seo-cli analytics GA4.csv GSC.csv Import analytics data")
        print(" seo-cli status Show output files")
        print(" seo-cli list List all workflows")
        print()

        print("Learn more:")
        print(" Read: WORKFLOWS.md (complete guide)")
        print(" Read: scripts/*/README.md (workflow details)")
        print()
|
||||
|
||||
|
||||
def main():
    """Parse the command line and dispatch to the matching ``SEOCLI`` workflow.

    Returns:
        Process exit status: 0 when the selected workflow succeeded (or when
        help was shown because no command was given), 1 otherwise.
    """
    cli = SEOCLI()

    parser = argparse.ArgumentParser(
        description='SEO Automation CLI - Chain workflows together',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
seo-cli export # Export posts
seo-cli full-pipeline # Export + Analyze + SEO check
seo-cli export && seo-cli analyze # Chain commands
seo-cli seo-check --top-n 50 # Check top 50 posts
seo-cli analytics ga4.csv gsc.csv # Import analytics
seo-cli status # Show output files
"""
    )

    # (sub-command, help text, [(argument names, add_argument keyword options), ...])
    command_specs = [
        ('export', 'Export all posts from WordPress sites', []),
        ('analyze', 'Analyze exported posts with Claude AI', []),
        ('recategorize', 'Re-categorize posts with Claude AI', []),
        ('seo-check', 'Check SEO quality of titles/descriptions', [
            (('--top-n',), {'type': int, 'help': 'Analyze top N posts with AI (costs money)'}),
        ]),
        ('analytics', 'Import Google Analytics & Search Console', [
            (('ga_export',), {'help': 'Path to GA4 export CSV'}),
            (('gsc_export',), {'help': 'Path to Search Console export CSV'}),
        ]),
        ('full-pipeline', 'Complete pipeline: export → analyze → seo-check', [
            (('--no-analyze',), {'action': 'store_true', 'help': 'Skip AI analysis'}),
            (('--no-seo',), {'action': 'store_true', 'help': 'Skip SEO check'}),
        ]),
        ('categories', 'Manage categories with AI recommendations', []),
        ('approve', 'Approve recommendations from CSV files', [
            (('csv_files',), {'nargs': '*',
                              'help': 'CSV files containing recommendations to approve'}),
        ]),
        ('status', 'Show status of output files', []),
        ('list', 'List all available workflows', []),
        ('help', 'Show this help message', []),
    ]

    subparsers = parser.add_subparsers(dest='command', help='Workflow to run')
    for command_name, summary, argument_specs in command_specs:
        sub = subparsers.add_parser(command_name, help=summary)
        for names, options in argument_specs:
            sub.add_argument(*names, **options)

    args = parser.parse_args()

    # No sub-command given: show help and report success.
    if not args.command:
        cli.show_help()
        return 0

    def informational(action):
        """Wrap a display-only action so that it always counts as a success."""
        def runner():
            action()
            return True
        return runner

    handlers = {
        'export': cli.export_posts,
        'analyze': cli.analyze_with_ai,
        'recategorize': cli.recategorize_with_ai,
        'seo-check': lambda: cli.seo_check(top_n=args.top_n),
        'analytics': lambda: cli.import_analytics(args.ga_export, args.gsc_export),
        'full-pipeline': lambda: cli.full_pipeline(
            analyze=not args.no_analyze,
            seo=not args.no_seo,
        ),
        'categories': cli.manage_categories,
        'approve': lambda: cli.approve_recommendations(args.csv_files),
        'status': informational(cli.show_status),
        'list': informational(cli.list_workflows),
        'help': informational(cli.show_help),
    }

    handler = handlers.get(args.command)
    if handler is None:
        # Unknown command: show help and signal failure.
        cli.show_help()
        return 1

    return 0 if handler() else 1
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
@@ -1,352 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
User Approval Mechanism for SEO Recommendations
|
||||
Allows users to review and approve recommendations from CSV files.
|
||||
"""
|
||||
|
||||
import csv
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
from datetime import datetime
|
||||
from config import Config
|
||||
|
||||
# Setup logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(levelname)s - %(message)s'
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class UserApprovalSystem:
    """System for reviewing and approving SEO recommendations.

    Recommendations are plain dicts, one per CSV row. Reviewing sorts them
    into three lists (approved, rejected, pending); each non-empty list is
    exported to its own timestamped CSV file inside ``output_dir``.
    """

    # Numeric scores used for textual confidence labels during auto-approval.
    _CONFIDENCE_LABELS = {'high': 0.9, 'medium': 0.6, 'low': 0.3}
    # Score used when the confidence is missing or cannot be interpreted.
    _DEFAULT_CONFIDENCE = 0.3

    def __init__(self):
        """Initialize the approval system."""
        self.output_dir = Path(__file__).parent.parent / 'output'
        self.approved_recommendations = []
        self.rejected_recommendations = []
        self.pending_recommendations = []

    def load_recommendations_from_csv(self, csv_file: str) -> List[Dict]:
        """Load recommendations from CSV file.

        Returns:
            One dict per data row. An empty list when the file does not exist;
            the rows read so far when reading fails part-way (the error is
            logged in both cases).
        """
        recommendations = []

        if not Path(csv_file).exists():
            logger.error(f"CSV file not found: {csv_file}")
            return recommendations

        try:
            # newline='' lets the csv module handle line endings inside quoted fields.
            with open(csv_file, 'r', encoding='utf-8', newline='') as f:
                for row in csv.DictReader(f):
                    recommendations.append(dict(row))

            logger.info(f"Loaded {len(recommendations)} recommendations from {csv_file}")
        except (OSError, UnicodeError, csv.Error) as e:
            logger.error(f"Error loading CSV: {e}")
        return recommendations

    def display_recommendation(self, recommendation: Dict, index: int, total: int):
        """Display a single recommendation for user review.

        The layout depends on which key is present: ``post_title`` rows,
        ``title`` rows, or a generic key/value dump for anything else.
        """
        print(f"\n{'='*80}")
        print(f"RECOMMENDATION {index}/{total}")
        print(f"{'='*80}")

        # Display different fields depending on the type of recommendation
        if 'post_title' in recommendation:
            # csv.DictReader stores None for cells missing from a short row;
            # slicing None would raise, so fall back to the placeholder.
            preview = recommendation.get('content_preview', 'N/A')
            if preview is None:
                preview = 'N/A'

            print(f"Post Title: {recommendation.get('post_title', 'N/A')}")
            print(f"Post ID: {recommendation.get('post_id', 'N/A')}")
            print(f"Site: {recommendation.get('site', 'N/A')}")
            print(f"Current Categories: {recommendation.get('current_categories', 'N/A')}")
            print(f"Proposed Category: {recommendation.get('proposed_category', 'N/A')}")
            print(f"Proposed Site: {recommendation.get('proposed_site', 'N/A')}")
            print(f"Reason: {recommendation.get('reason', 'N/A')}")
            print(f"Confidence: {recommendation.get('confidence', 'N/A')}")
            print(f"Content Preview: {preview[:100]}...")
        elif 'title' in recommendation:
            print(f"Post Title: {recommendation.get('title', 'N/A')}")
            print(f"Post ID: {recommendation.get('post_id', 'N/A')}")
            print(f"Site: {recommendation.get('site', 'N/A')}")
            print(f"Decision: {recommendation.get('decision', 'N/A')}")
            print(f"Recommended Category: {recommendation.get('recommended_category', 'N/A')}")
            print(f"Reason: {recommendation.get('reason', 'N/A')}")
            print(f"Priority: {recommendation.get('priority', 'N/A')}")
            print(f"AI Notes: {recommendation.get('ai_notes', 'N/A')}")
        else:
            # Generic display for other types of recommendations.
            # str(): DictReader files surplus cells under the key None.
            for key, value in recommendation.items():
                print(f"{str(key).replace('_', ' ').title()}: {value}")

    def get_user_choice(self) -> str:
        """Get user's approval choice.

        Returns:
            'approved', 'rejected', 'pending' or 'quit'. A closed input stream
            (EOF) is treated as 'quit' so that progress can still be exported.
        """
        choices = {
            'y': 'approved', 'yes': 'approved',
            'n': 'rejected', 'no': 'rejected',
            's': 'pending', 'skip': 'pending',
            'q': 'quit', 'quit': 'quit',
        }

        while True:
            print(f"\nOptions:")
            print(f" 'y' or 'yes' - Approve this recommendation")
            print(f" 'n' or 'no' - Reject this recommendation")
            print(f" 's' or 'skip' - Skip this recommendation for later review")
            print(f" 'q' or 'quit' - Quit and save current progress")

            try:
                choice = input(f"\nEnter your choice: ").strip().lower()
            except EOFError:
                return 'quit'

            if choice in choices:
                return choices[choice]
            print("Invalid choice. Please enter 'y', 'n', 's', or 'q'.")

    def review_recommendations(self, recommendations: List[Dict], title: str = "Recommendations") -> bool:
        """Review recommendations with user interaction.

        Each reviewed dict gets a ``status`` key and is appended to the
        matching list on this instance.

        Returns:
            True when every recommendation was reviewed, False when the user
            chose to quit early.
        """
        print(f"\n{'='*80}")
        print(f"REVIEWING {title.upper()}")
        print(f"Total recommendations to review: {len(recommendations)}")
        print(f"{'='*80}")

        for i, recommendation in enumerate(recommendations, 1):
            self.display_recommendation(recommendation, i, len(recommendations))

            choice = self.get_user_choice()

            if choice == 'quit':
                logger.info("User chose to quit. Saving progress...")
                return False
            elif choice == 'approved':
                recommendation['status'] = 'approved'
                self.approved_recommendations.append(recommendation)
                logger.info(f"Approved recommendation {i}")
            elif choice == 'rejected':
                recommendation['status'] = 'rejected'
                self.rejected_recommendations.append(recommendation)
                logger.info(f"Rejected recommendation {i}")
            elif choice == 'pending':
                recommendation['status'] = 'pending_review'
                self.pending_recommendations.append(recommendation)
                logger.info(f"Skipped recommendation {i} for later review")

        return True

    def _export_bucket(self, recommendations: List[Dict], label: str, filename_suffix: str) -> str:
        """Write one list of recommendations to ``<label>_recommendations_<ts><suffix>.csv``.

        Returns the path of the written file, or "" when the list is empty.
        """
        if not recommendations:
            logger.info(f"No {label} recommendations to export")
            return ""

        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        csv_file = self.output_dir / f"{label}_recommendations_{timestamp}{filename_suffix}.csv"

        # Header = union of all keys. The None key (DictReader's bucket for
        # surplus cells) is not a real column and cannot be sorted with str keys.
        fieldnames = sorted(
            {key for rec in recommendations for key in rec if key is not None}
        )

        self.output_dir.mkdir(parents=True, exist_ok=True)
        with open(csv_file, 'w', newline='', encoding='utf-8') as f:
            # extrasaction='ignore' drops the None-key overflow instead of raising.
            writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='ignore')
            writer.writeheader()
            writer.writerows(recommendations)

        logger.info(f"Exported {len(recommendations)} {label} recommendations to: {csv_file}")
        return str(csv_file)

    def export_approved_recommendations(self, filename_suffix: str = "") -> str:
        """Export approved recommendations to CSV; return the path or ""."""
        return self._export_bucket(self.approved_recommendations, "approved", filename_suffix)

    def export_rejected_recommendations(self, filename_suffix: str = "") -> str:
        """Export rejected recommendations to CSV; return the path or ""."""
        return self._export_bucket(self.rejected_recommendations, "rejected", filename_suffix)

    def export_pending_recommendations(self, filename_suffix: str = "") -> str:
        """Export pending recommendations to CSV; return the path or ""."""
        return self._export_bucket(self.pending_recommendations, "pending", filename_suffix)

    def run_interactive_approval(self, csv_files: List[str]):
        """Run interactive approval process for multiple CSV files.

        Files are reviewed in order. Once the user quits, the remaining files
        are not opened; everything decided so far is still exported.
        """
        logger.info("="*70)
        logger.info("USER APPROVAL SYSTEM FOR SEO RECOMMENDATIONS")
        logger.info("="*70)

        for csv_file in csv_files:
            logger.info(f"\nLoading recommendations from: {csv_file}")
            recommendations = self.load_recommendations_from_csv(csv_file)

            if not recommendations:
                logger.warning(f"No recommendations found in {csv_file}, skipping...")
                continue

            # Get the filename without path for the title
            filename = Path(csv_file).stem
            finished = self.review_recommendations(recommendations, title=filename)
            # Explicit False only, so an override that returns None keeps going.
            if finished is False:
                break

        # Export results
        logger.info("\n" + "="*70)
        logger.info("EXPORTING RESULTS")
        logger.info("="*70)

        approved_file = self.export_approved_recommendations()
        rejected_file = self.export_rejected_recommendations()
        pending_file = self.export_pending_recommendations()

        # Summary
        logger.info(f"\n{'─'*70}")
        logger.info("APPROVAL SUMMARY:")
        logger.info(f" Approved: {len(self.approved_recommendations)}")
        logger.info(f" Rejected: {len(self.rejected_recommendations)}")
        logger.info(f" Pending: {len(self.pending_recommendations)}")
        logger.info(f"{'─'*70}")

        if approved_file:
            logger.info(f"\nApproved recommendations saved to: {approved_file}")
        if rejected_file:
            logger.info(f"Rejected recommendations saved to: {rejected_file}")
        if pending_file:
            logger.info(f"Pending recommendations saved to: {pending_file}")

        logger.info(f"\n✓ Approval process complete!")

    @classmethod
    def _confidence_score(cls, raw) -> float:
        """Map a confidence cell to a number.

        'high' / 'medium' / 'low' (any case, surrounding blanks ignored) map to
        0.9 / 0.6 / 0.3; other text is parsed as a float; a missing (None) or
        unparseable value counts as low.
        """
        if raw is None:
            return cls._DEFAULT_CONFIDENCE
        text = str(raw).strip().lower()
        if text in cls._CONFIDENCE_LABELS:
            return cls._CONFIDENCE_LABELS[text]
        try:
            return float(text)
        except ValueError:
            return cls._DEFAULT_CONFIDENCE

    def run_auto_approval(self, csv_files: List[str], auto_approve_threshold: float = 0.8):
        """Auto-approve recommendations based on confidence threshold.

        A recommendation whose confidence score is at least
        *auto_approve_threshold* is marked ``auto_approved``; all others are
        marked ``auto_rejected``. Both lists are exported with an ``_auto``
        file-name suffix.
        """
        logger.info("="*70)
        logger.info("AUTO APPROVAL SYSTEM FOR SEO RECOMMENDATIONS")
        logger.info("="*70)
        logger.info(f"Auto-approval threshold: {auto_approve_threshold}")

        all_recommendations = []
        for csv_file in csv_files:
            logger.info(f"\nLoading recommendations from: {csv_file}")
            all_recommendations.extend(self.load_recommendations_from_csv(csv_file))

        approved_count = 0
        rejected_count = 0

        for rec in all_recommendations:
            confidence_value = self._confidence_score(rec.get('confidence', 'Low'))

            if confidence_value >= auto_approve_threshold:
                rec['status'] = 'auto_approved'
                self.approved_recommendations.append(rec)
                approved_count += 1
            else:
                rec['status'] = 'auto_rejected'
                self.rejected_recommendations.append(rec)
                rejected_count += 1

        # Export results
        logger.info("\n" + "="*70)
        logger.info("EXPORTING AUTO-APPROVAL RESULTS")
        logger.info("="*70)

        approved_file = self.export_approved_recommendations("_auto")
        rejected_file = self.export_rejected_recommendations("_auto")

        # Summary
        logger.info(f"\n{'─'*70}")
        logger.info("AUTO APPROVAL SUMMARY:")
        logger.info(f" Auto-approved: {approved_count}")
        logger.info(f" Auto-rejected: {rejected_count}")
        logger.info(f"{'─'*70}")

        if approved_file:
            logger.info(f"\nAuto-approved recommendations saved to: {approved_file}")
        if rejected_file:
            logger.info(f"Auto-rejected recommendations saved to: {rejected_file}")

        logger.info(f"\n✓ Auto-approval process complete!")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for the approval workflow.

    Takes one or more CSV files. With ``--auto`` the recommendations are
    approved or rejected by confidence (``--threshold``, default 0.8);
    otherwise each one is reviewed interactively.
    """
    import argparse

    # (argument name, add_argument keyword options), registered in this order.
    argument_specs = (
        ('csv_files', dict(
            nargs='+',
            help='CSV files containing recommendations to review',
        )),
        ('--auto', dict(
            action='store_true',
            help='Run auto-approval mode instead of interactive mode',
        )),
        ('--threshold', dict(
            type=float,
            default=0.8,
            help='Confidence threshold for auto-approval (default: 0.8)',
        )),
    )

    parser = argparse.ArgumentParser(
        description='Review and approve SEO recommendations'
    )
    for argument_name, options in argument_specs:
        parser.add_argument(argument_name, **options)

    parsed = parser.parse_args()
    system = UserApprovalSystem()

    if not parsed.auto:
        system.run_interactive_approval(parsed.csv_files)
        return
    system.run_auto_approval(parsed.csv_files, parsed.threshold)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
2
seo
2
seo
@@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
SEO Automation CLI - Main executable
|
||||
Entry point for the SEO automation tool.
|
||||
Single entry point for the SEO automation tool.
|
||||
"""
|
||||
|
||||
import sys
|
||||
|
||||
@@ -1,7 +1,14 @@
|
||||
"""
|
||||
SEO Automation Tool - Integrated Application
|
||||
A comprehensive WordPress SEO automation suite.
|
||||
SEO Automation Tool - Complete Integrated Package
|
||||
Single entry point for all SEO automation functionality.
|
||||
"""
|
||||
|
||||
__version__ = '1.0.0'
|
||||
__author__ = 'SEO Automation Team'
|
||||
# Public API of the package. EnhancedPostAnalyzer is imported by this module
# alongside the other classes, so it is listed here as well to keep
# star-imports consistent with the explicit imports.
__all__ = ['SEOApp', 'PostExporter', 'PostAnalyzer', 'EnhancedPostAnalyzer', 'CategoryProposer']
|
||||
|
||||
# Import main classes for easy access
|
||||
from .app import SEOApp
|
||||
from .exporter import PostExporter
|
||||
from .analyzer import PostAnalyzer, EnhancedPostAnalyzer
|
||||
from .category_proposer import CategoryProposer
|
||||
|
||||
@@ -1,15 +1,353 @@
|
||||
"""
|
||||
Analyzer Module - AI-powered post analysis
|
||||
Post Analyzer - AI-powered post analysis with selective field support
|
||||
"""
|
||||
|
||||
import sys
|
||||
import csv
|
||||
import json
|
||||
import logging
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional
|
||||
import requests
|
||||
|
||||
# Import from scripts directory (parent of src)
|
||||
scripts_dir = Path(__file__).parents[2] / 'scripts'
|
||||
if str(scripts_dir) not in sys.path:
|
||||
sys.path.insert(0, str(scripts_dir))
|
||||
from .config import Config
|
||||
|
||||
from ai_analyze_posts_for_decisions import PostAnalyzer
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
__all__ = ['PostAnalyzer']
|
||||
|
||||
class PostAnalyzer:
    """Legacy analyzer kept for backward compatibility.

    It only loads the CSV and logs a deprecation warning; no AI analysis is
    performed by this class.
    """

    def __init__(self, csv_file: str):
        """Remember the CSV path, read API settings from ``Config`` and reset state."""
        self.csv_file = Path(csv_file)
        self.openrouter_api_key = Config.OPENROUTER_API_KEY
        self.ai_model = Config.AI_MODEL
        self.posts, self.analyzed_posts = [], []
        self.api_calls, self.ai_cost = 0, 0.0

    def load_csv(self) -> bool:
        """Read every row of the CSV into ``self.posts``.

        Returns:
            True when the file was read, False when it is missing or any
            error occurred while reading (the error is logged).
        """
        if not self.csv_file.exists():
            logger.error(f"CSV file not found: {self.csv_file}")
            return False

        try:
            with open(self.csv_file, 'r', encoding='utf-8') as handle:
                rows = list(csv.DictReader(handle))
                self.posts = rows
            logger.info(f"✓ Loaded {len(self.posts)} posts")
        except Exception as exc:
            logger.error(f"Error loading CSV: {exc}")
            return False
        return True

    def run(self) -> None:
        """Load the CSV and, on success, warn that this class is deprecated."""
        if self.load_csv():
            logger.warning("Basic PostAnalyzer is deprecated. Use EnhancedPostAnalyzer instead.")
|
||||
|
||||
|
||||
class EnhancedPostAnalyzer:
    """Enhanced analyzer with selective field analysis and in-place updates.

    Posts are loaded from a CSV file, sent in batches to the OpenRouter
    chat-completions endpoint, and exported with ``proposed_*`` / ``*_reason``
    columns for every requested field plus ``ai_confidence`` and
    ``ai_priority``.
    """

    # Analyzable field -> (source column, proposal column, reason column).
    # Insertion order defines the order of the added CSV columns.
    _FIELD_COLUMNS = {
        'title': ('title', 'proposed_title', 'title_reason'),
        'meta_description': ('meta_description', 'proposed_meta_description', 'meta_reason'),
        'categories': ('categories', 'proposed_category', 'category_reason'),
        'site': ('site', 'proposed_site', 'site_reason'),
    }

    # Prices used for the cost estimate.
    # NOTE(review): hard-coded; presumably USD per million tokens for the
    # configured model -- confirm against the provider's pricing.
    _INPUT_TOKEN_PRICE = 3
    _OUTPUT_TOKEN_PRICE = 15

    def __init__(self, csv_file: str, analyze_fields: Optional[List[str]] = None):
        """
        Initialize analyzer.

        Args:
            csv_file: Path to input CSV
            analyze_fields: List of fields to analyze
                ['title', 'meta_description', 'categories', 'site'];
                all four are analyzed when omitted.
        """
        self.csv_file = Path(csv_file)
        self.openrouter_api_key = Config.OPENROUTER_API_KEY
        self.ai_model = Config.AI_MODEL
        self.posts = []
        self.analyzed_posts = []
        self.api_calls = 0
        self.ai_cost = 0.0

        if analyze_fields is None:
            self.analyze_fields = ['title', 'meta_description', 'categories', 'site']
        else:
            self.analyze_fields = analyze_fields

        logger.info(f"Fields to analyze: {', '.join(self.analyze_fields)}")

    def load_csv(self) -> bool:
        """Load posts from the CSV file into ``self.posts``.

        Returns:
            True on success, False when the file is missing or unreadable.
        """
        logger.info(f"Loading CSV: {self.csv_file}")

        if not self.csv_file.exists():
            logger.error(f"CSV file not found: {self.csv_file}")
            return False

        try:
            # newline='' lets the csv module handle newlines embedded in
            # quoted fields itself, as the csv documentation recommends.
            with open(self.csv_file, 'r', newline='', encoding='utf-8') as f:
                self.posts = list(csv.DictReader(f))

            logger.info(f"✓ Loaded {len(self.posts)} posts from CSV")
            return True
        except Exception as e:
            logger.error(f"Error loading CSV: {e}")
            return False

    @staticmethod
    def _build_prompt(batch: List[Dict], fields: List[str]) -> str:
        """Build the prompt text for one batch, limited to the requested fields."""
        formatted_posts = []
        for i, post in enumerate(batch, 1):
            lines = [
                f"{i}. POST ID: {post.get('post_id', '')}\n",
                f" Site: {post.get('site', '')}\n",
            ]
            if 'title' in fields:
                lines.append(f" Title: {post.get('title', '')}\n")
            if 'meta_description' in fields:
                lines.append(f" Meta Description: {post.get('meta_description', '')}\n")
            if 'categories' in fields:
                lines.append(f" Categories: {post.get('categories', '')}\n")
            if 'content_preview' in post:
                # The column can be present but empty/None for short rows.
                preview = post.get('content_preview') or ''
                lines.append(f" Content Preview: {preview[:300]}...\n")
            formatted_posts.append("".join(lines))

        parts = ["Analyze these blog posts and provide recommendations.\n\n"]

        if 'site' in fields:
            parts.append(
                "Website Strategy:\n"
                "- mistergeek.net: High-value topics (VPN, Software, Gaming, General Tech, SEO, Content Marketing)\n"
                "- webscroll.fr: Torrenting, File-Sharing, Tracker guides\n"
                "- hellogeek.net: Low-traffic, experimental, off-brand content\n"
                "\n"
            )

        parts.append("\n".join(formatted_posts))
        parts.append("\nFor EACH post, provide a JSON object with:\n{\n")

        # Results are matched back to posts by this key, so the model must
        # echo it; without it no recommendation can be attributed to a post.
        parts.append(' "post_id": "<POST ID copied exactly from the post>",\n')

        if 'title' in fields:
            parts.append(' "proposed_title": "<Improved SEO title>",\n')
            parts.append(' "title_reason": "<Reason for title change>",\n')
        if 'meta_description' in fields:
            parts.append(' "proposed_meta_description": "<Improved meta description (120-160 chars)>",\n')
            parts.append(' "meta_reason": "<Reason for meta description change>",\n')
        if 'categories' in fields:
            parts.append(' "proposed_category": "<Best category>",\n')
            parts.append(' "category_reason": "<Reason for category change>",\n')
        if 'site' in fields:
            parts.append(' "proposed_site": "<Best site for this post>",\n')
            parts.append(' "site_reason": "<Reason for site recommendation>",\n')

        parts.append(' "confidence": "<High|Medium|Low>",\n')
        parts.append(' "priority": "<High|Medium|Low>"\n}')
        parts.append("\nReturn ONLY a JSON array of objects, one per post.")

        return "".join(parts)

    def get_ai_recommendations(self, batch: List[Dict], fields: List[str]) -> Optional[str]:
        """Request recommendations for one batch.

        Args:
            batch: Post rows to analyze.
            fields: Fields to include in the prompt and in the requested output.

        Returns:
            The raw text of the model's reply, or None when the API key is
            missing or the request/response handling failed (error is logged).
        """
        if not self.openrouter_api_key:
            logger.error("OPENROUTER_API_KEY not set")
            return None

        prompt = self._build_prompt(batch, fields)

        try:
            logger.info(" Sending batch to AI for analysis...")

            response = requests.post(
                "https://openrouter.ai/api/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {self.openrouter_api_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": self.ai_model,
                    "messages": [{"role": "user", "content": prompt}],
                    "temperature": 0.3,
                },
                timeout=60
            )
            response.raise_for_status()

            result = response.json()
            self.api_calls += 1

            usage = result.get('usage', {})
            input_tokens = usage.get('prompt_tokens', 0)
            output_tokens = usage.get('completion_tokens', 0)
            self.ai_cost += (
                input_tokens * self._INPUT_TOKEN_PRICE
                + output_tokens * self._OUTPUT_TOKEN_PRICE
            ) / 1_000_000

            recommendations_text = result['choices'][0]['message']['content'].strip()
            logger.info(f" ✓ Got recommendations (tokens: {input_tokens}+{output_tokens})")

            return recommendations_text

        except Exception as e:
            # Deliberate best-effort boundary: network errors and unexpected
            # response shapes fail this batch only, not the whole run.
            logger.error(f"Error getting AI recommendations: {e}")
            return None

    def parse_recommendations(self, recommendations_json: str) -> List[Dict]:
        """Extract the JSON array from the model's reply.

        Text before the first ``[`` and after the last ``]`` is ignored.
        Entries that are not JSON objects are dropped.

        Returns:
            List of recommendation dicts; empty when nothing could be parsed.
        """
        try:
            start_idx = recommendations_json.find('[')
            end_idx = recommendations_json.rfind(']') + 1

            if start_idx == -1 or end_idx == 0:
                logger.error("Could not find JSON array in response")
                return []

            parsed = json.loads(recommendations_json[start_idx:end_idx])
        except json.JSONDecodeError as e:
            logger.error(f"Error parsing JSON recommendations: {e}")
            return []

        recommendations = [item for item in parsed if isinstance(item, dict)]
        dropped = len(parsed) - len(recommendations)
        if dropped:
            logger.warning(f"Ignoring {dropped} non-object entries in AI response")
        return recommendations

    @staticmethod
    def _index_recommendations(batch: List[Dict], recommendations: List[Dict]) -> Dict[str, Dict]:
        """Key recommendations by post ID.

        A recommendation carrying its own ``post_id`` is keyed by it. One
        without it is matched to the post at the same position, but only when
        the reply has exactly one entry per post in the batch and that post
        was not already claimed explicitly.
        NOTE(review): the positional fallback assumes the model keeps the
        order of the posts in the prompt.
        """
        indexed: Dict[str, Dict] = {}
        unkeyed = []
        for position, rec in enumerate(recommendations):
            key = str(rec.get('post_id') or '').strip()
            if key:
                indexed[key] = rec
            else:
                unkeyed.append((position, rec))

        if len(recommendations) == len(batch):
            for position, rec in unkeyed:
                key = str(batch[position].get('post_id') or '').strip()
                if key:
                    indexed.setdefault(key, rec)
        return indexed

    def analyze_posts(self, batch_size: int = 10) -> bool:
        """Analyze all loaded posts in batches and annotate them.

        Every post ends up in ``self.analyzed_posts``; posts without a
        recommendation keep their current values as the proposal.

        Returns:
            True when at least one post was processed.
        """
        logger.info("\n" + "="*70)
        logger.info("ANALYZING POSTS WITH AI")
        logger.info("="*70 + "\n")

        # Start fresh so a repeated call does not append the same rows twice.
        self.analyzed_posts = []

        batches = [self.posts[i:i + batch_size] for i in range(0, len(self.posts), batch_size)]
        logger.info(f"Processing {len(self.posts)} posts in {len(batches)} batches...\n")

        all_recommendations = {}

        for batch_num, batch in enumerate(batches, 1):
            logger.info(f"Batch {batch_num}/{len(batches)}: Analyzing {len(batch)} posts...")

            recommendations_json = self.get_ai_recommendations(batch, self.analyze_fields)

            if not recommendations_json:
                logger.error(f" Failed to get recommendations for batch {batch_num}")
                continue

            recommendations = self.parse_recommendations(recommendations_json)
            all_recommendations.update(self._index_recommendations(batch, recommendations))

            logger.info(f" ✓ Got {len(recommendations)} recommendations")

        logger.info("\n✓ Analysis complete!")
        logger.info(f" Total recommendations: {len(all_recommendations)}")
        logger.info(f" API calls: {self.api_calls}")
        logger.info(f" Estimated cost: ${self.ai_cost:.4f}")

        # Map recommendations to posts, adding only the requested fields.
        for post in self.posts:
            rec = all_recommendations.get(str(post.get('post_id') or '').strip())

            for field, (source_col, proposal_col, reason_col) in self._FIELD_COLUMNS.items():
                if field not in self.analyze_fields:
                    continue
                current = post.get(source_col, '')
                if rec is not None:
                    post[proposal_col] = rec.get(proposal_col, current)
                    post[reason_col] = rec.get(reason_col, '')
                else:
                    post[proposal_col] = current
                    post[reason_col] = 'No AI recommendation'

            if rec is not None:
                post['ai_confidence'] = rec.get('confidence', 'Medium')
                post['ai_priority'] = rec.get('priority', 'Medium')
            else:
                post['ai_confidence'] = 'Unknown'
                post['ai_priority'] = 'Medium'

            self.analyzed_posts.append(post)

        return bool(self.analyzed_posts)

    def _output_fieldnames(self) -> List[str]:
        """Return the CSV header: existing columns, then added ones, without duplicates."""
        names = list(self.analyzed_posts[0].keys())
        for field, (_, proposal_col, reason_col) in self._FIELD_COLUMNS.items():
            if field in self.analyze_fields:
                names.extend([proposal_col, reason_col])
        names.extend(['ai_confidence', 'ai_priority'])
        # Analyzed rows already carry the added columns, so de-duplicate while
        # keeping first-seen order; otherwise the header repeats them.
        return list(dict.fromkeys(names))

    def export_results(self, output_file: Optional[str] = None, update_input: bool = False) -> str:
        """Export analyzed posts to CSV.

        Args:
            output_file: Destination path; ignored when ``update_input`` is True.
                When omitted, a timestamped file in the project ``output``
                directory is used.
            update_input: Overwrite the input CSV after copying it to a
                timestamped backup next to it.

        Returns:
            Path of the written file, or "" when there is nothing to export.
        """
        # Check first so that no backup or directory is created for a no-op.
        if not self.analyzed_posts:
            logger.error("No analyzed posts to export")
            return ""

        if update_input:
            backup_file = self.csv_file.parent / f"{self.csv_file.stem}_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
            shutil.copy2(self.csv_file, backup_file)
            logger.info(f"✓ Created backup: {backup_file}")
            output_file = self.csv_file
        elif not output_file:
            output_dir = Path(__file__).parent.parent.parent / 'output'
            output_dir.mkdir(parents=True, exist_ok=True)
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            output_file = output_dir / f'analyzed_posts_{timestamp}.csv'

        output_file = Path(output_file)
        output_file.parent.mkdir(parents=True, exist_ok=True)

        fieldnames = self._output_fieldnames()

        logger.info(f"\nExporting results to: {output_file}")

        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(self.analyzed_posts)

        logger.info(f"✓ Exported {len(self.analyzed_posts)} posts")
        return str(output_file)

    def run(self, output_file: Optional[str] = None, update_input: bool = False, batch_size: int = 10) -> str:
        """Run the complete analysis: load, analyze, export.

        Returns:
            Path of the exported CSV, or "" when loading or analysis failed.
        """
        if not self.load_csv():
            return ""

        if not self.analyze_posts(batch_size=batch_size):
            logger.error("Failed to analyze posts")
            return ""

        return self.export_results(output_file=output_file, update_input=update_input)
|
||||
|
||||
207
src/seo/app.py
207
src/seo/app.py
@@ -8,11 +8,8 @@ from datetime import datetime
|
||||
from typing import Optional, List
|
||||
|
||||
from .exporter import PostExporter
|
||||
from .analyzer import PostAnalyzer
|
||||
from .recategorizer import PostRecategorizer
|
||||
from .seo_checker import MultiSiteSEOAnalyzer
|
||||
from .categories import CategoryManager
|
||||
from .approval import UserApprovalSystem
|
||||
from .analyzer import EnhancedPostAnalyzer
|
||||
from .category_proposer import CategoryProposer
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -22,70 +19,38 @@ class SEOApp:
|
||||
Main SEO Application class.
|
||||
|
||||
Provides a unified interface for all SEO automation tasks.
|
||||
Inspired by Ruby on Rails' Active Record pattern.
|
||||
|
||||
Usage:
|
||||
app = SEOApp()
|
||||
app.export()
|
||||
app.analyze()
|
||||
app.seo_check()
|
||||
"""
|
||||
|
||||
def __init__(self, verbose: bool = False):
|
||||
"""
|
||||
Initialize the SEO application.
|
||||
|
||||
Args:
|
||||
verbose: Enable verbose logging
|
||||
"""
|
||||
"""Initialize the SEO application."""
|
||||
self.verbose = verbose
|
||||
self.output_dir = Path(__file__).parent.parent.parent / 'output'
|
||||
self.output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Initialize components
|
||||
self.exporter = None
|
||||
self.analyzer = None
|
||||
self.recategorizer = None
|
||||
self.seo_checker = None
|
||||
self.category_manager = None
|
||||
self.approval_system = None
|
||||
|
||||
if verbose:
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
else:
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
def export(self) -> str:
|
||||
"""
|
||||
Export all posts from WordPress sites.
|
||||
|
||||
Returns:
|
||||
Path to exported CSV file
|
||||
"""
|
||||
"""Export all posts from WordPress sites."""
|
||||
logger.info("📦 Exporting all posts from WordPress sites...")
|
||||
self.exporter = PostExporter()
|
||||
self.exporter.run()
|
||||
|
||||
# Get the exported file path
|
||||
date_str = datetime.now().strftime('%Y-%m-%d')
|
||||
csv_file = self.output_dir / f'all_posts_{date_str}.csv'
|
||||
|
||||
logger.info(f"✅ Export completed: {csv_file}")
|
||||
return str(csv_file)
|
||||
exporter = PostExporter()
|
||||
return exporter.run()
|
||||
|
||||
def analyze(self, csv_file: Optional[str] = None) -> str:
|
||||
def analyze(self, csv_file: Optional[str] = None, fields: Optional[List[str]] = None,
|
||||
update: bool = False, output: Optional[str] = None) -> str:
|
||||
"""
|
||||
Analyze posts with AI for recommendations.
|
||||
|
||||
Args:
|
||||
csv_file: Path to CSV file (uses latest export if not provided)
|
||||
|
||||
Returns:
|
||||
Path to analysis results
|
||||
fields: Fields to analyze ['title', 'meta_description', 'categories', 'site']
|
||||
update: If True, update input CSV (creates backup)
|
||||
output: Custom output file path
|
||||
"""
|
||||
logger.info("🤖 Analyzing posts with AI for recommendations...")
|
||||
|
||||
# Find CSV file
|
||||
if not csv_file:
|
||||
csv_file = self._find_latest_export()
|
||||
|
||||
@@ -94,26 +59,13 @@ class SEOApp:
|
||||
|
||||
logger.info(f"Using file: {csv_file}")
|
||||
|
||||
# Run analysis
|
||||
self.analyzer = PostAnalyzer(csv_file)
|
||||
self.analyzer.run()
|
||||
|
||||
logger.info("✅ AI analysis completed!")
|
||||
return csv_file
|
||||
analyzer = EnhancedPostAnalyzer(csv_file, analyze_fields=fields)
|
||||
return analyzer.run(output_file=output, update_input=update)
|
||||
|
||||
def recategorize(self, csv_file: Optional[str] = None) -> str:
|
||||
"""
|
||||
Recategorize posts with AI suggestions.
|
||||
def category_propose(self, csv_file: Optional[str] = None, output: Optional[str] = None) -> str:
|
||||
"""Propose categories for posts."""
|
||||
logger.info("🏷️ Proposing categories with AI...")
|
||||
|
||||
Args:
|
||||
csv_file: Path to CSV file (uses latest export if not provided)
|
||||
|
||||
Returns:
|
||||
Path to recategorization results
|
||||
"""
|
||||
logger.info("🏷️ Recategorizing posts with AI suggestions...")
|
||||
|
||||
# Find CSV file
|
||||
if not csv_file:
|
||||
csv_file = self._find_latest_export()
|
||||
|
||||
@@ -122,122 +74,11 @@ class SEOApp:
|
||||
|
||||
logger.info(f"Using file: {csv_file}")
|
||||
|
||||
# Run recategorization
|
||||
self.recategorizer = PostRecategorizer(csv_file)
|
||||
self.recategorizer.run()
|
||||
|
||||
logger.info("✅ Recategorization completed!")
|
||||
return csv_file
|
||||
|
||||
def seo_check(self, top_n: int = 10) -> None:
    """Run the multi-site SEO checker with AI recommendations enabled.

    Args:
        top_n: Forwarded to the checker's ``run`` as ``top_n``.
    """
    logger.info("🔍 Checking SEO quality of titles/descriptions...")

    checker = MultiSiteSEOAnalyzer()
    self.seo_checker = checker  # kept on the app instance
    checker.run(use_ai=True, top_n=top_n)

    logger.info("✅ SEO check completed!")
|
||||
|
||||
def categories(self) -> None:
    """Create a ``CategoryManager``, store it on the app and run it."""
    logger.info("🗂️ Managing categories across all sites...")

    manager = CategoryManager()
    self.category_manager = manager
    manager.run()

    logger.info("✅ Category management completed!")
|
||||
|
||||
def approve(self, files: Optional[List[str]] = None) -> None:
    """Start the interactive review of recommendation files.

    Args:
        files: CSV files to review; when empty or omitted, the output
            directory is searched for recommendation files instead.

    Raises:
        FileNotFoundError: If no files were given and none were found.
    """
    logger.info("✅ Reviewing and approving recommendations...")

    self.approval_system = UserApprovalSystem()

    # Fall back to auto-detection only when the caller supplied nothing.
    targets = files or self._find_recommendation_files()
    if not targets:
        raise FileNotFoundError("No recommendation files found. Run analyze() or categories() first.")

    logger.info(f"Found {len(targets)} recommendation files to review")
    self.approval_system.run_interactive_approval(targets)

    logger.info("✅ Approval process completed!")
|
||||
|
||||
def full_pipeline(self) -> None:
    """Run the complete workflow in order: export, analyze, seo_check."""
    logger.info("🚀 Running full SEO automation pipeline...")

    # (banner, method name) per step; the method is looked up only when its
    # step is reached, exactly as direct sequential calls would do.
    steps = (
        ("\n📦 Step 1/3: Exporting posts...", 'export'),
        ("\n🤖 Step 2/3: Analyzing with AI...", 'analyze'),
        ("\n🔍 Step 3/3: Checking SEO quality...", 'seo_check'),
    )
    for banner, method_name in steps:
        logger.info(banner)
        getattr(self, method_name)()

    logger.info("\n✅ Full pipeline completed!")
|
||||
|
||||
def _find_latest_export(self) -> Optional[str]:
    """Return the path of the newest ``all_posts_*.csv`` in the output directory.

    "Newest" is decided by each file's ``st_ctime``.

    Returns:
        The path as a string, or None when no matching file exists.
    """
    def changed_at(path):
        return path.stat().st_ctime

    candidates = list(self.output_dir.glob('all_posts_*.csv'))
    newest = max(candidates, key=changed_at, default=None)
    return None if newest is None else str(newest)
|
||||
|
||||
def _find_recommendation_files(self) -> List[str]:
    """Collect recommendation CSVs from the output directory.

    Returns:
        Paths (as strings) of all files matching the known recommendation
        file-name patterns, grouped in pattern order.
    """
    known_patterns = (
        'category_assignments_*.csv',
        'posts_with_ai_recommendations_*.csv',
        'posts_to_move_*.csv',
        'posts_to_consolidate_*.csv',
        'posts_to_delete_*.csv',
    )
    return [
        str(match)
        for pattern in known_patterns
        for match in self.output_dir.glob(pattern)
    ]
|
||||
proposer = CategoryProposer(csv_file)
|
||||
return proposer.run(output_file=output)
|
||||
|
||||
def status(self) -> dict:
|
||||
"""
|
||||
Get status of output files.
|
||||
|
||||
Returns:
|
||||
Dictionary with file information
|
||||
"""
|
||||
"""Get status of output files."""
|
||||
files = list(self.output_dir.glob('*.csv'))
|
||||
|
||||
status_info = {
|
||||
@@ -253,3 +94,13 @@ class SEOApp:
|
||||
})
|
||||
|
||||
return status_info
|
||||
|
||||
def _find_latest_export(self) -> Optional[str]:
    """Find the most recent ``all_posts_*.csv`` export, judged by ``st_ctime``.

    Returns:
        The path as a string, or None when the output directory holds no export.
    """
    def changed_at(path):
        return path.stat().st_ctime

    exports = list(self.output_dir.glob('all_posts_*.csv'))
    newest = max(exports, key=changed_at, default=None)
    return None if newest is None else str(newest)
|
||||
|
||||
@@ -1,15 +1,18 @@
|
||||
"""
|
||||
Approval System Module - User approval for recommendations
|
||||
Placeholder for future implementation.
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
# Import from scripts directory (parent of src)
|
||||
scripts_dir = Path(__file__).parents[2] / 'scripts'
|
||||
if str(scripts_dir) not in sys.path:
|
||||
sys.path.insert(0, str(scripts_dir))
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from user_approval import UserApprovalSystem
|
||||
|
||||
__all__ = ['UserApprovalSystem']
|
||||
class UserApprovalSystem:
    """Stand-in for the approval workflow; its methods only log a notice."""

    def __init__(self) -> None:
        """Warn at construction time that this class is a placeholder."""
        notice = "UserApprovalSystem is a placeholder. Implement full functionality as needed."
        logger.warning(notice)

    def run_interactive_approval(self, files) -> None:
        """Log that approval is not implemented; ``files`` is not used."""
        notice = "Approval system not yet implemented in integrated package."
        logger.info(notice)
|
||||
|
||||
@@ -1,15 +1,18 @@
|
||||
"""
|
||||
Category Manager Module - Category management across sites
|
||||
Placeholder for future implementation.
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
# Import from scripts directory (parent of src)
|
||||
scripts_dir = Path(__file__).parents[2] / 'scripts'
|
||||
if str(scripts_dir) not in sys.path:
|
||||
sys.path.insert(0, str(scripts_dir))
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from category_manager import CategoryManager
|
||||
|
||||
__all__ = ['CategoryManager']
|
||||
class CategoryManager:
    """Stand-in for category management; its methods only log a notice."""

    def __init__(self) -> None:
        """Warn at construction time that this class is a placeholder."""
        notice = "CategoryManager is a placeholder. Implement full functionality as needed."
        logger.warning(notice)

    def run(self) -> None:
        """Log that category management is not implemented."""
        notice = "Category management not yet implemented in integrated package."
        logger.info(notice)
|
||||
|
||||
@@ -1,18 +1,16 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Category Proposer - AI-powered category suggestions
|
||||
Analyzes posts and proposes optimal categories based on content.
|
||||
"""
|
||||
|
||||
import csv
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional
|
||||
import requests
|
||||
from datetime import datetime
|
||||
from config import Config
|
||||
|
||||
from .config import Config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -56,7 +54,6 @@ class CategoryProposer:
|
||||
logger.error("OPENROUTER_API_KEY not set")
|
||||
return None
|
||||
|
||||
# Format posts for AI
|
||||
formatted = []
|
||||
for i, post in enumerate(batch, 1):
|
||||
text = f"{i}. ID: {post['post_id']}\n"
|
||||
@@ -161,7 +158,6 @@ Return ONLY a JSON array with one object per post."""
|
||||
logger.info(f" API calls: {self.api_calls}")
|
||||
logger.info(f" Cost: ${self.ai_cost:.4f}")
|
||||
|
||||
# Map proposals to posts
|
||||
for post in self.posts:
|
||||
post_id = str(post['post_id'])
|
||||
proposal = all_proposals.get(post_id, {})
|
||||
@@ -180,7 +176,7 @@ Return ONLY a JSON array with one object per post."""
|
||||
def export_proposals(self, output_file: Optional[str] = None) -> str:
|
||||
"""Export category proposals to CSV."""
|
||||
if not output_file:
|
||||
output_dir = Path(__file__).parent.parent / 'output'
|
||||
output_dir = Path(__file__).parent.parent.parent / 'output'
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
||||
output_file = output_dir / f'category_proposals_{timestamp}.csv'
|
||||
@@ -207,33 +203,10 @@ Return ONLY a JSON array with one object per post."""
|
||||
def run(self, output_file: Optional[str] = None, batch_size: int = 10) -> str:
|
||||
"""Run complete category proposal process."""
|
||||
if not self.load_csv():
|
||||
sys.exit(1)
|
||||
return ""
|
||||
|
||||
if not self.propose_categories(batch_size=batch_size):
|
||||
logger.error("Failed to propose categories")
|
||||
sys.exit(1)
|
||||
return ""
|
||||
|
||||
return self.export_proposals(output_file)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for the category proposer.

    Parses the input CSV path, an optional ``--output`` path and
    ``--batch-size``, runs the proposer and logs where the result was saved.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='AI-powered category proposer for blog posts'
    )
    parser.add_argument('csv_file', help='Input CSV file with posts')
    parser.add_argument('--output', '-o', help='Output CSV file')
    parser.add_argument('--batch-size', type=int, default=10, help='Batch size')

    args = parser.parse_args()

    proposer = CategoryProposer(args.csv_file)
    # Forward --output as well: it was parsed but never handed to run(), so a
    # user-supplied output path was silently ignored.
    output_file = proposer.run(output_file=args.output, batch_size=args.batch_size)

    logger.info(f"\n✓ Category proposals saved to: {output_file}")


if __name__ == '__main__':
    main()
|
||||
142
src/seo/cli.py
142
src/seo/cli.py
@@ -26,12 +26,9 @@ def main():
|
||||
Examples:
|
||||
seo export Export all posts from WordPress sites
|
||||
seo analyze Analyze posts with AI for recommendations
|
||||
seo analyze posts.csv Analyze specific CSV file
|
||||
seo recategorize Recategorize posts with AI
|
||||
seo seo_check Check SEO quality of titles/descriptions
|
||||
seo categories Manage categories across sites
|
||||
seo approve Review and approve recommendations
|
||||
seo full_pipeline Run complete workflow: export → analyze → seo_check
|
||||
seo analyze -f title Analyze only titles
|
||||
seo analyze -u -f meta Update CSV with meta descriptions
|
||||
seo category_propose Propose categories based on content
|
||||
seo status Show output files status
|
||||
"""
|
||||
)
|
||||
@@ -40,11 +37,10 @@ Examples:
|
||||
parser.add_argument('args', nargs='*', help='Arguments for the command')
|
||||
parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
|
||||
parser.add_argument('--dry-run', action='store_true', help='Show what would be done')
|
||||
parser.add_argument('--top-n', type=int, default=10, help='Number of top posts for AI analysis')
|
||||
parser.add_argument('--fields', '-f', nargs='+',
|
||||
choices=['title', 'meta_description', 'categories', 'site'],
|
||||
help='Fields to analyze (for analyze command)')
|
||||
parser.add_argument('--update', '-u', action='store_true', help='Update input file (creates backup)')
|
||||
help='Fields to analyze')
|
||||
parser.add_argument('--update', '-u', action='store_true', help='Update input file')
|
||||
parser.add_argument('--output', '-o', help='Output file path')
|
||||
|
||||
args = parser.parse_args()
|
||||
@@ -67,12 +63,7 @@ Examples:
|
||||
commands = {
|
||||
'export': cmd_export,
|
||||
'analyze': cmd_analyze,
|
||||
'recategorize': cmd_recategorize,
|
||||
'seo_check': cmd_seo_check,
|
||||
'categories': cmd_categories,
|
||||
'category_propose': cmd_category_propose,
|
||||
'approve': cmd_approve,
|
||||
'full_pipeline': cmd_full_pipeline,
|
||||
'status': cmd_status,
|
||||
'help': cmd_help,
|
||||
}
|
||||
@@ -117,63 +108,19 @@ def cmd_analyze(app, args):
|
||||
|
||||
csv_file = args.args[0] if args.args else None
|
||||
|
||||
# Use enhanced analyzer if fields are specified or update flag is set
|
||||
if args.fields or args.update:
|
||||
from pathlib import Path
|
||||
import sys
|
||||
scripts_dir = Path(__file__).parent.parent.parent / 'scripts'
|
||||
sys.path.insert(0, str(scripts_dir))
|
||||
|
||||
from enhanced_analyzer import EnhancedPostAnalyzer
|
||||
|
||||
if not csv_file:
|
||||
csv_file = app._find_latest_export()
|
||||
|
||||
if not csv_file:
|
||||
print("❌ No CSV file found. Provide one or run export first.")
|
||||
return 1
|
||||
|
||||
print(f"Using enhanced analyzer with fields: {args.fields or 'all'}")
|
||||
analyzer = EnhancedPostAnalyzer(csv_file, analyze_fields=args.fields)
|
||||
output_file = analyzer.run(
|
||||
output_file=args.output,
|
||||
update_input=args.update
|
||||
)
|
||||
print(f"✅ Analysis completed! Results: {output_file}")
|
||||
else:
|
||||
app.analyze(csv_file)
|
||||
print(f"Analyzing with fields: {args.fields or 'all'}")
|
||||
if args.update:
|
||||
print(f"Will update input CSV (backup will be created)")
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
def cmd_recategorize(app, args):
|
||||
"""Recategorize posts with AI."""
|
||||
if args.dry_run:
|
||||
print("Would recategorize posts with AI suggestions")
|
||||
return 0
|
||||
result = app.analyze(
|
||||
csv_file=csv_file,
|
||||
fields=args.fields,
|
||||
update=args.update,
|
||||
output=args.output
|
||||
)
|
||||
|
||||
csv_file = args.args[0] if args.args else None
|
||||
app.recategorize(csv_file)
|
||||
return 0
|
||||
|
||||
|
||||
def cmd_seo_check(app, args):
    """Run the SEO quality check, or only announce it under ``--dry-run``.

    Returns:
        Always 0.
    """
    if not args.dry_run:
        app.seo_check(top_n=args.top_n)
    else:
        print("Would check SEO quality of titles/descriptions")
    return 0
|
||||
|
||||
|
||||
def cmd_categories(app, args):
|
||||
"""Manage categories."""
|
||||
if args.dry_run:
|
||||
print("Would manage categories across all sites")
|
||||
return 0
|
||||
|
||||
app.categories()
|
||||
if result:
|
||||
print(f"✅ Analysis completed! Results: {result}")
|
||||
return 0
|
||||
|
||||
|
||||
@@ -185,47 +132,10 @@ def cmd_category_propose(app, args):
|
||||
|
||||
csv_file = args.args[0] if args.args else None
|
||||
|
||||
if not csv_file:
|
||||
csv_file = app._find_latest_export()
|
||||
result = app.category_propose(csv_file=csv_file, output=args.output)
|
||||
|
||||
if not csv_file:
|
||||
print("❌ No CSV file found. Provide one or run export first.")
|
||||
print(" Usage: seo category_propose <csv_file>")
|
||||
return 1
|
||||
|
||||
from pathlib import Path
|
||||
import sys
|
||||
scripts_dir = Path(__file__).parent.parent.parent / 'scripts'
|
||||
sys.path.insert(0, str(scripts_dir))
|
||||
|
||||
from category_proposer import CategoryProposer
|
||||
|
||||
print(f"Proposing categories for: {csv_file}")
|
||||
proposer = CategoryProposer(csv_file)
|
||||
output_file = proposer.run(output_file=args.output)
|
||||
|
||||
print(f"✅ Category proposals saved to: {output_file}")
|
||||
return 0
|
||||
|
||||
|
||||
def cmd_approve(app, args):
|
||||
"""Approve recommendations."""
|
||||
if args.dry_run:
|
||||
print("Would review and approve recommendations")
|
||||
return 0
|
||||
|
||||
files = args.args if args.args else None
|
||||
app.approve(files)
|
||||
return 0
|
||||
|
||||
|
||||
def cmd_full_pipeline(app, args):
|
||||
"""Run full pipeline."""
|
||||
if args.dry_run:
|
||||
print("Would run full pipeline: export → analyze → seo_check")
|
||||
return 0
|
||||
|
||||
app.full_pipeline()
|
||||
if result:
|
||||
print(f"✅ Category proposals saved to: {result}")
|
||||
return 0
|
||||
|
||||
|
||||
@@ -256,23 +166,15 @@ SEO Automation CLI - Available Commands
|
||||
Basic Commands:
|
||||
export Export all posts from WordPress sites
|
||||
analyze [csv_file] Analyze posts with AI
|
||||
analyze -f title categories Analyze specific fields only
|
||||
analyze -u Update input CSV with new columns
|
||||
recategorize [csv_file] Recategorize posts with AI
|
||||
seo_check Check SEO quality of titles/descriptions
|
||||
categories Manage categories across sites
|
||||
analyze -f title Analyze specific fields (title, meta_description, categories, site)
|
||||
analyze -u Update input CSV with new columns (creates backup)
|
||||
category_propose [csv] Propose categories based on content
|
||||
approve [files...] Review and approve recommendations
|
||||
full_pipeline Run complete workflow: export → analyze → seo_check
|
||||
|
||||
Utility:
|
||||
status Show output files status
|
||||
help Show this help message
|
||||
|
||||
Options:
|
||||
--verbose, -v Enable verbose logging
|
||||
--dry-run Show what would be done without doing it
|
||||
--top-n N Number of top posts for AI analysis (default: 10)
|
||||
--fields, -f Fields to analyze: title, meta_description, categories, site
|
||||
--update, -u Update input CSV file (creates backup)
|
||||
--output, -o Output file path
|
||||
@@ -284,8 +186,6 @@ Examples:
|
||||
seo analyze -f title categories
|
||||
seo analyze -u -f meta_description
|
||||
seo category_propose
|
||||
seo approve output/category_proposals_*.csv
|
||||
seo full_pipeline
|
||||
seo status
|
||||
""")
|
||||
return 0
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
"""
|
||||
Post Exporter Module - Export posts from WordPress sites
|
||||
Post Exporter - Export posts from WordPress sites
|
||||
"""
|
||||
|
||||
import csv
|
||||
import logging
|
||||
import time
|
||||
import re
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional
|
||||
import requests
|
||||
from requests.auth import HTTPBasicAuth
|
||||
import re
|
||||
|
||||
from .config import Config
|
||||
|
||||
@@ -26,7 +26,7 @@ class PostExporter:
|
||||
self.all_posts = []
|
||||
self.category_cache = {}
|
||||
|
||||
def fetch_category_names(self, site_name: str, site_config: Dict) -> Dict[int, str]:
|
||||
def fetch_category_names(self, site_name: str, site_config: Dict) -> Dict[int, Dict]:
|
||||
"""Fetch category names from a WordPress site."""
|
||||
if site_name in self.category_cache:
|
||||
return self.category_cache[site_name]
|
||||
@@ -61,8 +61,6 @@ class PostExporter:
|
||||
|
||||
for status in ['publish', 'draft']:
|
||||
page = 1
|
||||
status_count = 0
|
||||
|
||||
while True:
|
||||
try:
|
||||
logger.info(f" Fetching page {page} ({status} posts)...")
|
||||
@@ -79,19 +77,16 @@ class PostExporter:
|
||||
break
|
||||
|
||||
posts.extend(page_posts)
|
||||
status_count += len(page_posts)
|
||||
logger.info(f" ✓ Got {len(page_posts)} posts (total: {len(posts)})")
|
||||
logger.info(f" ✓ Got {len(page_posts)} posts")
|
||||
|
||||
page += 1
|
||||
time.sleep(0.5)
|
||||
|
||||
except requests.exceptions.HTTPError as e:
|
||||
if response.status_code == 400:
|
||||
logger.info(f" ℹ API limit reached (got {status_count} {status} posts)")
|
||||
break
|
||||
else:
|
||||
logger.error(f"Error on page {page}: {e}")
|
||||
break
|
||||
logger.error(f"Error on page {page}: {e}")
|
||||
break
|
||||
except requests.exceptions.RequestException as e:
|
||||
logger.error(f"Error fetching from {site_name}: {e}")
|
||||
break
|
||||
@@ -160,7 +155,7 @@ class PostExporter:
|
||||
|
||||
if not self.all_posts:
|
||||
logger.error("No posts to export")
|
||||
return None
|
||||
return ""
|
||||
|
||||
fieldnames = [
|
||||
'site', 'post_id', 'status', 'title', 'slug', 'url', 'author_id',
|
||||
@@ -178,10 +173,10 @@ class PostExporter:
|
||||
logger.info(f"✓ CSV exported to: {output_file}")
|
||||
return str(output_file)
|
||||
|
||||
def run(self):
|
||||
def run(self) -> str:
|
||||
"""Run the complete export process."""
|
||||
logger.info("="*70)
|
||||
logger.info("EXPORTING ALL POSTS FOR AI DECISION MAKING")
|
||||
logger.info("EXPORTING ALL POSTS")
|
||||
logger.info("="*70)
|
||||
logger.info("Sites configured: " + ", ".join(self.sites.keys()))
|
||||
|
||||
@@ -196,31 +191,7 @@ class PostExporter:
|
||||
|
||||
if not self.all_posts:
|
||||
logger.error("No posts found on any site")
|
||||
return
|
||||
return ""
|
||||
|
||||
self.all_posts.sort(key=lambda x: (x['site'], x['post_id']))
|
||||
self.export_to_csv()
|
||||
|
||||
# Print summary
|
||||
logger.info("\n" + "="*70)
|
||||
logger.info("EXPORT SUMMARY")
|
||||
logger.info("="*70)
|
||||
|
||||
by_site = {}
|
||||
for post in self.all_posts:
|
||||
site = post['site']
|
||||
if site not in by_site:
|
||||
by_site[site] = {'total': 0, 'published': 0, 'draft': 0}
|
||||
by_site[site]['total'] += 1
|
||||
if post['status'] == 'publish':
|
||||
by_site[site]['published'] += 1
|
||||
else:
|
||||
by_site[site]['draft'] += 1
|
||||
|
||||
for site, stats in sorted(by_site.items()):
|
||||
logger.info(f"\n{site}:")
|
||||
logger.info(f" Total: {stats['total']}")
|
||||
logger.info(f" Published: {stats['published']}")
|
||||
logger.info(f" Drafts: {stats['draft']}")
|
||||
|
||||
logger.info(f"\n✓ Export complete!")
|
||||
return self.export_to_csv()
|
||||
|
||||
@@ -1,15 +1,19 @@
|
||||
"""
|
||||
Recategorizer Module - AI-powered post recategorization
|
||||
Placeholder for future implementation.
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
# Import from scripts directory (parent of src)
|
||||
scripts_dir = Path(__file__).parents[2] / 'scripts'
|
||||
if str(scripts_dir) not in sys.path:
|
||||
sys.path.insert(0, str(scripts_dir))
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from ai_recategorize_posts import PostRecategorizer
|
||||
|
||||
__all__ = ['PostRecategorizer']
|
||||
class PostRecategorizer:
|
||||
"""Post recategorizer (placeholder)."""
|
||||
|
||||
def __init__(self, csv_file):
|
||||
self.csv_file = csv_file
|
||||
logger.warning("PostRecategorizer is a placeholder. Implement full functionality as needed.")
|
||||
|
||||
def run(self):
|
||||
logger.info("Recategorization not yet implemented in integrated package.")
|
||||
|
||||
@@ -1,15 +1,18 @@
|
||||
"""
|
||||
SEO Checker Module - SEO quality analysis
|
||||
Placeholder for future implementation.
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
# Import from scripts directory (parent of src)
|
||||
scripts_dir = Path(__file__).parents[2] / 'scripts'
|
||||
if str(scripts_dir) not in sys.path:
|
||||
sys.path.insert(0, str(scripts_dir))
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from multi_site_seo_analyzer import MultiSiteSEOAnalyzer
|
||||
|
||||
__all__ = ['MultiSiteSEOAnalyzer']
|
||||
class MultiSiteSEOAnalyzer:
|
||||
"""SEO quality analyzer (placeholder)."""
|
||||
|
||||
def __init__(self):
|
||||
logger.warning("MultiSiteSEOAnalyzer is a placeholder. Implement full functionality as needed.")
|
||||
|
||||
def run(self, use_ai=True, top_n=10):
|
||||
logger.info("SEO check not yet implemented in integrated package.")
|
||||
|
||||
Reference in New Issue
Block a user