This repository was archived on 2025-08-21. You can view its files and clone it, but you cannot open issues or pull requests, or push commits.
Files
hugo-mistergeek/scripts/generate-content.js
2025-08-19 13:30:54 +02:00

212 lines
7.1 KiB
JavaScript

const fs = require('fs');
const path = require('path');
const he = require('he');
// Directory holding the WordPress JSON data; posts.json and pages.json are read from here.
const DATA_DIR = path.join(__dirname, '..', 'data', 'wordpress');
// Root of the generated content tree; every generated file is written somewhere below it.
const CONTENT_DIR = path.join(__dirname, '..', 'content');
// Sub-directory of the content tree that receives one folder per published page.
const PAGES_DIR = path.join(CONTENT_DIR, 'pages');
/** Hostnames whose absolute links are rewritten into site-relative links. */
const INTERNAL_LINK_HOSTS = new Set(['wp.mistergeek.net', 'www.wp.mistergeek.net']);

/**
 * Rewrites absolute `<a href="…">` links that target the WordPress origin
 * into site-relative links. Links to any other host are left untouched.
 *
 * The query string and fragment of a rewritten link are preserved, and the
 * new value is spliced in at the position of the captured `href` value rather
 * than via a textual search inside the tag.
 *
 * @param {string} html - HTML fragment to process.
 * @returns {string} The HTML with internal links made relative.
 */
function relativizeInternalLinks(html) {
  return html.replace(
    /(<a\s+[^>]*href=")([^"]+)("[^>]*>)/g,
    (tag, beforeHref, href, afterHref) => {
      if (!href.startsWith('http://') && !href.startsWith('https://')) {
        return tag;
      }
      try {
        const url = new URL(href);
        if (INTERNAL_LINK_HOSTS.has(url.hostname)) {
          return `${beforeHref}${url.pathname}${url.search}${url.hash}${afterHref}`;
        }
      } catch (error) {
        // An unparsable URL is reported and the tag is kept unchanged.
        console.warn('Failed to parse URL:', href, error);
      }
      return tag;
    },
  );
}

/**
 * Removes empty paragraphs and turns `</p><p>` boundaries into blank lines.
 *
 * @param {string} html - HTML fragment to process.
 * @returns {string} The cleaned-up HTML.
 */
function collapseParagraphBreaks(html) {
  return html
    .replace(/<p>\s*<\/p>/g, '')
    .replace(/<\/p>\s*<p>/g, '\n\n');
}

/**
 * Serialises a front matter object and a body into one document string.
 * Each front matter value is written as `key: <JSON>`.
 *
 * @param {Object} frontmatter - Key/value pairs for the front matter block.
 * @param {string} body - Document body; surrounding whitespace is trimmed.
 * @returns {string} The complete file content.
 */
function renderContentDocument(frontmatter, body) {
  const frontmatterLines = Object.entries(frontmatter).map(
    ([key, value]) => `${key}: ${JSON.stringify(value)}`,
  );
  return ['---', ...frontmatterLines, '---', body.trim()].join('\n');
}

/**
 * Converts rendered WordPress HTML into the body written to `index.md`, then
 * writes `<targetDir>/index.md`, creating the directory when needed.
 *
 * @param {string} targetDir - Directory that receives `index.md`.
 * @param {Object} frontmatter - Front matter for the document.
 * @param {string} renderedHtml - The item's `content.rendered` value.
 * @returns {void}
 */
function writeContentIndex(targetDir, frontmatter, renderedHtml) {
  // NOTE(review): decoding the whole document also turns escaped markup such
  // as `&lt;` into literal characters — confirm this is intended for posts
  // that contain code samples.
  const decodedHtml = he.decode(renderedHtml);
  const body = collapseParagraphBreaks(relativizeInternalLinks(decodedHtml));

  // `recursive: true` makes mkdirSync a no-op for an existing directory.
  fs.mkdirSync(targetDir, { recursive: true });
  fs.writeFileSync(path.join(targetDir, 'index.md'), renderContentDocument(frontmatter, body));
}

/**
 * Generates the content tree from `posts.json` and `pages.json` in DATA_DIR.
 *
 * Published posts are written to
 * `CONTENT_DIR/<category-slug>/<year>-<month>-<slug>/index.md`, published
 * pages to `PAGES_DIR/<slug>/index.md`. Items whose status is not `publish`
 * are skipped. Author index pages are then generated for the published posts.
 *
 * @returns {void}
 * @throws {Error} If a JSON file cannot be read or parsed, or a write fails.
 */
function generateContent() {
  const posts = JSON.parse(fs.readFileSync(path.join(DATA_DIR, 'posts.json'), 'utf8'));
  const pages = JSON.parse(fs.readFileSync(path.join(DATA_DIR, 'pages.json'), 'utf8'));

  // Ensure content directories exist
  fs.mkdirSync(CONTENT_DIR, { recursive: true });
  fs.mkdirSync(PAGES_DIR, { recursive: true });

  const publishedPosts = posts.filter((post) => post.status === 'publish');
  const publishedPages = pages.filter((page) => page.status === 'publish');

  for (const post of publishedPosts) {
    const { slug } = post;
    const publishedAt = new Date(post.date);
    const year = publishedAt.getFullYear();
    const month = String(publishedAt.getMonth() + 1).padStart(2, '0');

    // The first embedded term list holds the categories, the second the tags.
    const categories = post._embedded?.['wp:term']?.[0] || [];
    const tags = post._embedded?.['wp:term']?.[1] || [];
    // The primary category (first in the list) decides the section directory.
    const categorySlug = categories[0] ? categories[0].slug : 'non-classe';

    const frontmatter = {
      title: he.decode(post.title.rendered),
      date: post.date,
      draft: false,
      slug,
      wordpress_id: post.id,
      // Strip tags first, then decode; trim the whitespace left behind by
      // the removed block-level markup.
      excerpt: he.decode(post.excerpt.rendered.replace(/<[^>]*>/g, '')).trim(),
      featured_image: post._embedded?.['wp:featuredmedia']?.[0]?.source_url || '',
      author: post._embedded?.author?.[0]?.name || 'Unknown',
      categories: categories.map((term) => term.name || 'Non classé'),
      tags: tags.map((term) => term.name || 'Non classé'),
      section: categorySlug,
    };

    writeContentIndex(
      path.join(CONTENT_DIR, categorySlug, `${year}-${month}-${slug}`),
      frontmatter,
      post.content.rendered,
    );
  }

  for (const page of publishedPages) {
    const { slug } = page;
    const frontmatter = {
      title: he.decode(page.title.rendered),
      slug,
      type: 'pages',
      layout: 'single',
      wordpress_id: page.id,
      date: page.date,
      modified: page.modified,
      draft: false,
      aliases: [`/${slug}/`],
    };

    writeContentIndex(path.join(PAGES_DIR, slug), frontmatter, page.content.rendered);
  }

  // Generate author directories and index pages
  generateAuthorDirectories(publishedPosts);
  console.log(`✅ Generated ${publishedPosts.length} content files`);
  console.log(`✅ Generated ${publishedPages.length} page files`);
}
/**
 * Creates one directory per author under `CONTENT_DIR/author` and writes an
 * `_index.md` list page into each.
 *
 * Posts are grouped by a slug derived from the embedded author name
 * (lower-cased, whitespace runs replaced by `-`, every character other than
 * word characters and `-` removed). Posts without an embedded author are
 * grouped under the name `Unknown`.
 *
 * @param {Array<Object>} posts - Posts to group; the author name is read from
 *   `post._embedded.author[0].name`.
 * @returns {void}
 */
function generateAuthorDirectories(posts) {
  const AUTHORS_DIR = path.join(CONTENT_DIR, 'author');

  // `recursive: true` makes mkdirSync a no-op for an existing directory.
  fs.mkdirSync(AUTHORS_DIR, { recursive: true });

  // A Map is used instead of a plain object so that slugs which match
  // inherited property names (e.g. "constructor") are ordinary keys.
  const postsByAuthor = new Map();
  for (const post of posts) {
    const authorName = post._embedded?.author?.[0]?.name || 'Unknown';
    const authorSlug = authorName.toLowerCase().replace(/\s+/g, '-').replace(/[^\w-]/g, '');

    let author = postsByAuthor.get(authorSlug);
    if (!author) {
      author = { name: authorName, slug: authorSlug, posts: [] };
      postsByAuthor.set(authorSlug, author);
    }
    author.posts.push(post);
  }

  // Create author directories and index pages
  for (const author of postsByAuthor.values()) {
    const authorDir = path.join(AUTHORS_DIR, author.slug);
    fs.mkdirSync(authorDir, { recursive: true });

    const frontmatter = {
      title: `Liste des articles de ${author.name}`,
      type: 'authors',
      layout: 'list',
      author: author.name,
      author_slug: author.slug,
    };
    const frontmatterLines = Object.entries(frontmatter).map(
      ([key, value]) => `${key}: ${JSON.stringify(value)}`,
    );
    // The index page has front matter only; the file ends with a newline.
    const content = ['---', ...frontmatterLines, '---', ''].join('\n');

    fs.writeFileSync(path.join(authorDir, '_index.md'), content);
    console.log(`✅ Generated author directory: ${author.name} (${author.posts.length} posts)`);
  }
}
// Run the generator as soon as the script is loaded.
generateContent();