Automatically Redirect 404s in HubSpot CMS to the Closest Page
I needed to automatically redirect thousands of "Not Found" pages for one of our clients using JavaScript, even though server-side redirects are traditionally the preferred method.
These were not critical top-level pages, but automatically generated pages or variations that no longer exist.
This script activates on your 404 page. If a user hits a non-existent page, the script checks your sitemap, finds the most similar existing page URL, and redirects them there.
For instance, if "/how-to-write-javascript-code/" no longer exists and has been changed to "/blog/write-javascript-code/", the script identifies and redirects to the correct page by matching keywords. It also includes customizable settings for improved flexibility, like excluding certain keywords and setting a minimum word length.
Here's the script:
<script>
// Settings: the values below are the only part you need to adapt to your own site.
const baseUrl = 'https://domain.com/'; // Your website root. This prefix is stripped from each sitemap URL before its path is compared with the keywords, so it should match the prefix used by the URLs in your sitemap.
const sitemapURL = 'https://domain.com/sitemap.xml'; // Your sitemap URL. It is fetched each time the script runs on a "Not Found" page.
const notFoundTitleKeyword = 'Not Found'; // A keyword that is present in your "Not Found" page title, e.g. "404" or "Not Found". The script does nothing unless the page title contains it.
const ignoreWords = ['blog', 'feed', 'guide']; // Common words to ignore. Path segments equal to one of these are skipped when extracting keywords; pick low-value words that appear in most of your URLs.
const minWordLength = 3; // Word-length threshold in characters. Only words LONGER than this become keywords, so with 3 the words "to", "a" and "the" are ignored.
// Only edit below here if you know what you're doing:
document.addEventListener('DOMContentLoaded', function() {
if (document.title.includes(notFoundTitleKeyword)) {
/**
 * Downloads a sitemap and returns the URLs listed in its <loc> elements.
 *
 * Failures never propagate: a network error, a non-2xx HTTP status or a
 * parsing problem is logged and reported as an empty list.
 *
 * @param {string} url - Address of the XML sitemap.
 * @returns {Promise<string[]>} Trimmed, non-empty <loc> values; [] on failure.
 */
async function fetchSitemap(url) {
  try {
    const response = await fetch(url);
    // fetch() only rejects on network failure; without this check an HTTP
    // error page (404, 500, ...) would be parsed as if it were the sitemap.
    if (!response.ok) {
      throw new Error(`Sitemap request failed with HTTP status ${response.status}`);
    }
    const text = await response.text();
    const parser = new DOMParser();
    const xmlDoc = parser.parseFromString(text, "text/xml");
    return Array.from(xmlDoc.querySelectorAll('loc'))
      // Pretty-printed sitemaps often put whitespace/newlines around the URL.
      .map(node => node.textContent.trim())
      .filter(loc => loc.length > 0);
  } catch (error) {
    console.error('Error fetching sitemap:', error);
    return [];
  }
}
/**
 * Extracts search keywords from the path of the page currently being viewed.
 *
 * The path is split on "/" into segments and each segment on "-" into words.
 * Purely numeric segments and anything listed in `ignoreWords` are discarded,
 * as is every word that is not longer than `minWordLength` characters.
 *
 * @returns {string[]} Unique keywords in order of first appearance.
 */
function processCurrentURL() {
  const segments = window.location.pathname
    .split('/')
    // Drop empty, ignored and all-digit segments (the latter carry no keyword).
    .filter(segment => segment.length > 0 && !ignoreWords.includes(segment) && !/^\d+$/.test(segment));
  const words = segments
    .join('-')
    .split('-')
    // ignoreWords is checked per word as well, so an ignored word inside a
    // hyphenated slug (e.g. "/my-blog-post/") is excluded too.
    .filter(word => word.length > minWordLength && !ignoreWords.includes(word));
  // De-duplicate so a word repeated in the path is only counted once when scoring.
  return [...new Set(words)];
}
/**
 * Picks the sitemap URL whose path shares the most keywords with the missing page.
 *
 * A keyword counts once for a URL when any "/"-separated part of that URL
 * (after removing `baseUrl`) contains it as a substring. When several URLs
 * reach the same score, the one listed first wins.
 *
 * @param {string[]} currentKeywords - Keywords taken from the requested path.
 * @param {string[]} urls - Candidate URLs from the sitemap.
 * @returns {string} The best-scoring URL, or '' when no URL matches any keyword.
 */
function findClosestMatch(currentKeywords, urls) {
  const leader = urls.reduce((best, candidate) => {
    const pathPieces = candidate
      .replace(baseUrl, '')
      .split('/')
      .filter(piece => piece !== '');
    const hits = currentKeywords
      .filter(term => pathPieces.some(piece => piece.includes(term)))
      .length;
    // Strictly greater: an equal score never displaces an earlier URL.
    return hits > best.hits ? { url: candidate, hits } : best;
  }, { url: '', hits: 0 });
  return leader.url;
}
/**
 * Fetches the sitemap, finds the URL closest to the current (missing) page and
 * navigates to it with location.replace(), so the missing page is not kept in
 * the browser history.
 *
 * Does nothing except log when the sitemap is empty or unavailable, when no
 * URL matches, or when the best match is the page already being viewed.
 *
 * @returns {Promise<void>}
 */
async function redirectToClosestMatch() {
  // True when `target` resolves to the same origin and path as the current page.
  const pointsToCurrentPage = target => {
    try {
      const { origin, pathname } = new URL(target, window.location.href);
      return origin === window.location.origin && pathname === window.location.pathname;
    } catch (error) {
      // Unparsable target: it cannot be proven to be this page, so do not block it.
      return false;
    }
  };

  const sitemapUrls = await fetchSitemap(sitemapURL);
  if (sitemapUrls.length === 0) {
    console.log('Sitemap is empty or could not be fetched.');
    return;
  }

  const currentKeywords = processCurrentURL();
  const closestMatch = findClosestMatch(currentKeywords, sitemapUrls);
  if (!closestMatch) {
    console.log('No close match found.');
    return;
  }

  // A stale sitemap can still list the URL that just returned "Not Found";
  // redirecting to it would reload this same page in an endless loop.
  if (pointsToCurrentPage(closestMatch)) {
    console.log('Closest match is the current page; not redirecting.');
    return;
  }

  console.log('Redirecting to:', closestMatch);
  window.location.replace(closestMatch);
}
redirectToClosestMatch();
}
else {
console.log('Incorrect page title:', document.title);
}
});
</script>
To add this to your site:
- Click the gear in the upper right
- Click the "Website" dropdown under "Tools" in the bottom left
- Click "Pages"
- Add the script to your site footer HTML.
All done! Now, go try visiting the wrong URL or adding a couple of extra characters after one of your blog post links. You'll see it gracefully redirects to the closest page.
While this was designed specifically for a client on HubSpot, it can theoretically work with just about any CMS.
What do you think? Leave me a comment below!
Comments