User:Vanderwaalforces/checkTranslationAttribution.js

Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
// @name        Wikipedia translation attribution checker
// @namespace   https://en.wikipedia.org/
// @version     1.4
// @description Checks if a page is a potential unattributed translation, has suspicious access dates, or was created using the ContentTranslation tool.
// @author      [[User:Vanderwaalforces]]
// @match       https://en.wikipedia.org/wiki/*
// @match       https://en.wikipedia.org/w/index.php?title=*

(function() {
    'use strict';

    // Ensure the script only runs in mainspace (0) or draftspace (118)
    const namespace = mw.config.get('wgNamespaceNumber');
    if (namespace !== 0 && namespace !== 118) return;

    const apiUrl = "https://en.wikipedia.org/w/api.php";
    const pageTitle = mw.config.get('wgPageName');
    const talkPageTitle = "Talk:" + pageTitle;

    // Language-code prefixes (two or three letters) that count as interwiki links
    // when they appear in an edit summary followed by a colon.
    // The rows are only a layout aid; flattening yields one list in the order written.
    const validLanguageCodes = [
        ['ab', 'ar', 'az', 'en', 'bn', 'de', 'fa', 'fi', 'fr', 'es', 'et', 'ru', 'ha', 'he', 'ig', 'is', 'id', 'it'],
        ['ja', 'ka', 'lv', 'pt', 'ml', 'nl', 'pl', 'sk', 'sv', 'sq', 'tr', 'no', 'da', 'cs', 'ko', 'uk', 'zh'],
        ['ace', 'arc', 'arz', 'ast', 'bat', 'bcl', 'bjn', 'bpy', 'bug', 'cbk', 'ceb', 'crh', 'csb', 'diq', 'dsb', 'eml'],
        ['fiu', 'gag', 'glk', 'hif', 'hsb', 'ilo', 'jv', 'kab', 'kbd', 'ksh', 'lez', 'lmo', 'ltg', 'mai', 'map', 'mdf', 'mhr'],
        ['min', 'mrj', 'mwl', 'nds', 'nov', 'nrm', 'pag', 'pam', 'pcm', 'pdc', 'pfl', 'pnb', 'roa', 'rue', 'sah', 'scn', 'skr'],
        ['srn', 'szl', 'tpi', 'vec', 'vep', 'vls', 'war', 'wuu', 'xmf', 'yo', 'yue', 'zea']
    ].flat();

    // Fetch the edit summaries and timestamps of up to 100 revisions of the page.
    //
    // Resolves with { comments, firstRevisionDate }:
    //   - comments: one string per returned revision; a revision without a readable
    //     summary (e.g. a hidden one, which has no `comment` field) contributes "".
    //   - firstRevisionDate: timestamp of the last revision in the returned list, i.e.
    //     the oldest of the fetched batch, not necessarily the page's creation edit.
    // Rejects with the jQuery error object on a transport failure, or with an Error
    // when the response contains no revisions (missing page, API error payload).
    function fetchEditSummaries() {
        return new Promise((resolve, reject) => {
            $.ajax({
                url: apiUrl,
                data: {
                    action: "query",
                    format: "json",
                    prop: "revisions",
                    titles: pageTitle,
                    rvprop: "comment|timestamp",
                    rvlimit: 100,
                    origin: "*"
                },
                success: function(data) {
                    const pages = (data && data.query && data.query.pages) || {};
                    const page = pages[Object.keys(pages)[0]];
                    const revisions = page && page.revisions;

                    // Reject explicitly: throwing inside this callback would leave the
                    // promise pending forever and the caller would silently stall.
                    if (!Array.isArray(revisions) || revisions.length === 0) {
                        reject(new Error(`No revisions returned for "${pageTitle}"`));
                        return;
                    }

                    const firstRevisionDate = revisions[revisions.length - 1].timestamp;
                    // Normalise missing summaries so callers can treat every entry as a string.
                    const comments = revisions.map(rev => rev.comment || "");

                    // Log the fetched comments and first revision date for debugging
                    console.log("Fetched edit summaries:", comments);
                    console.log("First revision date (from 100 revisions):", firstRevisionDate);

                    resolve({ comments, firstRevisionDate });
                },
                error: function(err) {
                    reject(err);
                }
            });
        });
    }

    // Fetch the oldest revision of the page (its creation edit).
    //
    // Resolves with the revision object as returned by the API, carrying the
    // requested `comment`, `tags` and `timestamp` properties where available.
    // Rejects with the jQuery error object on a transport failure, or with an Error
    // when the response contains no revision (missing page, API error payload).
    function fetchFirstRevision() {
        return new Promise((resolve, reject) => {
            $.ajax({
                url: apiUrl,
                data: {
                    action: "query",
                    format: "json",
                    prop: "revisions",
                    titles: pageTitle,
                    rvprop: "comment|tags|timestamp",  // Include timestamp to get the creation date
                    rvdir: "newer",  // Fetch from the oldest revision
                    rvlimit: 1,      // Only fetch the first revision
                    origin: "*"
                },
                success: function(data) {
                    const pages = (data && data.query && data.query.pages) || {};
                    const page = pages[Object.keys(pages)[0]];
                    const revisions = page && page.revisions;

                    // Reject explicitly: throwing inside this callback would leave the
                    // promise pending forever and the caller would silently stall.
                    if (!Array.isArray(revisions) || revisions.length === 0) {
                        reject(new Error(`No revisions returned for "${pageTitle}"`));
                        return;
                    }

                    const firstRevision = revisions[0]; // Get the first revision
                    console.log("First revision fetched:", firstRevision);
                    resolve(firstRevision);  // Now includes the creation timestamp
                },
                error: function(err) {
                    reject(err);
                }
            });
        });
    }

    // Retrieve the current wikitext of the article so its citations can be inspected.
    // Resolves with the wikitext string, or null when the response carries no revision;
    // rejects with whatever the AJAX error callback receives first.
    function fetchWikitext() {
        return new Promise((resolve, reject) => {
            const handleResponse = (data) => {
                const [pageData] = Object.values(data.query.pages);
                const latest = pageData.revisions && pageData.revisions[0];

                if (!latest) {
                    console.log("No wikitext found");
                    resolve(null);  // Nothing to inspect
                    return;
                }

                const wikitext = latest['*'];

                // Debug aid: dump what was fetched
                console.log("Fetched wikitext:", wikitext);

                resolve(wikitext);
            };

            const request = {
                action: "query",
                format: "json",
                prop: "revisions",
                titles: pageTitle,
                rvprop: "content",
                origin: "*"
            };

            $.ajax({
                url: apiUrl,
                data: request,
                success: handleResponse,
                error: (err) => reject(err)
            });
        });
    }

    // Look at the article's talk page and report whether its text mentions "translat"
    // (case-insensitively). Resolves with false when the talk page has no content;
    // rejects with whatever the AJAX error callback receives first.
    function fetchTalkPageContent() {
        return new Promise((resolve, reject) => {
            const handleResponse = (data) => {
                const [page] = Object.values(data.query.pages);
                const revisions = page.revisions;
                const latest = revisions && revisions[0];
                const rawText = latest && latest['*'];

                if (!rawText) {
                    resolve(false);
                    return;
                }

                const talkPageContent = rawText.toLowerCase();

                // Debug aid: dump what was fetched
                console.log("Fetched talk page content:", talkPageContent);

                resolve(talkPageContent.includes("translat"));
            };

            const request = {
                action: "query",
                format: "json",
                prop: "revisions",
                titles: talkPageTitle,
                rvprop: "content",
                origin: "*"
            };

            $.ajax({
                url: apiUrl,
                data: request,
                success: handleResponse,
                error: (err) => reject(err)
            });
        });
    }

    // Report whether an edit summary contains one of the known language codes
    // immediately followed by a colon (matched case-insensitively at a word boundary).
    function containsInterwikiLink(comment) {
        const alternatives = validLanguageCodes.join('|');
        const pattern = new RegExp('\\b(' + alternatives + '):', 'i');
        const found = pattern.test(comment);
        return found;
    }

    // Decide whether an edit summary reads like a translation/import note: it must
    // contain "from" together with at least one of "translat", "imported" or
    // "importing" (all compared case-insensitively).
    function containsTranslationKeywords(comment) {
        const text = comment.toLowerCase();
        const mentionsFrom = text.includes("from");
        const pairedWithFrom = (keyword) => text.includes(keyword) && mentionsFrom;

        const hasTranslatAndFrom = pairedWithFrom("translat");
        const hasImportedAndFrom = pairedWithFrom("imported");
        const hasImportingAndFrom = pairedWithFrom("importing");

        // Debug aid: show how this summary was judged
        console.log(`Summary: ${comment}, Has translat + from: ${hasTranslatAndFrom}, Has imported + from: ${hasImportedAndFrom}, Has importing + from: ${hasImportingAndFrom}`);

        return [hasTranslatAndFrom, hasImportedAndFrom, hasImportingAndFrom].some(Boolean);
    }

    // Scan every edit summary and record two facts: whether any translation-style
    // summary names an interwiki source, and whether any translation-style summary
    // lacks one. Summaries without translation keywords are ignored.
    function classifyEditSummaries(editSummaries) {
        const outcome = {
            hasTranslatNoInterwiki: false,
            hasTranslatWithInterwiki: false
        };

        for (const summary of editSummaries) {
            if (!containsTranslationKeywords(summary)) {
                continue;
            }

            if (containsInterwikiLink(summary)) {
                outcome.hasTranslatWithInterwiki = true;
            } else {
                outcome.hasTranslatNoInterwiki = true;
            }
        }

        return outcome;
    }

    // Extract the first recognisable calendar date from a string.
    //
    // Supported formats (tried in this order):
    //   "21 January 2024" / "21 Jan 2024"
    //   "2024-01-21"
    //   "21-01-2024"            (day-month-year)
    //   "January 21, 2024" / "Jan 21 2024"
    //
    // Returns a Date at 00:00 UTC of that calendar day, or null when no supported,
    // valid date is present. The Date is assembled from the captured components
    // rather than by handing the matched text to the Date constructor, because
    // parsing of non-ISO strings is implementation-defined (it may yield an Invalid
    // Date or a local-midnight time), which makes day comparisons timezone-dependent.
    function parseDate(dateString) {
        if (typeof dateString !== 'string') {
            return null;
        }

        const monthNumbers = {
            jan: 1, feb: 2, mar: 3, apr: 4, may: 5, jun: 6,
            jul: 7, aug: 8, sep: 9, oct: 10, nov: 11, dec: 12
        };
        const monthOf = (abbreviation) => monthNumbers[abbreviation.toLowerCase()];

        // Each format pairs a pattern with a mapper from its match to [year, month (1-12), day].
        const formats = [
            {
                regex: /\b(\d{1,2})\s(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\s(\d{4})\b/i,
                toParts: (m) => [Number(m[3]), monthOf(m[2]), Number(m[1])]
            },
            {
                regex: /\b(\d{4})-(\d{2})-(\d{2})\b/,
                toParts: (m) => [Number(m[1]), Number(m[2]), Number(m[3])]
            },
            {
                regex: /\b(\d{1,2})-(\d{1,2})-(\d{4})\b/,
                toParts: (m) => [Number(m[3]), Number(m[2]), Number(m[1])]
            },
            {
                // "[a-z]*" lets full month names match, not only three-letter abbreviations.
                regex: /\b(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\s(\d{1,2}),?\s(\d{4})\b/i,
                toParts: (m) => [Number(m[3]), monthOf(m[1]), Number(m[2])]
            }
        ];

        for (const { regex, toParts } of formats) {
            const match = regex.exec(dateString);
            if (!match) {
                continue;
            }

            const [year, month, day] = toParts(match);
            const candidate = new Date(Date.UTC(year, month - 1, day));

            // Date.UTC silently rolls over out-of-range parts (e.g. 30 February becomes
            // 1 March); accept the result only if it round-trips to the same day.
            const isRealDay = candidate.getUTCFullYear() === year &&
                candidate.getUTCMonth() === month - 1 &&
                candidate.getUTCDate() === day;
            if (isRealDay) {
                return candidate;
            }
        }
        return null;  // Return null if no valid date format is found
    }

    // Report whether any citation access date in the wikitext falls on a calendar
    // day (UTC) strictly before the day of the given first-revision timestamp.
    //
    // wikitext:          article source, or a falsy value when none is available.
    // firstRevisionDate: timestamp string of the article's first revision.
    // Returns true if at least one such access date is found, otherwise false
    // (also when there is no wikitext or the timestamp cannot be parsed).
    function checkSuspiciousAccessDates(wikitext, firstRevisionDate) {
        if (!wikitext) {
            console.log("No wikitext to check for access dates");
            return false;
        }

        const MS_PER_DAY = 24 * 60 * 60 * 1000;
        // Whole days since the epoch in UTC: comparing these ignores the time of day.
        const utcDayNumber = (date) => Math.floor(date.getTime() / MS_PER_DAY);

        const firstRevisionParsed = new Date(firstRevisionDate);
        if (Number.isNaN(firstRevisionParsed.getTime())) {
            // Without a usable creation date there is nothing to compare against.
            console.log("Unparseable first revision date:", firstRevisionDate);
            return false;
        }
        const creationDay = utcDayNumber(firstRevisionParsed);

        const accessDateRegex = /\|\s*access[- ]date\s*=\s*([A-Za-z0-9, -]+)/g;
        const suspiciousDates = [];

        for (const match of wikitext.matchAll(accessDateRegex)) {
            const accessDate = match[1];
            const accessDateParsed = parseDate(accessDate);

            // Skip values that could not be parsed. The validity check must come before
            // any date arithmetic: an Invalid Date would otherwise abort the whole scan.
            if (!accessDateParsed || Number.isNaN(accessDateParsed.getTime())) {
                continue;
            }

            // Check if the access date is strictly before the article's creation date
            if (utcDayNumber(accessDateParsed) < creationDay) {
                suspiciousDates.push(accessDate);
            }
        }

        // Log any suspicious dates for debugging
        console.log("Suspicious access dates:", suspiciousDates);

        return suspiciousDates.length > 0;
    }

    // Show a coloured banner directly above the #contentSub element. The banner
    // carries the given HTML and a small "x" button in its top-right corner that
    // hides the banner when clicked.
    function displayMessage(text, color) {
        const banner = document.createElement("div");
        Object.assign(banner.style, {
            backgroundColor: color,
            color: "white",
            padding: "15px",
            textAlign: "center",
            fontSize: "16px",
            fontWeight: "bold",
            position: "relative"  // Anchor for the absolutely positioned button
        });
        // The text may contain link markup, so it is set as HTML. This must happen
        // before the button is appended, since assigning innerHTML replaces all children.
        banner.innerHTML = text;

        const closeButton = document.createElement("button");
        closeButton.textContent = "x";
        Object.assign(closeButton.style, {
            position: "absolute",
            top: "5px",
            right: "10px",
            backgroundColor: "#ff5f5f",
            border: "none",
            color: "white",
            padding: "5px 10px",
            cursor: "pointer",
            fontWeight: "bold",
            borderRadius: "5px"
        });
        closeButton.onclick = function() {
            banner.style.display = "none";
        };
        banner.appendChild(closeButton);

        // #contentSub is used as the anchor so the banner works across skins
        $('#contentSub').before(banner);
    }

    // Main logic: inspect the page's history and citations, then show at most two
    // banners describing the attribution status.
    //
    //   1. If the creation edit was made with the ContentTranslation tool, show a green
    //      banner and stop.
    //   2. Otherwise classify the fetched edit summaries and look for citation access
    //      dates that predate the creation edit, then pick the banner(s) to show.
    //
    // Never rejects: any failure is logged to the console and no banner is shown.
    async function checkTranslationAttribution() {
        try {
            // Fetch the first revision to check for ContentTranslation tool and creation date
            const firstRevision = await fetchFirstRevision();
            // A revision may lack a readable summary or a tag list (e.g. when the
            // summary is hidden); fall back to empty values so the checks below
            // cannot throw and suppress every banner.
            const firstComment = firstRevision.comment || "";
            const firstTags = firstRevision.tags || [];
            const firstRevisionDate = firstRevision.timestamp;  // Timestamp of the creation edit

            // Define the constants for links to WP:TFOLWP and the Translated from template
            const wpShortcutLink = mw.html.element('a', {
                href: '/wiki/WP:TFOLWP',
                target: '_blank',
                style: 'color:white; text-decoration:underline;'
            }, '[[WP:TFOLWP]]');

            const templateLink = mw.html.element('a', {
                href: '/wiki/Template:Translated_from',
                target: '_blank',
                style: 'color:white; text-decoration:underline;'
            }, '{{Translated from}}');

            // If the first edit used the ContentTranslation tool, display the green banner and skip other checks
            if (firstComment.includes("Created by translating the page") && firstTags.includes("contenttranslation")) {
                const contentTranslationLink = mw.html.element('a', {
                    href: 'https://www.mediawiki.org/wiki/Special:MyLanguage/Content_translation',
                    target: '_blank',
                    style: 'color:white; text-decoration:underline;'
                }, 'ContentTranslation');

                displayMessage(`This article was created using the ${contentTranslationLink} module and is correctly attributed to the source Wikipedia.`, "green");
                return; // Skip all other checks if ContentTranslation tool was used
            }

            // The summaries and the wikitext are independent requests, so fetch them concurrently.
            const [{ comments }, wikitext] = await Promise.all([fetchEditSummaries(), fetchWikitext()]);

            // Only string summaries can be classified; entries without text are dropped.
            const readableComments = comments.filter(comment => typeof comment === 'string');
            const { hasTranslatNoInterwiki, hasTranslatWithInterwiki } = classifyEditSummaries(readableComments);

            const hasSuspiciousAccessDates = checkSuspiciousAccessDates(wikitext, firstRevisionDate);  // Use actual creation date

            // Display singular orange banner if there are suspicious access dates (no translation detected)
            if (hasSuspiciousAccessDates && !hasTranslatNoInterwiki && !hasTranslatWithInterwiki) {
                displayMessage("Warning: There are citations in this article that have access dates from before the article was created. This suggests the article may have been copy-pasted from somewhere.", "orange");
                return; // Only show the orange banner and stop further checks
            }

            // If suspicious dates are found and the article is correctly attributed
            if (hasSuspiciousAccessDates && hasTranslatWithInterwiki) {
                displayMessage("Notice: Despite some citations having access dates before the article's creation, indicating possible copy-pasting or interwiki translation, proper attribution has been given.", "green");
            }
            // If suspicious dates are found and no proper attribution
            else if (hasSuspiciousAccessDates && hasTranslatNoInterwiki) {
                displayMessage(`Warning: This article is likely an unattributed translation. Please see ${wpShortcutLink} for proper attribution, and consider adding ${templateLink} to the talk page.`, "red");
                displayMessage("Warning: There are citations in this article that have access dates from before the article was created. This suggests the article may have been copy-pasted from somewhere.", "orange");
            }
            // If there are no suspicious dates and the article is correctly attributed
            else if (!hasSuspiciousAccessDates && hasTranslatWithInterwiki) {
                // The talk page is only consulted here, to decide whether to suggest the template.
                const hasTranslatInTalkPage = await fetchTalkPageContent();
                if (!hasTranslatInTalkPage) {
                    displayMessage(`Notice: This translated article has been correctly attributed. Consider optionally adding ${templateLink} to the talk page.`, "green");
                } else {
                    displayMessage("Notice: This translated article has been correctly attributed.", "green");
                }
            }
            // If there are no suspicious dates and no proper attribution
            else if (!hasSuspiciousAccessDates && hasTranslatNoInterwiki) {
                displayMessage(`Warning: This article is likely an unattributed translation. Please see ${wpShortcutLink} for proper attribution, and consider adding ${templateLink} to the talk page.`, "red");
            }
        } catch (error) {
            console.error("Error checking translation attribution:", error);
        }
    }

    // Run the check immediately. The returned promise is intentionally not awaited:
    // checkTranslationAttribution catches its own errors and logs them to the console.
    checkTranslationAttribution();
})();