"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
const types_1 = require("./types");
const InteropService_Importer_Base_1 = require("./InteropService_Importer_Base");
const path_utils_1 = require("../../path-utils");
const InteropService_Importer_Md_1 = require("./InteropService_Importer_Md");
const path_1 = require("path");
const Logger_1 = require("@joplin/utils/Logger");
const uuid_1 = require("../../uuid");
const shim_1 = require("../../shim");
const logger = Logger_1.default.create('InteropService_Importer_OneNote');
// See onenote-converter README.md for more information
/**
 * Imports OneNote content from a `.zip`, `.one` or `.onepkg` source.
 *
 * The source is extracted into a temporary directory, each OneNote file is
 * converted to HTML with `@joplin/onenote-converter`, the generated HTML is
 * post-processed (SVG extraction, `onenote:` link rewriting, clean-up) and the
 * result is finally handed to the Markdown/HTML importer.
 */
class InteropService_Importer_OneNote extends InteropService_Importer_Base_1.default {
    constructor() {
        super(...arguments);
        this.importedNotes = {};
        // Both are injected through the options passed to init().
        this.domParser = null;
        this.xmlSerializer = null;
    }
    /**
     * Initialises the importer and stores the DOM helpers supplied in `options`.
     *
     * @param sourcePath Path of the file to import (forwarded to the base class).
     * @param options Import options. Must provide `domParser` and `xmlSerializer`.
     * @throws {Error} If `options.domParser` or `options.xmlSerializer` is missing.
     */
    async init(sourcePath, options) {
        await super.init(sourcePath, options);
        if (!options.domParser || !options.xmlSerializer) {
            throw new Error('OneNote importer requires DOMParser and XMLSerializer to be able to extract SVG from HTML.');
        }
        this.domParser = options.domParser;
        this.xmlSerializer = options.xmlSerializer;
    }
    /**
     * Returns the first path segment of `entryName` once the first occurrence of
     * `unzippedPath` has been removed from it and the remainder normalized.
     *
     * @param unzippedPath Directory prefix to strip from `entryName`.
     * @param entryName Path of an extracted entry.
     * @returns The first segment of the normalized remainder.
     */
    getEntryDirectory(unzippedPath, entryName) {
        const withoutBasePath = entryName.replace(unzippedPath, '');
        return (0, path_1.normalize)(withoutBasePath).split(path_1.sep)[0];
    }
    /**
     * Places the content of `sourcePath` into `targetPath`, based on the
     * (case-insensitive) file extension:
     *  - `.zip`: extracted directly into `targetPath`.
     *  - `.one`: copied into `<targetPath>/<file name without extension>/`.
     *  - `.onepkg`: copied as a `.cab` file, then its `*.one` entries are
     *    extracted into `<targetPath>/<file name without extension>/`.
     *
     * @param sourcePath File to extract.
     * @param targetPath Directory that receives the extracted content.
     * @returns The recursive directory listing of `targetPath`.
     * @throws {Error} If the file extension is not one of the above.
     */
    async extractFiles_(sourcePath, targetPath) {
        const fileExtension = (0, path_1.extname)(sourcePath).toLowerCase();
        const fileNameNoExtension = (0, path_1.basename)(sourcePath, (0, path_1.extname)(sourcePath));
        if (fileExtension === '.zip') {
            logger.info('Unzipping files...');
            await shim_1.default.fsDriver().zipExtract({ source: sourcePath, extractTo: targetPath });
        }
        else if (fileExtension === '.one') {
            logger.info('Copying file...');
            const outputDirectory = (0, path_1.join)(targetPath, fileNameNoExtension);
            await shim_1.default.fsDriver().mkdir(outputDirectory);
            await shim_1.default.fsDriver().copy(sourcePath, (0, path_1.join)(outputDirectory, (0, path_1.basename)(sourcePath)));
        }
        else if (fileExtension === '.onepkg') {
            // Change the file extension so that the archive can be extracted
            const archivePath = (0, path_1.join)(targetPath, `${fileNameNoExtension}.cab`);
            await shim_1.default.fsDriver().copy(sourcePath, archivePath);
            const extractPath = (0, path_1.join)(targetPath, fileNameNoExtension);
            await shim_1.default.fsDriver().mkdir(extractPath);
            await shim_1.default.fsDriver().cabExtract({
                source: archivePath,
                extractTo: extractPath,
                // Only the .one files are used--there's no need to extract
                // other files.
                fileNamePattern: '*.one',
            });
            await this.fixIncorrectLatin1Decoding_(extractPath);
        }
        else {
            throw new Error(`Unknown file extension: ${fileExtension}`);
        }
        return await shim_1.default.fsDriver().readDirStats(targetPath, { recursive: true });
    }
    /**
     * Runs the import using the two provided working directories.
     *
     * @param result Import result object; warnings are appended to `result.warnings`.
     * @param unzipTempDirectory Directory into which the source is extracted.
     * @param tempOutputDirectory Directory that receives the generated HTML.
     * @returns The import result, as returned by the HTML importer (or `result`
     *   unchanged, apart from a warning, when nothing was extracted).
     */
    async execImpl_(result, unzipTempDirectory, tempOutputDirectory) {
        const sourcePath = (0, path_utils_1.rtrimSlashes)(this.sourcePath_);
        const files = await this.extractFiles_(sourcePath, unzipTempDirectory);
        if (files.length === 0) {
            result.warnings.push('Zip file has no files.');
            return result;
        }
        // Forwards an error to the optional onError callback from the import options.
        const reportError = (error) => {
            const options = this.options_;
            if (options.onError !== null && options.onError !== undefined) {
                options.onError(error);
            }
        };
        const baseFolder = this.getEntryDirectory(unzipTempDirectory, files[0].path);
        const notebookBaseDir = (0, path_1.join)(unzipTempDirectory, baseFolder, path_1.sep);
        const outputDirectory2 = (0, path_1.join)(tempOutputDirectory, baseFolder);
        const notebookFiles = files.filter(e => {
            return (0, path_1.extname)(e.path) !== '.onetoc2' && (0, path_1.basename)(e.path) !== 'OneNote_RecycleBin.onetoc2';
        });
        const { oneNoteConverter } = shim_1.default.requireDynamic('@joplin/onenote-converter');
        logger.info('Extracting OneNote to HTML');
        const skippedFiles = [];
        for (const notebookFile of notebookFiles) {
            const notebookFilePath = (0, path_1.join)(unzipTempDirectory, notebookFile.path);
            // In some cases, the OneNote zip file can include folders and other files
            // that shouldn't be imported directly. Skip these:
            if (!['.one', '.onetoc2'].includes((0, path_1.extname)(notebookFilePath).toLowerCase())) {
                logger.info('Skipping non-OneNote file:', notebookFile.path);
                skippedFiles.push(notebookFile.path);
                continue;
            }
            try {
                await oneNoteConverter(notebookFilePath, (0, path_1.resolve)(outputDirectory2), notebookBaseDir);
            }
            catch (error) {
                // Forward only the error message. Usually the stack trace points to bytes in the WASM file.
                // It's very difficult to use and can cause the error report to be longer than the maximum
                // length for auto-creating a forum post:
                const message = error.message;
                reportError(message !== null && message !== undefined ? message : error);
                console.error(error);
            }
        }
        if (skippedFiles.length === notebookFiles.length) {
            reportError(new Error(`None of the files appear to be from OneNote. Skipped files include: ${JSON.stringify(skippedFiles)}`));
        }
        logger.info('Postprocessing imported content...');
        await this.postprocessGeneratedHtml_(tempOutputDirectory);
        logger.info('Importing HTML into Joplin');
        const importer = new InteropService_Importer_Md_1.default();
        importer.setMetadata({ fileExtensions: ['html'] });
        await importer.init(tempOutputDirectory, Object.assign(Object.assign({}, this.options_), { format: 'html', outputFormat: types_1.ImportModuleOutputFormat.Html }));
        logger.info('Finished');
        result = await importer.exec(result);
        return result;
    }
    /**
     * Entry point of the import. Obtains two temporary directories from
     * `temporaryDirectory_`, runs the import and removes both directories
     * afterwards, whether or not the import succeeded.
     *
     * @param result Import result object to fill in.
     * @returns The import result.
     */
    async exec(result) {
        const unzipTempDirectory = await this.temporaryDirectory_(true);
        // The try/finally blocks are nested so that a failure while obtaining or
        // removing one directory cannot prevent the other one from being removed.
        try {
            const tempOutputDirectory = await this.temporaryDirectory_(true);
            try {
                return await this.execImpl_(result, unzipTempDirectory, tempOutputDirectory);
            }
            finally {
                await shim_1.default.fsDriver().remove(tempOutputDirectory);
            }
        }
        finally {
            await shim_1.default.fsDriver().remove(unzipTempDirectory);
        }
    }
    /**
     * Reads every `.html` file below `baseFolder` and indexes it by the value of
     * its `X-Original-Page-Id` meta tag (matched case-insensitively).
     *
     * @param baseFolder Folder to scan recursively.
     * @returns An object with a `get(id)` method that returns `{ path }` for a
     *   known page ID and `null` otherwise (including for a non-string `id`).
     */
    async buildIdMap_(baseFolder) {
        const htmlFiles = await this.getValidHtmlFiles_((0, path_1.resolve)(baseFolder));
        const pageIdToPath = new Map();
        for (const file of htmlFiles) {
            const fullPath = (0, path_1.join)(baseFolder, file.path);
            const html = await shim_1.default.fsDriver().readFile(fullPath);
            const metaTagMatch = html.match(/<meta name="X-Original-Page-Id" content="([^"]+)"/i);
            if (metaTagMatch) {
                const pageId = metaTagMatch[1];
                pageIdToPath.set(pageId.toUpperCase(), fullPath);
            }
        }
        return {
            get: (id) => {
                // Links that carry no page ID yield null here; treat them as unknown.
                if (typeof id !== 'string') {
                    return null;
                }
                const path = pageIdToPath.get(id.toUpperCase());
                if (path) {
                    return { path };
                }
                return null;
            },
        };
    }
    /**
     * Post-processes every `.html` file below `baseFolder`: inline SVGs are moved
     * to separate files, `onenote:` links are rewritten to relative file links
     * and unneeded elements are removed. A file is rewritten only if at least one
     * of these steps reported a change.
     *
     * @param baseFolder Folder containing the generated HTML.
     */
    async postprocessGeneratedHtml_(baseFolder) {
        const htmlFiles = await this.getValidHtmlFiles_((0, path_1.resolve)(baseFolder));
        // The page-ID index must be built before any file is rewritten:
        // simplifyHtml_ strips the X-Original-Page-Id meta tags, so an index built
        // later would miss every page that has already been processed.
        const pageIdMap = await this.buildIdMap_(baseFolder);
        const pipeline = [
            (dom, currentFolder) => this.extractSvgsToFiles_(dom, currentFolder),
            (dom, currentFolder) => this.convertExternalLinksToInternalLinks_(dom, currentFolder, pageIdMap),
            (dom, _currentFolder) => Promise.resolve(this.simplifyHtml_(dom)),
        ];
        for (const file of htmlFiles) {
            const fileLocation = (0, path_1.join)(baseFolder, file.path);
            const originalHtml = await shim_1.default.fsDriver().readFile(fileLocation);
            const dom = this.domParser.parseFromString(originalHtml, 'text/html');
            let changed = false;
            for (const task of pipeline) {
                const taskChanged = await task(dom, (0, path_1.dirname)(fileLocation));
                changed = changed || taskChanged;
            }
            if (changed) {
                // Don't use xmlSerializer here: It breaks <style> blocks.
                const updatedHtml = `<!DOCTYPE HTML>\n${dom.documentElement.outerHTML}`;
                await shim_1.default.fsDriver().writeFile(fileLocation, updatedHtml, 'utf-8');
            }
        }
    }
    /**
     * Lists the non-directory entries below `baseFolder` whose path ends with `.html`.
     *
     * @param baseFolder Folder to scan recursively.
     * @returns The matching directory entries.
     */
    async getValidHtmlFiles_(baseFolder) {
        const files = await shim_1.default.fsDriver().readDirStats(baseFolder, { recursive: true });
        return files.filter(f => !f.isDirectory() && f.path.endsWith('.html'));
    }
    /**
     * Rewrites `onenote:` links in `dom` so that they point to the generated HTML
     * file of the target page, relative to `baseFolder`.
     *
     * @param dom Parsed HTML document (modified in place).
     * @param baseFolder Folder containing the document; link targets are made
     *   relative to it.
     * @param pageIdMap Optional prebuilt page-ID index (see buildIdMap_). When
     *   omitted, an index of `baseFolder` is built the first time it is needed.
     * @returns `true` if at least one link was rewritten.
     */
    async convertExternalLinksToInternalLinks_(dom, baseFolder, pageIdMap = null) {
        let idMap_ = pageIdMap;
        const idMap = async () => {
            if (idMap_ === null || idMap_ === undefined) {
                idMap_ = await this.buildIdMap_(baseFolder);
            }
            return idMap_;
        };
        const links = dom.querySelectorAll('a[href^="onenote"]');
        let changed = false;
        for (const link of links) {
            if (!link.href.startsWith('onenote:')) {
                continue;
            }
            // Remove everything before the first query parameter (e.g. &section-id=).
            const separatorIndex = link.href.indexOf('&');
            const prefixRemoved = separatorIndex >= 0 ? link.href.substring(separatorIndex) : '';
            const params = new URLSearchParams(prefixRemoved);
            const pageId = params.get('page-id');
            // URLSearchParams.get returns null for a missing parameter. Such links
            // cannot be mapped to a page, so leave them untouched.
            if (pageId === null) {
                logger.info('Skipping OneNote link that has no page ID.');
                continue;
            }
            const targetPage = (await idMap()).get(pageId);
            // The target page might be in a different notebook (imported separately)
            if (!targetPage) {
                logger.info('Page not found for internal link. Page ID: ', pageId);
            }
            else {
                changed = true;
                link.href = (0, path_1.relative)(baseFolder, targetPage.path);
            }
        }
        return changed;
    }
    /**
     * Removes elements that are not needed in the imported note.
     *
     * @param dom Parsed HTML document (modified in place).
     * @returns `true` if at least one element was removed.
     */
    simplifyHtml_(dom) {
        const selectors = [
            // <script> blocks that aren't marked with a specific type (e.g. application/tex).
            'script:not([type])',
            // ID mappings (unused at this stage of the import process)
            'meta[name="X-Original-Page-Id"]',
            // Empty iframes
            'iframe[src=""]',
        ];
        let changed = false;
        for (const selector of selectors) {
            for (const element of dom.querySelectorAll(selector)) {
                element.remove();
                changed = true;
            }
        }
        return changed;
    }
    /**
     * Replaces the inline SVGs of `dom` with `<img>` elements and writes each
     * extracted SVG to a file in `svgBaseFolder`.
     *
     * @param dom Parsed HTML document (modified in place).
     * @param svgBaseFolder Folder that receives the `.svg` files.
     * @returns `true` if the document was modified.
     */
    async extractSvgsToFiles_(dom, svgBaseFolder) {
        const { svgs, changed } = this.extractSvgs(dom);
        for (const svg of svgs) {
            await shim_1.default.fsDriver().writeFile((0, path_1.join)(svgBaseFolder, svg.title), svg.content, 'utf8');
        }
        return changed;
    }
    // Public to allow testing:
    /**
     * Replaces every top-level `<svg>` element of `dom` with an `<img>` whose
     * `src` is `./<title>`, and returns the serialized SVGs. The `style`
     * attribute and the classes of the SVG are moved/copied to the `<img>`.
     *
     * @param dom Parsed HTML document (modified in place).
     * @param titleGenerator Returns the file name (without extension) to use for
     *   the next SVG.
     * @returns `{ svgs, changed }` where `svgs` is a list of `{ title, content }`
     *   and `changed` tells whether the document was modified.
     */
    extractSvgs(dom, titleGenerator = () => (0, uuid_1.uuidgen)(10)) {
        const svgNodeList = dom.querySelectorAll('svg');
        if (!svgNodeList || !svgNodeList.length) {
            return { svgs: [], changed: false };
        }
        // querySelectorAll also returns SVGs nested in other SVGs. Those are
        // already part of the serialized content of their outermost SVG, so
        // extracting them again would only produce unreferenced files.
        const hasSvgAncestor = (node) => {
            for (let ancestor = node.parentNode; ancestor; ancestor = ancestor.parentNode) {
                if (typeof ancestor.nodeName === 'string' && ancestor.nodeName.toLowerCase() === 'svg') {
                    return true;
                }
            }
            return false;
        };
        const svgs = [];
        for (const svgNode of svgNodeList) {
            // Get only the "top-level" SVGs (ignore nested)
            if (hasSvgAncestor(svgNode)) {
                continue;
            }
            const img = dom.createElement('img');
            if (svgNode.hasAttribute('style')) {
                img.setAttribute('style', svgNode.getAttribute('style'));
                svgNode.removeAttribute('style');
            }
            for (const entry of svgNode.classList) {
                img.classList.add(entry);
            }
            // A11Y: Translate SVG titles to ALT text
            // See https://developer.mozilla.org/en-US/docs/Web/SVG/Reference/Element/title
            const titleElement = svgNode.querySelector('title');
            if (titleElement) {
                img.alt = titleElement.textContent;
            }
            const title = `${titleGenerator()}.svg`;
            img.setAttribute('src', `./${title}`);
            svgs.push({
                title,
                content: this.xmlSerializer.serializeToString(svgNode),
            });
            svgNode.replaceWith(img);
        }
        return {
            svgs,
            changed: svgs.length > 0,
        };
    }
    // Works around a decoding issue in which file names are extracted as latin1 strings,
    // rather than UTF-8 strings. For example, OneNote seems to encode filenames as UTF-8 in .onepkg files.
    // However, EXPAND.EXE reads the filenames as latin1. As a result, "é.one" becomes
    // "Ã©.one" when extracted from the archive.
    // This workaround re-encodes filenames as UTF-8.
    /**
     * Renames the entries below `parentDir` (recursively) whose name changes when
     * it is re-interpreted from latin1 to UTF-8. Does nothing unless
     * `shim.isWindows()` is true.
     *
     * @param parentDir Directory whose content should be fixed.
     */
    async fixIncorrectLatin1Decoding_(parentDir) {
        // Only seems to be necessary on Windows.
        if (!shim_1.default.isWindows()) {
            return;
        }
        const fixEncoding = async (basePath, fileName) => {
            const originalPath = (0, path_1.join)(basePath, fileName);
            let newPath = originalPath;
            let fixedFileName = Buffer.from(fileName, 'latin1').toString('utf8');
            if (fixedFileName !== fileName) {
                // In general, the path shouldn't start with "."s or contain path separators.
                // However, if it does, these characters might cause import errors, so remove them:
                fixedFileName = fixedFileName.replace(/^\.+/, '');
                fixedFileName = fixedFileName.replace(/[/\\]/g, ' ');
                // Avoid path traversal: Ensure that the file path is contained within the base directory
                const newFullPathSafe = shim_1.default.fsDriver().resolveRelativePathWithinDir(basePath, fixedFileName);
                await shim_1.default.fsDriver().move(originalPath, newFullPathSafe);
                newPath = newFullPathSafe;
            }
            // After a rename, originalPath no longer exists: both the directory
            // check and the recursion have to use the entry's current location.
            if (await shim_1.default.fsDriver().isDirectory(newPath)) {
                const children = await shim_1.default.fsDriver().readDirStats(newPath, { recursive: false });
                for (const child of children) {
                    await fixEncoding(newPath, child.path);
                }
            }
        };
        const stats = await shim_1.default.fsDriver().readDirStats(parentDir, { recursive: false });
        for (const stat of stats) {
            await fixEncoding(parentDir, stat.path);
        }
    }
}
exports.default = InteropService_Importer_OneNote;
//# sourceMappingURL=InteropService_Importer_OneNote.js.map