"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
const test_utils_1 = require("../../testing/test-utils");
const OcrService_1 = require("./OcrService");
const Resource_1 = require("../../models/Resource");
const types_1 = require("../database/types");
const time_1 = require("@joplin/utils/time");
const Logger_1 = require("@joplin/utils/Logger");
const Setting_1 = require("../../models/Setting");
describe('OcrService', () => {
    jest.retryTimes(2);
    // Before every test: run the database/synchronizer setup for client 1
    // and then client 2, and finish by switching back to client 1 so each
    // test starts from it.
    beforeEach(async () => {
        const { setupDatabaseAndSynchronizer, switchClient } = test_utils_1;
        for (const clientId of [1, 2]) {
            await setupDatabaseAndSynchronizer(clientId);
        }
        await switchClient(1);
    });
    // Runs the OCR service over three image resources and checks the stored
    // OCR text, status, error and word-level details. Also checks that
    // processing increases `updated_time` while leaving `blob_updated_time`
    // unchanged.
    it('should process resources', async () => {
        const { resource: resource1 } = await (0, test_utils_1.createNoteAndResource)({ path: `${test_utils_1.ocrSampleDir}/testocr.png` });
        const { resource: resource2 } = await (0, test_utils_1.createNoteAndResource)({ path: `${test_utils_1.supportDir}/photo.jpg` });
        const { resource: resource3 } = await (0, test_utils_1.createNoteAndResource)({ path: `${test_utils_1.ocrSampleDir}/with_bullets.png` });
        // Wait to make sure that updated_time is updated
        await (0, time_1.msleep)(1);
        expect(await Resource_1.default.needOcrCount(OcrService_1.supportedMimeTypes)).toBe(3);
        const service = (0, test_utils_1.newOcrService)();
        // Always dispose of the service, even when an assertion fails, so
        // that a failed (and possibly retried) run does not leave it behind.
        try {
            await service.processResources();
            const expectedText = [
                'This is a lot of 12 point text to test the',
                'ocr code and see if it works on all types',
                'of file format.',
                'The quick brown dog jumped over the',
                'lazy fox. The quick brown dog jumped',
                'over the lazy fox. The quick brown dog',
                'jumped over the lazy fox. The quick',
                'brown dog jumped over the lazy fox.',
            ].join('\n');
            // ------------------------------------------------------------
            // Resource 1: text and per-word details
            // ------------------------------------------------------------
            const processedResource1 = await Resource_1.default.load(resource1.id);
            expect(processedResource1.ocr_text).toBe(expectedText);
            expect(processedResource1.ocr_status).toBe(types_1.ResourceOcrStatus.Done);
            expect(processedResource1.ocr_error).toBe('');
            const details = Resource_1.default.unserializeOcrDetails(processedResource1.ocr_details);
            // Rebuilding the text from the word details must give the same
            // result as the plain `ocr_text`.
            const lines = details.map(l => l.words.map(w => w.t).join(' ')).join('\n');
            expect(lines).toBe(expectedText);
            expect(details[0].words[0].t).toBe('This');
            expect(details[0].words[0]).toEqual({ 't': 'This', 'bb': [36, 96, 92, 116], 'bl': [36, 96, 116, 116] });
            // Also check that the resource blob has not been updated
            expect(processedResource1.blob_updated_time).toBe(resource1.blob_updated_time);
            expect(processedResource1.updated_time).toBeGreaterThan(resource1.updated_time);
            // ------------------------------------------------------------
            // Resource 2: the photo is expected to yield empty text
            // ------------------------------------------------------------
            const processedResource2 = await Resource_1.default.load(resource2.id);
            expect(processedResource2.ocr_text).toBe('');
            expect(processedResource2.ocr_status).toBe(types_1.ResourceOcrStatus.Done);
            expect(processedResource2.ocr_error).toBe('');
            // Also check that the blob of this resource has not been updated
            expect(processedResource2.blob_updated_time).toBe(resource2.blob_updated_time);
            expect(processedResource2.updated_time).toBeGreaterThan(resource2.updated_time);
            // ------------------------------------------------------------
            // Resource 3: text with bullet points
            // ------------------------------------------------------------
            const processedResource3 = await Resource_1.default.load(resource3.id);
            expect(processedResource3.ocr_text).toBe([
                'Declaration',
                '| declare that:',
                '® | will arrive in the UK within the next 48 hours',
                '® | understand | have to provide proof of a negative COVID 19 test prior to departure to the UK (unless',
                'exempt)',
                '® | have provided my seat number, if relevant',
                '® The information | have entered in this form is correct',
                '® | understand it could be a criminal offence to provide false details and | may be prosecuted',
                'If any of your information changes once you have submitted your details, such as travel details, seat number, or',
                'contact information, you must complete a new form.',
                '| confirm that | understand and agree with the above declarations.',
            ].join('\n'));
            expect(processedResource3.ocr_status).toBe(types_1.ResourceOcrStatus.Done);
            expect(processedResource3.ocr_error).toBe('');
        } finally {
            await service.dispose();
        }
        // On CI this test can randomly fail with the error "Exceeded timeout
        // of 90000 ms for a test.", so it is given an extended timeout. The
        // enclosing suite also enables `jest.retryTimes(2)` for the case
        // where the test is simply stuck.
    }, 60000 * 5);
    // Table-driven test: each row is [sample file name inside ocrSampleDir,
    // text expected in `ocr_text` after processing].
    test.each([
        // Use embedded text (skip OCR)
        ['dummy.pdf', 'Dummy PDF file'],
        ['multi_page__embedded_text.pdf', 'This is a test.\nTesting...\nThis PDF has 3 pages.\nThis is page 3.'],
    ])('should process PDF resources', async (samplePath, expectedText) => {
        const { resource } = await (0, test_utils_1.createNoteAndResource)({ path: `${test_utils_1.ocrSampleDir}/${samplePath}` });
        const service = (0, test_utils_1.newOcrService)();
        // Dispose of the service even if processing or an assertion fails.
        try {
            await service.processResources();
            const processedResource = await Resource_1.default.load(resource.id);
            expect(processedResource.ocr_text).toBe(expectedText);
            expect(processedResource.ocr_status).toBe(types_1.ResourceOcrStatus.Done);
            expect(processedResource.ocr_error).toBe('');
        } finally {
            await service.dispose();
        }
    });
    // Creates a resource on client 1, syncs it to client 2 and runs the OCR
    // service there twice: once before and once after the resource fetcher
    // has run.
    it('should handle case where resource blob has not yet been downloaded', async () => {
        await (0, test_utils_1.createNoteAndResource)({ path: `${test_utils_1.ocrSampleDir}/dummy.pdf` });
        await (0, test_utils_1.synchronizerStart)();
        await (0, test_utils_1.switchClient)(2);
        await (0, test_utils_1.synchronizerStart)();
        await (0, time_1.msleep)(1);
        const service = (0, test_utils_1.newOcrService)();
        // Dispose of the service even if processing or an assertion fails.
        try {
            // First pass: the resource is expected to stay in the Todo
            // state, with neither text nor error recorded.
            await service.processResources();
            const pendingResource = (await Resource_1.default.all())[0];
            expect(pendingResource.ocr_text).toBe('');
            expect(pendingResource.ocr_error).toBe('');
            expect(pendingResource.ocr_status).toBe(types_1.ResourceOcrStatus.Todo);
            // Second pass, after the resource fetcher has completed: the
            // resource is expected to be fully processed.
            await (0, test_utils_1.resourceFetcher)().startAndWait();
            await service.processResources();
            const processedResource = (await Resource_1.default.all())[0];
            expect(processedResource.ocr_text).toBe('Dummy PDF file');
            expect(processedResource.ocr_error).toBe('');
            expect(processedResource.ocr_status).toBe(types_1.ResourceOcrStatus.Done);
        } finally {
            await service.dispose();
        }
    });
    // Marks the synced resource's fetch status as failed on client 2 and
    // checks that OCR reports an error, then checks that OCR succeeds once
    // the fetch error has been reset and the resource fetcher has run.
    it('should handle case where resource blob cannot be downloaded', async () => {
        await (0, test_utils_1.createNoteAndResource)({ path: `${test_utils_1.ocrSampleDir}/dummy.pdf` });
        await (0, test_utils_1.synchronizerStart)();
        await (0, test_utils_1.switchClient)(2);
        await (0, test_utils_1.synchronizerStart)();
        const resource = (await Resource_1.default.all())[0];
        // ----------------------------------------------------------------
        // Fetch status is an error so OCR status will be an error too
        // ----------------------------------------------------------------
        await Resource_1.default.setLocalState(resource.id, {
            resource_id: resource.id,
            fetch_status: Resource_1.default.FETCH_STATUS_ERROR,
            fetch_error: 'cannot be downloaded',
        });
        const service = (0, test_utils_1.newOcrService)();
        // Dispose of the service even if processing or an assertion fails.
        try {
            // The service will print a warning so we disable it in tests.
            // The logger is re-enabled in a `finally` so that a throwing
            // `processResources()` cannot leave logging switched off for
            // the tests that run afterwards.
            Logger_1.default.globalLogger.enabled = false;
            try {
                await service.processResources();
            } finally {
                Logger_1.default.globalLogger.enabled = true;
            }
            const failedResource = (await Resource_1.default.all())[0];
            expect(failedResource.ocr_text).toBe('');
            expect(failedResource.ocr_error).toContain('Cannot process resource');
            expect(failedResource.ocr_error).toContain('cannot be downloaded');
            expect(failedResource.ocr_status).toBe(types_1.ResourceOcrStatus.Error);
            // ------------------------------------------------------------
            // After the fetch status is reset and the resource downloaded,
            // it should also retry OCR and succeed.
            // ------------------------------------------------------------
            await Resource_1.default.resetFetchErrorStatus(resource.id);
            await (0, test_utils_1.resourceFetcher)().startAndWait();
            await service.processResources();
            const retriedResource = (await Resource_1.default.all())[0];
            expect(retriedResource.ocr_text).toBe('Dummy PDF file');
            expect(retriedResource.ocr_error).toBe('');
            expect(retriedResource.ocr_status).toBe(types_1.ResourceOcrStatus.Done);
        } finally {
            await service.dispose();
        }
    });
    // Processes the same resource with a separate OCR service on client 1
    // and on client 2, syncs both, and checks that client 1 ends up with a
    // single resource carrying the `updated_time` recorded on client 2.
    it('should handle conflicts if two clients process the same resource then sync', async () => {
        await (0, test_utils_1.createNoteAndResource)({ path: `${test_utils_1.ocrSampleDir}/dummy.pdf` });
        const service1 = (0, test_utils_1.newOcrService)();
        // Created later, once client 2 is active; stays null if the test
        // fails before that point.
        let service2 = null;
        try {
            await (0, test_utils_1.synchronizerStart)();
            await service1.processResources();
            await (0, test_utils_1.switchClient)(2);
            await (0, test_utils_1.synchronizerStart)();
            await (0, time_1.msleep)(1);
            await (0, test_utils_1.resourceFetcher)().startAndWait();
            service2 = (0, test_utils_1.newOcrService)();
            await service2.processResources();
            await (0, test_utils_1.synchronizerStart)();
            const expectedResourceUpdatedTime = (await Resource_1.default.all())[0].updated_time;
            await (0, test_utils_1.switchClient)(1);
            await (0, test_utils_1.synchronizerStart)();
            // A conflict happened during sync, but it is resolved by keeping
            // the remote version.
            expect((await Resource_1.default.all()).length).toBe(1);
            const resource = (await Resource_1.default.all())[0];
            expect(resource.ocr_text).toBe('Dummy PDF file');
            expect(resource.ocr_error).toBe('');
            expect(resource.ocr_status).toBe(types_1.ResourceOcrStatus.Done);
            expect(resource.updated_time).toBe(expectedResourceUpdatedTime);
        } finally {
            // Dispose of whatever was created, even after a failure, in the
            // same order as on the success path.
            await service1.dispose();
            if (service2) {
                await service2.dispose();
            }
        }
    });
    // Use this to quickly test with specific images:
    // it('should process resources 2', async () => {
    // 	await createNoteAndResource({ path: `${require('os').homedir()}/Desktop/AllClients.png` });
    // 	const service = newOcrService();
    // 	await service.processResources();
    // 	console.info(await Resource.all());
    // 	await service.dispose();
    // });
    // Checks that the OCR text of the low-confidence sample image contains
    // each numbered list marker and each of the expected title fragments.
    it('should generate text even on cases of lower confidence', async () => {
        const { resource } = await (0, test_utils_1.createNoteAndResource)({ path: `${test_utils_1.ocrSampleDir}/low_confidence_testing.png` });
        const service = (0, test_utils_1.newOcrService)();
        // Dispose of the service even if processing or an assertion fails.
        try {
            await service.processResources();
            const processedResource = await Resource_1.default.load(resource.id);
            // `toContain` is used rather than `includes(...)` + `toBe(true)`
            // so that a failure prints the actual OCR text.
            expect(processedResource.ocr_text).toContain('1.');
            // cSpell:disable
            expect(processedResource.ocr_text).toContain('eback Mountain (2005)');
            // cSpell:enable
            expect(processedResource.ocr_text).toContain('2.');
            expect(processedResource.ocr_text).toContain('Havoc (2005)');
            expect(processedResource.ocr_text).toContain('3.');
            expect(processedResource.ocr_text).toContain('Love & Other Drugs (2010)');
            expect(processedResource.ocr_text).toContain('4.');
            expect(processedResource.ocr_text).toContain('The Last Thing He Wanted (2020)');
        } finally {
            await service.dispose();
        }
    });
    // Saves a resource with an `ocr_driver_id` of -123456 and checks that
    // `processResources()` returns and leaves `ocr_text` empty.
    it('should skip resources with an invalid ocr_driver_id', async () => {
        const { resource } = await (0, test_utils_1.createNoteAndResource)({ path: `${test_utils_1.ocrSampleDir}/dummy.pdf` });
        await Resource_1.default.save(Object.assign({}, resource, { ocr_driver_id: -123456 }));
        const service = (0, test_utils_1.newOcrService)();
        // Dispose of the service even if processing or an assertion fails.
        try {
            // Should not loop forever
            await service.processResources();
            const processedResource = await Resource_1.default.load(resource.id);
            expect(processedResource.ocr_text).toBe('');
        } finally {
            await service.dispose();
        }
    });
    // Saves a resource with `ocr_driver_id` set to 0 and checks that it is
    // still processed, yielding the text of the sample PDF.
    it('should process resources with ocr_driver_id 0 as printed text', async () => {
        const { resource } = await (0, test_utils_1.createNoteAndResource)({ path: `${test_utils_1.ocrSampleDir}/multi_page__embedded_text.pdf` });
        await Resource_1.default.save(Object.assign({}, resource, { ocr_driver_id: 0 }));
        const service = (0, test_utils_1.newOcrService)();
        // Dispose of the service even if processing or an assertion fails.
        try {
            await service.processResources();
            const processedResource = await Resource_1.default.load(resource.id);
            expect(processedResource.ocr_text).toBe('This is a test.\nTesting...\nThis PDF has 3 pages.\nThis is page 3.');
            expect(processedResource.ocr_status).toBe(types_1.ResourceOcrStatus.Done);
            expect(processedResource.ocr_error).toBe('');
        } finally {
            await service.dispose();
        }
    });
    // With `ocr.handwrittenTextDriverEnabled` set to false, a resource
    // assigned to the HandwrittenText driver is expected to stay in the
    // Todo state with no text and no error, and to keep its title.
    it('should skip HTR processing when the relevant setting is disabled', async () => {
        const { resource } = await (0, test_utils_1.createNoteAndResource)({ path: `${test_utils_1.ocrSampleDir}/multi_page__embedded_text.pdf` });
        // NOTE(review): the setting is not restored by this test. Presumably
        // the database setup in `beforeEach` resets settings - confirm.
        Setting_1.default.setValue('ocr.handwrittenTextDriverEnabled', false);
        await Resource_1.default.save(Object.assign({}, resource, { ocr_driver_id: types_1.ResourceOcrDriverId.HandwrittenText, title: 'Test' }));
        const service = (0, test_utils_1.newOcrService)();
        // Dispose of the service even if processing or an assertion fails.
        try {
            await service.processResources();
            // Should not process HandwrittenText results
            const processedResource = await Resource_1.default.load(resource.id);
            expect(processedResource).toMatchObject({
                ocr_text: '',
                title: 'Test',
                ocr_status: types_1.ResourceOcrStatus.Todo,
                ocr_error: '',
            });
        } finally {
            await service.dispose();
        }
    });
});
//# sourceMappingURL=OcrService.test.js.map