How to Detect and Save Documents to PDF with HTML5 and JavaScript
December 16, 2024

How to Detect and Save Documents to PDF with HTML5 and JavaScript

Extracting and saving documents such as receipts, invoices, and contracts as PDF files is a common requirement for many businesses. In this article, we enhance the web document editor project built with Dynamsoft Document Viewer by adding the ability to detect documents and save them as PDF. The document detection feature is provided by Dynamsoft Capture Vision.


Demo video: Detecting documents and saving them as PDF


Online demonstration

https://yushulx.me/web-document-annotation/


Prerequisites


Implement document detection and rectification in HTML5 and JavaScript

The following sections guide you through document detection and rectification using HTML5 and JavaScript. If you have already downloaded the source code, you can jump to Step 2.


Step 1: Get the source code

  1. Clone the source code from the GitHub repository:

    git clone https://github.com/yushulx/web-twain-document-scan-management.git
    
  2. Navigate to the document_annotation directory:

    cd web-twain-document-scan-management/examples/document_annotation
    
  3. Open the project in Visual Studio Code.


Step 2: Add a document detection button

  1. In main.css, add a new material icon for the document detection button:

    /* Layout and size of the document detection toolbar icon. */
    .icon-document_scanner {
        font-size: 1.5em;
        display: flex;
    }

    /* Text content inserted before the element; used here as the icon name. */
    .icon-document_scanner::before {
        content: "crop_free";
    }
    
    

  2. Define the document detection button and add it to the toolbar in main.js:

    
    // Toolbar button definition: a Material-icon button whose click is mapped
    // to the "detectDocument" event name.
    const documentButton = {
        type: Dynamsoft.DDV.Elements.Button,
        className: "material-icons icon-document_scanner",
        tooltip: "Detect document",
        events: { click: "detectDocument" },
    };
    
    // Desktop UI configuration for the edit viewer: a column layout made of a
    // header (two button groups) followed by the main view.
    const pcEditViewerUiConfig = (() => {
        const { Elements } = Dynamsoft.DDV;

        // First header group: built-in editing elements, then the custom buttons.
        const editingGroup = {
            type: Elements.Layout,
            children: [
                Elements.ThumbnailSwitch,
                Elements.Zoom,
                Elements.FitMode,
                Elements.Crop,
                Elements.Filter,
                Elements.Undo,
                Elements.Redo,
                Elements.DeleteCurrent,
                Elements.DeleteAll,
                Elements.Pan,
                Elements.AnnotationSet,
                qrButton,
                checkButton,
                scanButton,
                clearButton,
                signatureButton,
                documentButton,
            ],
        };

        // Second header group: pagination plus the load and download buttons.
        const fileGroup = {
            type: Elements.Layout,
            children: [
                {
                    type: Elements.Pagination,
                    className: "ddv-edit-viewer-pagination-desktop",
                },
                loadButton,
                downloadButton,
            ],
        };

        const header = {
            type: Elements.Layout,
            className: "ddv-edit-viewer-header-desktop",
            children: [editingGroup, fileGroup],
        };

        return {
            type: Elements.Layout,
            flexDirection: "column",
            className: "ddv-edit-viewer-desktop",
            children: [header, Elements.MainView],
        };
    })();
    
  3. Add a click event handler for the document detection button:

    // Subscribe the handler below to the viewer's "detectDocument" event.
    editViewer.on("detectDocument", detectDocument);
    
    // Handler for the "detectDocument" event. The body is elided in this
    // snippet of the article.
    async function detectDocument() {
        ...
    }
    


Step 3: Create a pop-up dialog box for document detection and normalization

The pop-up dialog box for document detection and normalization includes three buttons: Detect, Normalize, and Cancel.

  • Detect: Detect the document boundaries.
  • Normalize: Normalize (rectify) the document.
  • Cancel: Close the dialog box.

HTML code

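<div id="document-detection" class="overlay">
    <div class="document-container">
        <h2>Document Detection</h2>
        <div class="form-group">
            <button id="detectDocument">Detect</button>
            <button id="normalizeDocument">Normalize</button>
            <button id="cancelDocument">Cancel</button>
        </div>
    </div>
</div>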

Enter full screen mode

Exit full screen mode

JavaScript code

// Look up the three dialog buttons by their element ids.
let detectDocumentButton = document.getElementById("detectDocument");
let cancelDocumentButton = document.getElementById("cancelDocument");
let normalizeDocumentButton = document.getElementById("normalizeDocument");

// Cancel: hide the "document-detection" dialog and do nothing else.
cancelDocumentButton.addEventListener('click', () => {
    document.getElementById("document-detection").style.display = "none";
});

// Normalize: hide the dialog first; the rest of the handler is elided in this
// snippet of the article.
normalizeDocumentButton.addEventListener('click', async () => {
    document.getElementById("document-detection").style.display = "none";

    ...
});

// Detect: hide the dialog first; the rest of the handler is elided in this
// snippet of the article.
detectDocumentButton.addEventListener('click', async () => {
    document.getElementById("document-detection").style.display = "none";

    ...
});
Enter full screen mode

Exit full screen mode


Step 4: Edit the document corners and rectify the document

  1. Detect the document and draw its outline from the four corner points in the edit viewer:

    // Detect the document boundary on the current page, remember its corner
    // points in `documentPoints`, and outline it with a polygon annotation
    // named 'document'.
    detectDocumentButton.addEventListener('click', async () => {
        document.getElementById("document-detection").style.display = "none";

        // Forget corners from any earlier detection so that a failed detection
        // cannot leave stale points behind for a later normalization.
        documentPoints = null;

        // Read the page index once: it is needed on both sides of the awaits
        // below, and the current page may change while detection is running.
        const pageIndex = editViewer.getCurrentPageIndex();
        const currentPageId = currentDoc.pages[pageIndex];

        const settings = {
            quality: 100,
            saveAnnotation: false,
        };

        const image = await editViewer.currentDocument.saveToJpeg(pageIndex, settings);
        const result = await cvRouter.capture(image, "DetectDocumentBoundaries_Default");

        // Only the first detected quadrilateral is used.
        const quad = result.items.find(
            (item) => item.type === Dynamsoft.Core.EnumCapturedResultItemType.CRIT_DETECTED_QUAD
        );
        if (!quad) {
            return;
        }

        const points = quad.location.points;
        const pageData = await currentDoc.getPageData(currentPageId);

        documentPoints = points;

        const polygonOptions = {
            // Rescale each corner by the ratio of the page's mediaBox size to
            // its display size before handing it to the annotation.
            points: points.map((p) => ({
                x: p.x / pageData.display.width * pageData.mediaBox.width,
                y: p.y / pageData.display.height * pageData.mediaBox.height,
            })),
            borderColor: "rgb(0,0,255)",
            flags: {
                print: false,
                noView: false,
                readOnly: false,
            },
        };

        const polygon = Dynamsoft.DDV.annotationManager.createAnnotation(currentPageId, "polygon", polygonOptions);
        polygon['name'] = 'document';
    });
    
  2. Normalize the document image:

    // Hide the dialog, normalize the current page, and, if an image comes
    // back, swap it into the page and clear the stored corner points.
    normalizeDocumentButton.addEventListener('click', async () => {
        document.getElementById("document-detection").style.display = "none";

        // Resolve the page id before awaiting the normalization.
        const pageIndex = editViewer.getCurrentPageIndex();
        const pageId = currentDoc.pages[pageIndex];

        const normalizedBlob = await normalizeImage();
        if (!normalizedBlob) {
            return;
        }

        await currentDoc.updatePage(pageId, normalizedBlob);
        documentPoints = null;
    });
    
    /**
     * Normalizes the current page using the corner points stored in
     * `documentPoints`.
     *
     * The points are written into the "NormalizeDocument_Default" settings as
     * the region of interest, the current page is exported as a JPEG without
     * annotations, and the export is run through the capture router.
     *
     * @returns {Promise<*>} The result of `toBlob()` on the first normalized
     *     image item, or `null` when no points are stored or the capture
     *     produced no normalized image.
     */
    async function normalizeImage() {
        if (!documentPoints) {
            return null;
        }

        const templateName = "NormalizeDocument_Default";

        const params = await cvRouter.getSimplifiedSettings(templateName);
        params.roi.points = documentPoints;
        // 0 here: the ROI points are not given in percentages.
        params.roiMeasuredInPercentage = 0;
        await cvRouter.updateSettings(templateName, params);

        const settings = {
            quality: 100,
            saveAnnotation: false,
        };

        const image = await editViewer.currentDocument.saveToJpeg(editViewer.getCurrentPageIndex(), settings);
        cvRouter.maxCvsSideLength = 9999;
        const result = await cvRouter.capture(image, templateName);

        const normalized = result.items.find(
            (item) => item.type === Dynamsoft.Core.EnumCapturedResultItemType.CRIT_NORMALIZED_IMAGE
        );

        // Return null (not undefined) on a miss, matching the no-points case.
        if (!normalized) {
            return null;
        }

        return await normalized.toBlob();
    }
    


Source Code

https://github.com/yushulx/web-twain-document-scan-management/tree/main/examples/document_annotation

2024-12-16 02:49:28

Leave a Reply

Your email address will not be published. Required fields are marked *