const fs = require('fs')
const crypto = require('crypto')
const pAll = require('p-all')
const https = require('https')
const glob = require('glob')
const path = require('path')
const URL = require('url').URL
const jsdom = require('jsdom')
const jsyaml = require('js-yaml')

// Two directories above this script; used as the site root when looking for .html files
const SITE_PATH = __dirname + '/../..'

// NOTE: the `{{ ... }}` placeholders look like Liquid tags, presumably rendered
// by Jekyll before this script runs; unrendered, both flags evaluate to false.
const IS_PROD = '{{ jekyll.environment }}' === 'production'
// When true, PDFs are generated with html-pdf and stored locally instead of on S3
const GENERATE_PDF_LOCALLY = '{{ site.offline }}' === 'true'
// The scheme is required: `new URL()` throws a TypeError on a bare hostname
const S3_STORAGE_URL = new URL('https://opendoc-theme-pdf.s3-ap-southeast-1.amazonaws.com')

// For dealing with images when baseurl is set: strip leading slashes, if any, for standardization.
// The pattern is anchored so that slashes inside the baseurl are left untouched.
const BASEURL = '{{ site.baseurl }}'.replace(/^\/+/, '')
// Local folder for pdfs (identifier spelling kept as-is because other code in this file refers to it)
const LOCAL_PDF_DOLER = path.join(SITE_PATH, 'assets', 'pdfs')

// S3 folder; replace slashes to avoid creating sub-folders.
// Non-production builds get a '-staging' suffix.
const S3_PDF_FOLDER = '{{ site.repository }}'.replace(/\//g, '-') + (IS_PROD ? '' : '-staging')

// Bucket name sent to the PDF service and shown in logs: the first label of the
// storage hostname (assumes a virtual-hosted-style S3 URL - TODO confirm).
const BUCKET_NAME = S3_STORAGE_URL.hostname.split('.')[0]

// CSS to be applied to the PDFs, this will be inserted in <head>
const PATH_TO_CSS = path.join(SITE_PATH, 'assets', 'styles', 'main.css')

// Hash is stored as S3 metadata and served as custom header whenever the pdf is requested
const SERIALIZED_HTML_HASH_HEADER = 'x-amz-meta-html-hash'

// _config.yml file path (one level above SITE_PATH)
const CONFIG_YAML_PATH = path.join(SITE_PATH, '..', '_config.yml')

// html-pdf module; only loaded when PDFs are generated locally
let pdf
// Maximum number of document folders converted at the same time; stays at 1 for local builds
let pdfGenConcurrency = 1
if (GENERATE_PDF_LOCALLY) {
    pdf = require('html-pdf')
    console.log('Generating PDFs and storing locally instead.')
} else {
    // Remote generation cannot work without the Lambda endpoint and its API key
    if (process.env.PDF_LAMBDA_KEY === undefined ||
        process.env.PDF_LAMBDA_SERVER === undefined) {
        console.log('Environment variables PDF_LAMBDA_KEY or PDF_LAMBDA_SERVER for AWS Lambda not present')
        process.exit(1)
    }
    const defaultConcurrency = 50 // Tuned for Netlify
    const requestedConcurrency = Number.parseInt(process.env.PDF_GEN_CONCURRENCY, 10)
    if (Number.isInteger(requestedConcurrency) && requestedConcurrency > 0) {
        pdfGenConcurrency = requestedConcurrency
    } else {
        // Unset or unusable values (NaN, zero, negative) fall back to the default
        // instead of being handed to p-all as an invalid concurrency.
        if (process.env.PDF_GEN_CONCURRENCY !== undefined) {
            console.log(`Ignoring invalid PDF_GEN_CONCURRENCY "${process.env.PDF_GEN_CONCURRENCY}", using ${defaultConcurrency}.`)
        }
        pdfGenConcurrency = defaultConcurrency
    }
    console.log(`Generating PDFs on AWS Lambda with concurrency of ${pdfGenConcurrency}.`)
    console.log(`PDFs will be placed in bucket: ${BUCKET_NAME} in folder ${S3_PDF_FOLDER}.`)
}

// These options are only applied when PDFs are built locally (passed to html-pdf's `create`)
const localPdfOptions = {
    height: '594mm',        // allowed units: mm, cm, in, px
    width: '420mm',
    // Base location handed to html-pdf, pointing at the site root on disk
    base: 'file://' + SITE_PATH + '/',
    border: {
        right: '100px', // default is 0, units: mm, cm, in, px
        left: '100px',
    },
    header: {
        height: '80px',
    },
    footer: {
        height: '80px',
    },
}

// List of top-level folder names which may contain html but are not to be printed
const printIgnoreFolders = ['assets', 'files', 'iframes', 'images']
// List of .html file names which are not to be printed
const printIgnoreFiles = ['export.html', 'index.html']

// Tracking statistics, updated by the log* helpers and the export functions
let numPdfsStarted = 0
let numPdfsUnchanged = 0
let numPdfsError = 0
let numPdfsSuccess = 0
let numTotalPdfs = 0
// Label shared by console.time / console.timeEnd in main
const TIMER = 'Time to create PDFs'

// Entry point: exports the top-level pages first, then every document folder,
// and finally prints the counters together with the elapsed time.
const main = async () => {
    console.time(TIMER)
    const foldersToExport = getDocumentFolders(SITE_PATH, printIgnoreFolders)

    await exportPdfTopLevelDocs(SITE_PATH)
    await exportPdfDocFolders(SITE_PATH, foldersToExport)

    const summary = `PDFs created with success:${numPdfsSuccess} unchanged:${numPdfsUnchanged} error:${numPdfsError} total:${numTotalPdfs}`
    console.log(summary)
    console.timeEnd(TIMER)
}

// Builds a single 'export' PDF out of the printable .html files that sit
// directly inside sitePath. Does nothing when there are no such files.
const exportPdfTopLevelDocs = async (sitePath) => {
    const printablePaths = glob.sync('*.html', { cwd: sitePath })
        .filter((fileName) => !printIgnoreFiles.includes(fileName))
        .map((fileName) => path.join(sitePath, fileName))

    // Nothing to print (don't want empty pdfs)
    if (printablePaths.length === 0) {
        return
    }
    numTotalPdfs++

    // Apply the order configured for the root folder, when there is one
    const configuredOrder = getOrderFromConfig('/')
    const hasConfiguredOrder = configuredOrder && configuredOrder.length
    const pathsToPrint = hasConfiguredOrder
        ? reorderHtmlFilePaths(printablePaths, configuredOrder)
        : printablePaths

    await createPdf(pathsToPrint, sitePath, 'export')
}

// Queues one PDF job per document folder that contains printable .html files
// and runs the jobs through p-all, limited to pdfGenConcurrency at a time.
// Returns the promise produced by p-all.
const exportPdfDocFolders = (sitePath, docFolders) => {
    const pdfJobs = []
    for (const folder of docFolders) {
        const folderPath = path.join(sitePath, folder)
        const printablePaths = glob.sync('*.html', { cwd: folderPath })
            .filter((fileName) => !printIgnoreFiles.includes(fileName))
            .map((fileName) => path.join(folderPath, fileName))

        // Skip folders without HTML files (don't want empty pdfs)
        if (printablePaths.length > 0) {
            numTotalPdfs++
            const configuredOrder = getOrderFromConfig(folder)
            const hasConfiguredOrder = configuredOrder && configuredOrder.length
            const pathsToPrint = hasConfiguredOrder
                ? reorderHtmlFilePaths(printablePaths, configuredOrder)
                : printablePaths
            // Deferred so that p-all decides when each job actually starts
            pdfJobs.push(() => createPdf(pathsToPrint, folderPath, folder))
        }
    }
    return pAll(pdfJobs, { concurrency: pdfGenConcurrency })
}

// Concatenates the #main-content of the given .html files into the docprint.html
// template and turns the result into `<documentName>.pdf`.
//
// - Local mode (GENERATE_PDF_LOCALLY): the PDF is written with html-pdf into
//   LOCAL_PDF_DOLER. The returned promise rejects with the html-pdf error.
// - Remote mode: a HEAD request checks whether the PDF already stored on S3 carries
//   the same HTML hash; if not, the HTML is POSTed to the Lambda endpoint.
//   Failures are logged through logErrorPdf and the returned promise still resolves.
//
// htmlFilePaths:    absolute paths of the .html files, in print order
// outputFolderPath: folder of the document, used to resolve relative image paths and in logs
// documentName:     base name of the generated PDF
// Returns a promise that settles once the PDF has been handled.
const createPdf = (htmlFilePaths, outputFolderPath, documentName) => {
    logStartedPdf(outputFolderPath)
    // docprint.html is our template to build pdf up from.
    const exportHtmlFile = fs.readFileSync(__dirname + '/docprint.html')
    let cssFile = ''
    try {
        cssFile = fs.readFileSync(PATH_TO_CSS)
    } catch (err) {
        console.log('Failed to read CSS file at ' + PATH_TO_CSS +', CSS will not be applied')
    }

    const exportDom = new jsdom.JSDOM(exportHtmlFile)
    let serializedHtml
    let serializedHtmlHash
    try {
        const exportDomBody = exportDom.window.document.body
        const exportDomMain = exportDom.window.document.getElementById('main-content')
        let addedTitle = false
        let addedDocTitle = false
        htmlFilePaths.forEach(function (filePath) {
            const file = fs.readFileSync(filePath)
            const dom = new jsdom.JSDOM(file, {
                resources: 'usable' // to get JSDOM to load stylesheets
            })
            try {
                // html-pdf can't deal with these
                removeTagsFromDom(dom, 'script')
                removeTagsFromDom(dom, 'iframe')
                inlineImages(dom, outputFolderPath)
                // Site title needs only be added once; retried on the next file when missing here
                if (!addedTitle) {
                    try {
                        const oldTitle = dom.window.document.getElementsByClassName('site-header-text')[0]
                        exportDomBody.insertBefore(oldTitle, exportDomMain)
                        addedTitle = true
                    } catch (error) {
                        console.log('Failed to append Title, skipping: ' + error)
                    }
                }
                // Document title too, followed by a horizontal rule
                if (!addedDocTitle) {
                    try {
                        const oldDocTitle = dom.window.document.getElementsByClassName('description-container')[0]
                        exportDomBody.insertBefore(oldDocTitle, exportDomMain)
                        const hr = dom.window.document.createElement('HR')
                        exportDomBody.insertBefore(hr, exportDomMain)
                        addedDocTitle = true
                    } catch (error) {
                        console.log('Failed to append Doc Title, skipping: ' + error)
                    }
                }
                // Concat all the id:main-content divs
                try {
                    const oldNode = dom.window.document.getElementById('main-content')
                    exportDomMain.innerHTML += oldNode.innerHTML
                } catch (error) {
                    console.log('Failed to append Node, skipping: ' + error)
                }
            } finally {
                // Release the per-file window even when processing the file throws
                dom.window.close()
            }
        })
        // The hash is taken before the stylesheet and the print class are added,
        // so it reflects the concatenated page content only.
        serializedHtmlHash = crypto.createHash('md5').update(exportDom.serialize()).digest('base64')
        exportDom.window.document.head.innerHTML += '<style>' + cssFile + '</style>'
        console.log('createpdf hash for:' + outputFolderPath + ': ' + serializedHtmlHash)
        // Apply small font sizes remotely because puppeteer tends to print big
        exportDomBody.className += GENERATE_PDF_LOCALLY ? ' print-content-large' : ' print-content-small'
        serializedHtml = exportDom.serialize()
    } finally {
        // Only the serialized HTML is needed from here on, so the window is released
        // on every path (including the "PDF unchanged" one).
        exportDom.window.close()
    }

    if (GENERATE_PDF_LOCALLY) {
        // Generate and store locally
        return new Promise((resolve, reject) => {
            const pdfPath = path.join(LOCAL_PDF_DOLER, documentName + '.pdf')
            pdf.create(serializedHtml, localPdfOptions).toFile(pdfPath, (err, res) => {
                if (err) {
                    logErrorPdf('Creating PDFs locally', err)
                    return reject(err)
                }
                logSuccessPdf(res.filename)
                resolve()
            })
        })
    }

    // Code for this API lives at https://github.com/opendocsg/pdf-lambda
    const pdfName = `${documentName}.pdf`
    const pdfS3Url = S3_STORAGE_URL.toString() + S3_PDF_FOLDER + '/' + pdfName

    // Resolves if the PDF is present on S3 and its stored hash matches. Else rejects.
    const checkPdfUnchanged = () => new Promise((resolve, reject) => {
        const pdfExistsRequest = https.request(pdfS3Url, { method: 'HEAD' }, (res) => {
            // A response must be consumed once a response handler is attached
            res.resume()
            if (res.statusCode === 404) {
                return reject(new Error('PDF not present'))
            }
            if (!(SERIALIZED_HTML_HASH_HEADER in res.headers)) {
                return reject(new Error('HTML hash header not present'))
            }
            if (res.headers[SERIALIZED_HTML_HASH_HEADER] !== serializedHtmlHash) {
                return reject(new Error('PDF hash does not match'))
            }
            resolve()
        })
        pdfExistsRequest.on('error', (err) => {
            console.log(`pdfExistsRequest encountered error for ${pdfName}:, ${err}`)
            reject(err)
        })
        pdfExistsRequest.end()
    })

    // Sends the serialized HTML to the Lambda function; rejects with an Error on any failure.
    // Logging is left to the caller so that each failure is counted exactly once.
    const requestPdfCreation = () => new Promise((resolve, reject) => {
        const options = {
            method: 'POST',
            headers: {
                'x-api-key': process.env.PDF_LAMBDA_KEY,
                'content-type': 'application/json',
            }
        }
        const pdfCreationBody = {
            'serializedHTML': serializedHtml,
            'serializedHTMLName': S3_PDF_FOLDER + '/' + pdfName,
            'serializedHTMLHash': serializedHtmlHash,
            'bucketName': BUCKET_NAME
        }
        const pdfCreationRequest = https.request(process.env.PDF_LAMBDA_SERVER, options, (res) => {
            res.resume()
            if (res.statusCode < 200 || res.statusCode >= 300) {
                return reject(new Error(`pdfCreationRequest status code for ${pdfName}: ${res.statusCode}`))
            }
            resolve()
        })
        pdfCreationRequest.on('error', reject)
        pdfCreationRequest.write(JSON.stringify(pdfCreationBody))
        pdfCreationRequest.end()
    })

    return checkPdfUnchanged().then(
        () => logUnchangedPdf(pdfName, pdfS3Url),
        // Missing, unverifiable or outdated PDF: have the Lambda function (re)create it
        () => requestPdfCreation().then(
            () => logSuccessPdf(pdfName),
            (error) => logErrorPdf(`pdfCreation promise error for ${pdfName}`, error)
        )
    )
}

// Counts one more started PDF and logs the running total for outputFolderPath
const logStartedPdf = (outputFolderPath) => {
    numPdfsStarted += 1
    const progress = `${numPdfsStarted}/${numTotalPdfs}`
    console.log(`createpdf started for:${outputFolderPath} (${progress})`)
}

// Counts one more PDF that did not need regenerating and logs where it lives.
// The first argument is printed verbatim as the PDF's label.
const logUnchangedPdf = (outputFolderPath, pdfUrl) => {
    numPdfsUnchanged += 1
    const progress = `${numPdfsUnchanged}/${numTotalPdfs}`
    console.log(`createpdf unchanged for:${outputFolderPath} at ${pdfUrl} (${progress})`)
}

// Counts one more failed PDF and logs where the failure originated along with the error
const logErrorPdf = (origin, error) => {
    numPdfsError += 1
    const progress = `${numPdfsError}/${numTotalPdfs}`
    console.log(`createpdf error for: ${origin}: ${error}(${progress})`)
}

// Counts one more successfully created PDF and logs its path or name
const logSuccessPdf = (outputPdfPath) => {
    numPdfsSuccess += 1
    const progress = `${numPdfsSuccess}/${numTotalPdfs}`
    console.log(`createpdf success for:${outputPdfPath} (${progress})`)
}

// Maps a lower-case file extension to the MIME type used when an image is
// inlined as a base64 data URI. SVGs are inlined separately and are not listed.
const imageType = {
    '.png': 'image/png',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.gif': 'image/gif', // previously missing, so GIFs were labelled image/png
    '.bmp': 'image/bmp',
    '.webp': 'image/webp',
}

// Loads every local image referenced by an <img> in `dom` and inlines it as a data URI.
//
// dom:              JSDOM instance whose <img> elements are rewritten in place
// outputFolderPath: folder against which relative image paths are resolved
//
// Images with an http(s) source, an empty source, an existing data URI, or a path
// that is not a regular file on disk are left untouched.
const inlineImages = (dom, outputFolderPath) => {
    const imgs = dom.window.document.getElementsByTagName('img')
    for (let i = 0; i < imgs.length; i++) {
        const img = imgs[i]
        const originalImagePath = img.src
        if (!originalImagePath ||
            originalImagePath.startsWith('http://') ||
            originalImagePath.startsWith('https://') ||
            originalImagePath.startsWith('data:')) {
            continue
        }
        // Convert all file paths into absolute file paths
        let imgPath
        if (originalImagePath.startsWith('/')) {
            // If baseurl is set, remove it for images to be found - but only when it
            // really is the leading path segment, never from the middle of the path
            const baseurlPrefix = '/' + BASEURL
            const hasBaseurlPrefix = BASEURL.length > 0 &&
                (originalImagePath === baseurlPrefix || originalImagePath.startsWith(baseurlPrefix + '/'))
            const sitePathname = hasBaseurlPrefix
                ? originalImagePath.slice(baseurlPrefix.length)
                : originalImagePath
            imgPath = path.join(__dirname, '..', '..', sitePathname)
        } else {
            // relative path
            imgPath = path.join(outputFolderPath, originalImagePath)
        }
        // Directories would make readFileSync throw, so require a regular file
        if (!fs.existsSync(imgPath) || !fs.statSync(imgPath).isFile()) {
            continue
        }
        const imgRaw = fs.readFileSync(imgPath)
        // Extensions are compared case-insensitively (.PNG, .Jpg, ...)
        const extension = path.extname(imgPath).toLowerCase()
        if (extension === '.svg') { // don't encode svgs in base64, simply insert them
            img.src = 'data:image/svg+xml;utf8,' + imgRaw.toString('utf-8')
        } else {
            const dataType = imageType[extension] || 'image/png'
            img.src = 'data:' + dataType + ';base64,' + imgRaw.toString('base64')
        }
    }
}

// Returns the names (not full paths) of the directories directly inside sitePath,
// leaving out every name listed in printIgnoreFolders.
const getDocumentFolders = (sitePath, printIgnoreFolders) => {
    return fs.readdirSync(sitePath).filter((entryName) => {
        if (printIgnoreFolders.includes(entryName)) {
            return false
        }
        return fs.statSync(path.join(sitePath, entryName)).isDirectory()
    })
}

// Returns the `order` value configured for folderName under `folders` in the
// config file (names are compared case-insensitively), or null when no entry
// matches or the config cannot be read.
const getOrderFromConfig = (folderName) => {
    try {
        const configYml = yamlToJs(CONFIG_YAML_PATH)
        const wantedName = folderName.toLowerCase()
        // `const` keeps the loop variable local instead of leaking an implicit global
        for (const folder of configYml.folders) {
            // Entries without a usable name are skipped rather than aborting the lookup
            if (!folder || typeof folder.name !== 'string') {
                continue
            }
            if (folder.name.toLowerCase() === wantedName) {
                return folder.order
            }
        }
        return null
    } catch (error) {
        // Best effort: a missing or malformed config simply means "no custom order"
        return null
    }
}

// Returns a NEW array holding the entries of htmlFilePaths arranged as listed in
// `order`; the input array is not modified.
//
// htmlFilePaths: paths of .html files
// order:         file names, matched after dropping a trailing '.md'
//
// A path is matched when its base name without '.html' equals the order entry.
// Paths that are not mentioned in `order` are left out of the result.
const reorderHtmlFilePaths = (htmlFilePaths, order) => {
    const orderedHtmlFilePaths = []
    for (const orderEntry of order) {
        const name = path.basename(orderEntry, '.md')
        for (const filePath of htmlFilePaths) {
            if (path.basename(filePath, '.html') === name) {
                orderedHtmlFilePaths.push(filePath)
            }
        }
    }
    return orderedHtmlFilePaths
}

// Removes every <tagname> element, together with its content, from the document of `dom`.
const removeTagsFromDom = (dom, tagname) => {
    // Snapshot first: the collection returned by a real DOM shrinks while nodes are removed
    const tags = Array.from(dom.window.document.getElementsByTagName(tagname))
    for (const tag of tags) {
        if (tag.parentNode) {
            tag.parentNode.removeChild(tag)
        }
    }
}

// Converts the YAML found in a .md file to a JS object.
// Every '---' sequence in the file is removed before the text is handed to js-yaml.
const markdownToJs = (filepath) => {
    const configString = fs.readFileSync(filepath).toString().replace(/---/g, '')
    return jsyaml.load(configString)
}

// Reads the YAML file at `filepath` and returns whatever js-yaml parses from it
const yamlToJs = (filepath) => {
    const rawYaml = fs.readFileSync(filepath)
    return jsyaml.load(rawYaml)
}

// Run the export. A rejection is reported and turned into a non-zero exit code
// instead of being left as an unhandled promise rejection.
main().catch((error) => {
    console.error('PDF export failed: ' + error)
    process.exitCode = 1
})