Sometimes we need to convert a PDF into HTML and extract text from it using C#. In this article, I explain how you can convert a PDF into HTML in ASP.NET MVC using an external .exe file.
Step 1:- Create new project in ASP.NET MVC
Step 2:- Now select the Empty template and check MVC under the core references.
Step 3:- Now open Solution Explorer, click on the Controllers folder, and create a new controller.
Step 4:- Now select Empty controller
Step 5:- Now add Controller Name
Step 6:- Right-click on the Index action method and choose Add View.
Step 7:- Now select View
Step 8:- Now select Layout Page & Add View
Step 9:- Now open the NuGet Package Manager and download or install the “Aspose.Pdf” package.
Step 10:- The source code of the Index page is given below. Add this code to the Index.cshtml page.
@{
    ViewBag.Title = "Home Page";
}
@*
    Upload page for the PDF-to-HTML converter.
    The user picks a file, clicks the button, and the file is posted as
    multipart form data (field name "PDFFIle") to Home/PDFToHTML.
    The action answers with JSON of the form { IsSuccess: true|false }.
*@
<br />
<br />
<div class="row col-md-12">
    <input type="file" id="flPDF" class="form-control" accept=".pdf" />
</div>
<br />
<br />
<div class="row col-md-12">
    <button type="button" class="btn btn-primary" id="btnSave">PDF To HTML</button>
</div>
<div class="row col-md-12">
    <label id="Msg">File Converted Succesfully.....</label>
</div>
<br />
<br />
<script src="~/Scripts/jquery-3.4.1.min.js"></script>
<script>
    // The success label is only revealed after a successful conversion.
    $("#Msg").hide();

    $(document).on("click", "#btnSave", function () {
        var files = $("#flPDF").get(0).files;
        if (files.length == 0) {
            alert("Please select PDF file");
            return;
        }

        // Hide the label left over from a previous run so a failed retry
        // is not shown next to a stale success message.
        $("#Msg").hide();

        // The field name must match the key the controller reads from Request.Files.
        var fileData = new FormData();
        fileData.append("PDFFIle", files[0]);

        $.ajax({
            // Url.Action keeps the request working when the site is hosted under a virtual directory.
            url: '@Url.Action("PDFToHTML", "Home")',
            type: "POST",
            // Let the browser set the multipart boundary and send the FormData untouched.
            contentType: false,
            processData: false,
            data: fileData,
            success: function (result) {
                if (result.IsSuccess) {
                    $("#Msg").show();
                } else {
                    // The server handled the request but reported a failed conversion.
                    alert("The PDF file could not be converted.");
                }
            },
            error: function (xhr, status, errorThrown) {
                // The first argument is the jqXHR object; alerting it directly prints "[object Object]".
                alert("Conversion request failed: " + (errorThrown || status));
            }
        });
    });
</script>
Step 11:- Here is the source code for our Home controller.
using Aspose.Pdf;
using System;
using System.Web.Mvc;
using System.IO;

namespace Pdf_To_Html.Controllers
{
    /// <summary>
    /// Serves the upload page and converts an uploaded PDF into a single HTML file using Aspose.Pdf.
    /// </summary>
    public class HomeController : Controller
    {
        // Fixed location the converted HTML document is written to.
        private const string OutputHtmlFile = "D:\\PDF TO HTML\\SingleHTML_out.html";

        // Name of the multipart form field the view uses for the uploaded file.
        private const string UploadFieldName = "PDFFIle";

        /// <summary>
        /// Renders the upload page.
        /// </summary>
        public ActionResult Index()
        {
            return View();
        }

        /// <summary>
        /// Saves the uploaded PDF under ~/Uploads and converts it to HTML at <see cref="OutputHtmlFile"/>.
        /// </summary>
        /// <returns>
        /// JSON of the form <c>{ IsSuccess = true }</c> when the conversion completed, and
        /// <c>{ IsSuccess = false }</c> when no usable PDF was posted or the conversion threw.
        /// </returns>
        [HttpPost]
        public JsonResult PDFToHTML()
        {
            try
            {
                var pdfFile = HttpContext.Request.Files[UploadFieldName];
                if (pdfFile == null || pdfFile.ContentLength == 0)
                {
                    // Nothing was uploaded; report failure instead of letting the converter throw.
                    return Json(new { IsSuccess = false }, JsonRequestBehavior.AllowGet);
                }

                // The client controls FileName. Some browsers send a full path, and a crafted
                // request can send "..\" segments, so keep only the final name component.
                string fileName = Path.GetFileName(pdfFile.FileName);
                bool isPdf = string.Equals(Path.GetExtension(fileName), ".pdf", StringComparison.OrdinalIgnoreCase);
                if (string.IsNullOrEmpty(fileName) || !isPdf)
                {
                    // Do not store arbitrary file types under the web root.
                    return Json(new { IsSuccess = false }, JsonRequestBehavior.AllowGet);
                }

                // CreateDirectory is a no-op when the directory already exists.
                string uploadDirectory = HttpContext.Server.MapPath("~/Uploads");
                Directory.CreateDirectory(uploadDirectory);

                string uploadedPdfPath = Path.Combine(uploadDirectory, fileName);
                pdfFile.SaveAs(uploadedPdfPath);

                // Make sure the target folder for the generated HTML exists before saving into it.
                string outputDirectory = Path.GetDirectoryName(OutputHtmlFile);
                if (!string.IsNullOrEmpty(outputDirectory))
                {
                    Directory.CreateDirectory(outputDirectory);
                }

                // Dispose the document so the uploaded file is released after conversion.
                using (Document doc = new Document(uploadedPdfPath))
                {
                    HtmlSaveOptions saveOptions = new HtmlSaveOptions();
                    saveOptions.PartsEmbeddingMode = HtmlSaveOptions.PartsEmbeddingModes.EmbedAllIntoHtml;
                    saveOptions.LettersPositioningMethod = HtmlSaveOptions.LettersPositioningMethods.UseEmUnitsAndCompensationOfRoundingErrorsInCss;
                    saveOptions.RasterImagesSavingMode = HtmlSaveOptions.RasterImagesSavingModes.AsEmbeddedPartsOfPngPageBackground;
                    saveOptions.FontSavingMode = HtmlSaveOptions.FontSavingModes.SaveInAllFormats;
                    doc.Save(OutputHtmlFile, saveOptions);
                }

                return Json(new { IsSuccess = true }, JsonRequestBehavior.AllowGet);
            }
            catch (Exception ex)
            {
                // The client only receives a flag, so record the cause for diagnosis instead of discarding it.
                System.Diagnostics.Trace.TraceError("PDF to HTML conversion failed: {0}", ex);
                return Json(new { IsSuccess = false }, JsonRequestBehavior.AllowGet);
            }
        }
    }
}
Step 12 :- Let’s see the output below.
Please give your valuable feedback, and if you have any concerns regarding this article, please let me know.
Also, Check How To Upload Images Using Cloudinary In ASP.Net MVC