While Extracting Content from PDF using the MuPDF library, i am getting the Font name only not its font-face.
Do i guess (eg.bold in font-name though not the right way) or there is any other way to detect that specific font is Bold/Italic/Plain.
I have used itextsharp to extract font-family ,font color etc
public void Extract_inputpdf() {
text_input_File = string.Empty;
StringBuilder sb_inputpdf = new StringBuilder();
PdfReader reader_inputPdf = new PdfReader(path); //read PDF
for (int i = 0; i <= reader_inputPdf.NumberOfPages; i++) {
TextWithFont_inputPdf inputpdf = new TextWithFont_inputPdf();
text_input_File = iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(reader_inputPdf, i, inputpdf);
sb_inputpdf.Append(text_input_File);
input_pdf = sb_inputpdf.ToString();
}
reader_inputPdf.Close();
clear();
}
public class TextWithFont_inputPdf: iTextSharp.text.pdf.parser.ITextExtractionStrategy {
public void RenderText(iTextSharp.text.pdf.parser.TextRenderInfo renderInfo) {
string curFont = renderInfo.GetFont().PostscriptFontName;
string divide = curFont;
string[] fontnames = null;
//split the words from postscript if u want separate. it will be in this
}
}
public string GetResultantText() {
return result.ToString();
}