pdf mupdf

How to detect whether a font used in a PDF is bold, italic, or plain


While extracting content from a PDF using the MuPDF library, I only get the font name, not its font face (style).

Do I have to guess (e.g. by looking for "bold" in the font name, although that is not a reliable approach), or is there another way to detect whether a specific font is bold, italic, or plain?


Solution

  • I used iTextSharp to extract the font family, font color, etc.:

    /// <summary>
    /// Reads the PDF at <c>path</c>, extracts the text of each page through a
    /// <c>TextWithFont_inputPdf</c> strategy, and stores the concatenated text
    /// in <c>input_pdf</c>. Calls <c>clear()</c> once extraction has finished.
    /// </summary>
    public void Extract_inputpdf() {

      text_input_File = string.Empty;

      StringBuilder sb_inputpdf = new StringBuilder();
      PdfReader reader_inputPdf = new PdfReader(path); //read PDF
      try {
        // iTextSharp page numbers are 1-based: valid pages are 1..NumberOfPages.
        // Starting at 0 asks for a page that does not exist.
        for (int i = 1; i <= reader_inputPdf.NumberOfPages; i++) {

          // A fresh strategy per page, as in the original code.
          TextWithFont_inputPdf inputpdf = new TextWithFont_inputPdf();
          text_input_File = iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(reader_inputPdf, i, inputpdf);

          sb_inputpdf.Append(text_input_File);
        }

        // Build the final string once instead of on every iteration.
        input_pdf = sb_inputpdf.ToString();
      } finally {
        // Release the reader even when extraction throws.
        reader_inputPdf.Close();
      }
      clear();
    }
    
    /// <summary>
    /// Text extraction strategy whose <c>RenderText</c> callback reads the
    /// PostScript name of the font reported by the render info.
    /// </summary>
    public class TextWithFont_inputPdf: iTextSharp.text.pdf.parser.ITextExtractionStrategy {
      /// <summary>
      /// Reads the PostScript font name of the font attached to <paramref name="renderInfo"/>.
      /// </summary>
      /// <param name="renderInfo">Render information supplied for the text being processed.</param>
      public void RenderText(iTextSharp.text.pdf.parser.TextRenderInfo renderInfo) {

        var font = renderInfo.GetFont();
        string postscriptName = font.PostscriptFontName;

        // Scaffolding for splitting the PostScript name into its separate words,
        // if they are wanted individually; the parts would be stored in nameParts.
        string nameToSplit = postscriptName;
        string[] nameParts = null;
      }
    }
    /// <summary>
    /// Returns the string representation of <c>result</c>.
    /// </summary>
    /// <returns>The value of <c>result.ToString()</c>.</returns>
    public string GetResultantText() {
      string text = result.ToString();
      return text;
    }