Tags: python, python-3.x, docx, python-docx

How to create a multilevel numbered list using python-docx that also works in Google Docs?


My goal is to create a nested numbered list using python-docx and upload it to Google Docs. At the moment I can handle level-0 lists and they work fine; the problem starts when there is more than one level in the list, which results in incorrect level text (in some attempts it is empty, in others it is always 1, etc.).

This is also a problem when uploading the file to Google Docs: since the numbering in the file is not well defined, the conversion does not work well. In the worst case the file looks right in Word, but once uploaded to Google Docs the sub-items have no level text at all.

I also tried creating a Word file in MS Word itself and uploading it to Google Docs, and that works perfectly, which confirms that the problem is in how I build the list with python-docx.

I know there are a few similar questions about numbered lists, but most of them have no answer that precisely solves the problem; also, my question is specifically about nested lists + Google Docs.

Here is the simplest way to reproduce my problem:

from docx import Document
from docx.shared import Inches
from docx.oxml import OxmlElement
from docx.oxml.ns import qn

# Extra left indent applied per list level, in inches (fed to Inches() below).
LIST_INDENT = 0.5
MAX_INDENT = 5.5 # To stop indents going off the page
# Most recent 'List Number' paragraph added by add_new_list_item();
# set it back to None to make the next item start a new numbered list.
prev_li = None

def list_number(doc, paragraph=None, prev=None, level=None):
    """
    Make ``paragraph`` an item of a (possibly nested) decimal numbered list.

    If ``prev`` is a paragraph that already carries a ``w:numId``, ``paragraph``
    joins the same list and the count continues. Otherwise a new abstract
    numbering definition plus a new numbering instance are created, so the
    count restarts at 1.

    Parameters:
        doc: python-docx document that owns the numbering part.
        paragraph: paragraph to number. Required in practice; the ``None``
            default only mirrors the original signature.
        prev: previous paragraph of the same list, or ``None`` to start a
            new list.
        level: zero-based list level. ``None`` means "same level as ``prev``",
            or 0 when there is no usable ``prev``. Values outside 0..8 are
            clamped.
    """
    # A WordprocessingML abstract numbering holds at most nine levels (0..8).
    max_levels = 9

    def get_next_abstractNumId(numbering):
        """
        Get the next available abstractNumId by checking the existing abstractNum elements.
        """
        abstract_nums = numbering.findall(qn('w:abstractNum'))
        existing_ids = [int(num.get(qn('w:abstractNumId'))) for num in abstract_nums]
        return max(existing_ids) + 1 if existing_ids else 0

    def get_next_numId(numbering):
        """
        Get the next available numId by checking the existing num elements.
        """
        nums = numbering.findall(qn('w:num'))
        existing_ids = [int(num.get(qn('w:numId'))) for num in nums]
        # numId 0 means "no numbering" on a paragraph, so real ids start at 1.
        return max(existing_ids) + 1 if existing_ids else 1

    def create_abstract_num(numbering):
        """
        Create a multilevel abstract numbering definition and return its id.

        Every level 0..8 gets its own w:lvl so that nested items have a
        definition to resolve against; level N shows its own counter
        ("%<N+1>.").
        """
        abstract_num_id = get_next_abstractNumId(numbering)
        abstract_num = OxmlElement('w:abstractNum')
        abstract_num.set(qn('w:abstractNumId'), str(abstract_num_id))

        multi_level_type = OxmlElement('w:multiLevelType')
        multi_level_type.set(qn('w:val'), 'multilevel')
        abstract_num.append(multi_level_type)

        for ilvl in range(max_levels):
            lvl = OxmlElement('w:lvl')
            lvl.set(qn('w:ilvl'), str(ilvl))
            # Children are appended in schema order: start, numFmt, lvlText,
            # lvlJc. No w:pStyle link is emitted: the previous value
            # 'LIST_NUMBER' does not look like a style id of the default
            # template (presumably 'ListNumber' - verify), and a style may
            # only be linked from one level anyway.
            for tag, val in (
                ('w:start', '1'),
                ('w:numFmt', 'decimal'),
                ('w:lvlText', '%{}.'.format(ilvl + 1)),
                ('w:lvlJc', 'left'),
            ):
                child = OxmlElement(tag)
                child.set(qn('w:val'), val)
                lvl.append(child)
            abstract_num.append(lvl)

        # The schema expects all w:abstractNum elements before the first
        # w:num, so insert there rather than at the very end.
        first_num = numbering.find(qn('w:num'))
        if first_num is None:
            numbering.append(abstract_num)
        else:
            first_num.addprevious(abstract_num)
        return abstract_num_id

    def create_num(numbering, abstract_num_id):
        """
        Create a numbering instance linked to the abstract numbering definition.
        """
        num_id = get_next_numId(numbering)

        num = OxmlElement('w:num')
        num.set(qn('w:numId'), str(num_id))

        abstract_numId = OxmlElement('w:abstractNumId')
        abstract_numId.set(qn('w:val'), str(abstract_num_id))
        num.append(abstract_numId)

        numbering.append(num)

        return num_id

    numbering = doc.part.numbering_part.numbering_definitions._numbering

    # Numbering properties of the previous item, if it has any.
    prev_numPr = None
    if prev is not None and prev._p.pPr is not None:
        prev_numPr = prev._p.pPr.numPr

    if prev_numPr is None or prev_numPr.numId is None:
        # No list to continue: start a fresh one.
        if level is None:
            level = 0
        abstract_num_id = create_abstract_num(numbering)
        num_id = create_num(numbering, abstract_num_id)
    else:
        if level is None:
            # w:ilvl is optional; a missing one means level 0.
            prev_ilvl = prev_numPr.ilvl
            level = 0 if prev_ilvl is None else prev_ilvl.val
        num_id = prev_numPr.numId.val

    # Keep the level inside the range the abstract numbering defines.
    level = max(0, min(level, max_levels - 1))

    # Apply the numbering to the paragraph
    numPr = paragraph._p.get_or_add_pPr().get_or_add_numPr()
    numPr.get_or_add_numId().val = num_id
    numPr.get_or_add_ilvl().val = level

def add_new_list_item(doc, text, level, list_style):
    """
    Append ``text`` to ``doc`` as a list paragraph styled ``list_style``.

    The paragraph is indented by ``level * LIST_INDENT`` inches (capped at
    ``MAX_INDENT``) with single line spacing. Paragraphs using the
    'List Number' style are additionally numbered via ``list_number`` and
    remembered in the module-level ``prev_li``.
    """
    global prev_li

    new_item = doc.add_paragraph(text, style=list_style)
    indent_inches = min(level * LIST_INDENT, MAX_INDENT)
    new_item.paragraph_format.left_indent = Inches(indent_inches)
    new_item.paragraph_format.line_spacing = 1

    # Only numbered items take part in the numbering chain.
    if list_style != 'List Number':
        return

    list_number(doc=doc, paragraph=new_item, prev=prev_li, level=level)
    prev_li = new_item

document = Document()

# First list: three top-level numbered items.
document.add_heading('First Header')
for item_text in ('First Item', 'Second Item', 'Third Item'):
    add_new_list_item(doc=document, text=item_text, level=0, list_style='List Number')

# Forget the previous item so the next numbered item starts a new list.
prev_li = None
document.add_heading('Second Header')
second_list_items = (
    ('First Item', 0, 'List Number'),
    ('First SubItem', 1, 'List Number'),
    ('Second SubItem', 1, 'List Number'),
    ('Second Item', 0, 'List Number'),
    ('Third Item', 0, 'List Number'),
    ('Third SubItem', 1, 'List Bullet'),
    ('Fourth Item', 0, 'List Number'),
)
for item_text, item_level, item_style in second_list_items:
    add_new_list_item(doc=document, text=item_text, level=item_level, list_style=item_style)

document.save('lists_test.docx')

You can check the result of the script here: Docx file result from script. As you can see, it looks fine initially, since both lists have their own counter, but when uploaded to Google Docs the sublists lose their counter, as you can see here: enter image description here

I think I'm missing something about the numbering: I couldn't find a way to set the level text for a paragraph that does not need to restart the count. I tried manually creating a lvl XML element and setting all the same attributes as in create_abstract_num(), but it doesn't seem to work.

Can anyone help me find a solution to this problem? How can I properly set the correct numeric values for nested lists?


Solution

  • Python-docx is not very complete when it comes to numbering. All supported numberings are single-level only, not multilevel. Adding multilevel numbering would result in a lot of low-level code using XML methods to add at least one multilevel abstract numbering. In your case it would even be two, as you want to use decimal numbering as well as bullets at the second level. This is not possible using one abstract numbering.

    What I would do is use the built-in styles List Number 2 and List Bullet 2 for the second level. This leads almost exactly to what you seem to want. And, at least for me, it works in Google Docs too. It uses the approach from Multiple Numbered Lists with python-docx to restart the numbering after the second header.

    Code:

    from docx import Document
    from docx.text.paragraph import Paragraph
    
    def prepare_numberings(document: Document, style_name: str) -> int:
        """
        Create a restarted numbering instance for the style ``style_name``.

        The new ``w:num`` points to the same abstract numbering as the numId
        the style links to, and carries a level-0 start override of 1.

        Returns the new numId, or -1 when no style named ``style_name``
        links to a numbering (unknown name, or the style has no numId).
        """
        # Find the numId the style links to; styles without numbering
        # properties are skipped instead of raising AttributeError.
        num_id_list_number = -1
        for style in document.styles:
            if style.name != style_name:
                continue
            pPr = style._element.pPr
            numPr = pPr.numPr if pPr is not None else None
            numId = numPr.numId if numPr is not None else None
            if numId is not None:
                num_id_list_number = numId.val

        if num_id_list_number <= -1:
            return -1

        # Add a numbering that links to the same abstractNumId but restarts
        # level 0 at 1, and hand back its numId.
        ct_numbering = document.part.numbering_part.numbering_definitions._numbering
        abstractNumId = ct_numbering.num_having_numId(num_id_list_number).abstractNumId.val
        ct_num = ct_numbering.add_num(abstractNumId)
        startOverride = ct_num.add_lvlOverride(0)._add_startOverride()
        startOverride.val = 1
        return ct_num.numId
        
    def set_link_to_numId(paragraph: Paragraph, num_id: int):
        """
        Link ``paragraph`` to the numbering instance ``num_id``.

        Does nothing when ``num_id`` is negative (the "not found" value).
        """
        if num_id > -1:
            # get_or_add_* creates w:pPr / w:numPr / w:numId only when they
            # are missing, so a paragraph without w:pPr no longer fails and
            # existing numbering properties are reused instead of duplicated.
            numPr = paragraph._element.get_or_add_pPr().get_or_add_numPr()
            numPr.get_or_add_numId().val = num_id
    
    def add_new_list_item(doc: Document, text: str, list_style: str, start_new: bool = False):
        """
        Append ``text`` to ``doc`` as a paragraph styled ``list_style``.

        With ``start_new`` set, the paragraph is linked to a freshly created
        numbering instance so that the count restarts at this item.
        """
        p = doc.add_paragraph(text, style=list_style)
        if start_new:
            # Work on the document passed in, not on a module-level global.
            num_id_list_number_new = prepare_numberings(doc, list_style)
            set_link_to_numId(p, num_id_list_number_new)
    
    # Main program logic
    
    document = Document()

    # First list: restart the numbering on its first item.
    document.add_heading('First Header')
    first_list_items = (
        ('First Item', 'List Number', True),
        ('Second Item', 'List Number', False),
        ('Third Item', 'List Number', False),
    )
    for item_text, item_style, restart in first_list_items:
        add_new_list_item(doc=document, text=item_text, list_style=item_style, start_new=restart)

    # Nothing in this script reads prev_li; the assignment is kept as-is.
    prev_li = None
    document.add_heading('Second Header')
    # Second-level items use the built-in 'List Number 2' / 'List Bullet 2' styles.
    second_list_items = (
        ('First Item', 'List Number', True),
        ('First SubItem', 'List Number 2', False),
        ('Second SubItem', 'List Number 2', False),
        ('Second Item', 'List Number', False),
        ('Third Item', 'List Number', False),
        ('Third SubItem', 'List Bullet 2', False),
        ('Fourth Item', 'List Number', False),
    )
    for item_text, item_style, restart in second_list_items:
        add_new_list_item(doc=document, text=item_text, list_style=item_style, start_new=restart)

    document.save('lists_test.docx')
    

    Result:

    enter image description here