I'm trying to get each page of "Page Break Preview". Precisely speaking, I need to know which cells Page 1 contains, which cells Page 2 contains and so on.
Could you teach me how to realize this functionality in Python 3.12? I tried openpyxl
, but other libraries are welcome too.
I find openpyxl
has print_area
variable, but this variable only returns the whole print area, which contains all pages, so I can't know which page each cell belongs to.
As you can see the code and the screenshot, the result is A1:R50
, which contains both Page 1 and Page 2.
In this example, the expected result is something like {'Page1': 'A1:I50', 'Page2':'J1:R50'}
.
from openpyxl import load_workbook
workbook = load_workbook(filename="./sample_loader_data/sample.xlsx",
read_only=True,
data_only=True,
)
workbook.worksheets[0].print_area # Result: "'Test worksheet'!$A$1:$R$50"
As commented here, this is not possible with openpyxl
UPDATE: As @dmjy correctly answered, it is now possible with openpyxl
.
from openpyxl import load_workbook
from openpyxl.utils.cell import get_column_letter
import re
workbook = load_workbook(filename="Demo.xlsx")
vertical_breaks = [0]+[page_break.id for page_break in workbook.worksheets[0].col_breaks.brk]
horizontal_breaks = [0]+[page_break.id for page_break in workbook.worksheets[0].row_breaks.brk]
last_cell = workbook.worksheets[0].dimensions.split(":")[1]
last_col = ''.join(re.findall(r'[a-zA-Z]+', last_cell))
last_row = ''.join(re.findall(r'\d+', last_cell))
pages = {}
cnt=1
for v, v_break in enumerate(vertical_breaks):
for h, h_break in enumerate(horizontal_breaks):
start_col = get_column_letter(vertical_breaks[v]+1)
start_row = horizontal_breaks[h]+1
end_col = get_column_letter(vertical_breaks[v+1]) if v+1 < len(vertical_breaks) else last_col
end_row = horizontal_breaks[h+1] if h+1 < len(horizontal_breaks) else last_row
pages[f"Page{cnt}"] = f"{start_col}{start_row}:{end_col}{end_row}"
cnt+=1
print(pages)
For this example
It correctly outputs
{'Page1': 'A1:F27', 'Page2': 'A28:F54', 'Page3': 'G1:N27', 'Page4': 'G28:N54', 'Page5': 'O1:T27', 'Page6': 'O28:T54'}
Alternative with xlrd
for .xls
files.
import xlrd
def num_to_excel_col(n):
if n < 1:
raise ValueError("Number must be positive")
result = ""
while True:
if n > 26:
n, r = divmod(n - 1, 26)
result = chr(r + ord('A')) + result
else:
return chr(n + ord('A') - 1) + result
workbook = xlrd.open_workbook("Demo.xls", formatting_info=True)
worksheet = workbook.sheets()[0]
vertical_breaks = worksheet.vertical_page_breaks
vertical_breaks.insert(0, (0,0,0))
horizontal_breaks = worksheet.horizontal_page_breaks
horizontal_breaks.insert(0, (0,0,0))
last_row = worksheet.nrows
last_col = worksheet.ncols
pages = {}
cnt=1
for v, v_break in enumerate(vertical_breaks):
for h, h_break in enumerate(horizontal_breaks):
start_col = num_to_excel_col(vertical_breaks[v][0]+1)
start_row = horizontal_breaks[h][0]+1
end_col = num_to_excel_col(vertical_breaks[v+1][0] if v+1 < len(vertical_breaks) else last_col)
end_row = horizontal_breaks[h+1][0] if h+1 < len(horizontal_breaks) else last_row
pages[f"Page{cnt}"] = f"{start_col}{start_row}:{end_col}{end_row}"
cnt+=1
print(pages)
{'Page1': 'A1:F27', 'Page2': 'A28:F54', 'Page3': 'G1:N27', 'Page4': 'G28:N54', 'Page5': 'O1:T27', 'Page6': 'O28:T54'}