from copy import deepcopy
from typing import Union, AnyStr, List, Optional, Sequence, Tuple
import numpy as np
import pandas as pd
from mixins.repr import ReprMixin
from pyexlatex.table.logic.panels.combine import (
common_column_labels,
common_row_labels,
remove_label_collections_from_grid
)
from pyexlatex.table.logic.panels.letters import panel_string
from pyexlatex.table.logic.panels.topleft import _set_top_left_corner_labels
from pyexlatex.table.models.labels.table import LabelTable, LabelCollection
from pyexlatex.table.models.panels.panel import Panel
from pyexlatex.table.models.panels.panel import PanelGrid, GridShape
from pyexlatex.table.models.spacing.columntable import ColumnPadTable
from pyexlatex.table.models.spacing.rowtable import RowPadTable
from pyexlatex.table.models.table.section import TableSection
from pyexlatex.table.models.data.table import DataTable
class PanelCollection(ReprMixin):
    """
    Lays panel contents in a grid, consolidates labels, creates padding between tables
    """
    repr_cols = ['name', 'panels']
    _num_columns: int

    def __init__(self, panels: List[Panel], label_consolidation: str='object', enforce_label_order=True,
                 top_left_corner_labels: Union[LabelTable, LabelCollection, List[AnyStr], AnyStr]=None,
                 pad_rows: int=1, pad_columns: int=1, name: str=None):
        """
        :param panels: list of Panels, which represent a full set of rows of the table. for multiple
            sections in one set of rows, create DataTables for each section and pass to Panels.
        :param label_consolidation: pass 'object' to compare object equality for label consolidation, 'str'
            (or 'string') for converting all labels to strings then comparing equality. Use 'object'
            for more control over consolidation. Pass None to skip label consolidation entirely.
        :param enforce_label_order: pass False to allow consolidating lower labels even if upper labels do not match.
            e.g. if labels on one table are [['Top1'], ['Bot1', 'Bot2']], then labels on the other
            table are [['Top2'], ['Bot1', 'Bot2']], consolidated labels when passing False will be
            ['Bot1', 'Bot2'], while when passing True, no labels will be consolidated. Under True,
            will start from the top label, then stop consolidating once it has a mismatch.
        :param top_left_corner_labels: additional labels to place in the top left corner. pass a single string
            or a list of strings for convenience. a list of strings will create labels
            which span the gap horizontally and go downwards, one label per row. pass
            LabelCollection or LabelTable for more control.
        :param pad_rows: number of spacer rows inserted between consecutive rows of the grid
        :param pad_columns: passed to ColumnPadTable for the spacers inserted between sections of a grid row
        :param name: name that will be used to construct caption in output
        """
        self.name = name
        self.panels = panels
        # Normalize string options so that e.g. ' Object ' is accepted; leave None/other values untouched
        self.label_consolidation = label_consolidation.lower().strip() \
            if isinstance(label_consolidation, str) else label_consolidation
        self.enforce_label_order = enforce_label_order
        self.top_left_corner_labels = _set_top_left_corner_labels(top_left_corner_labels)
        self.pad_rows = pad_rows
        self.pad_columns = pad_columns
        # These flags are switched on during consolidate_labels if labels get pulled out into the grid
        self.has_row_labels = False
        self.has_column_labels = False

        # Order matters: padding is computed on the grid after labels have been consolidated into it
        self.consolidate_labels()
        self.pad_grid()

    def iterpanels(self, add_panel_order_label: bool = True):
        """
        Yields one single-row Panel per row of the collection.

        First panel is headers. Then each original panel
        self.grid includes all panels as well as labels. Need to separate back out to
        get each panel

        :param add_panel_order_label: Whether to add to names of panels Panel A:, Panel B:,
            and so on
        :return: generator of Panel objects, each wrapping a single row. The row which starts an original
            panel carries that panel's name (if it has one); all other rows are unnamed.
        """
        # Each panel takes one output row per row of its panel_grid, plus the spacers that pad_grid
        # inserts between consecutive grid rows. pad_grid inserts ``pad_rows`` spacers per gap, so the
        # count must scale with pad_rows rather than assume exactly one spacer.
        pad_rows = max(self.pad_rows, 0)
        panel_lengths = []
        for panel in self.panels:
            num_grid_rows = panel.panel_grid.shape[0]
            panel_lengths.append(num_grid_rows + max(num_grid_rows - 1, 0) * pad_rows)

        if self.has_column_labels:
            # First panel will have an additional row at the top if there are column labels
            panel_lengths[0] += 1

        num_panels = len(self.panels)
        total_row_idx = 0
        for panel_idx, panel_length in enumerate(panel_lengths):
            panel_name = self.panels[panel_idx].name
            # If it's the first panel, and there's column labels, then second row should show name.
            # If it's a later panel, or there's no column labels, first row should show name.
            name_row_idx = 1 if (self.has_column_labels and panel_idx == 0) else 0

            for row_idx in range(panel_length):
                row = self.rows[total_row_idx]
                # Has to have name to show name
                if row_idx == name_row_idx and panel_name is not None:
                    # output with the name
                    if add_panel_order_label:
                        full_name = panel_string(panel_idx) + panel_name
                    else:
                        full_name = panel_name
                    yield Panel(PanelGrid([row]), name=full_name)
                else:
                    # column labels panel, no matching name. Or no name for user supplied panel
                    yield Panel(PanelGrid([row]))
                total_row_idx += 1

            # If we are in-between panels, not on the last panel
            if panel_idx + 1 != num_panels:
                for _ in range(pad_rows):
                    # In between panels, there are also spacers, yield those
                    row = self.rows[total_row_idx]
                    yield Panel(PanelGrid([row]))
                    total_row_idx += 1

    @property
    def rows(self):
        """Rows built from the grid, created on first access and cached in ``_rows``."""
        try:
            return self._rows
        except AttributeError:
            self._rows = self._create_panel_rows()
            return self._rows

    @property
    def num_columns(self) -> int:
        """Maximum ``num_columns`` across the rows, cached in ``_num_columns``."""
        try:
            return self._num_columns
        except AttributeError:
            self._num_columns = max([row.num_columns for row in self.rows])
            return self._num_columns

    def _create_panel_rows(self):
        """Combine each grid row into a single row and record the resulting column count."""
        rows, self._num_columns = _create_panel_rows_from_grid(self.grid)
        return rows

    @property
    def grid(self):
        """
        Grid holding the sections of all panels stacked vertically, created on first access.

        Panels with fewer columns are filled on the right with empty TableSections.
        """
        try:
            return self._grid
        except AttributeError:
            # Copy to avoid modifying original contents
            self._grid = deepcopy(_concatenate_uneven_rows_filling_right(
                [panel.panel_grid for panel in self.panels],
                fill_value=TableSection([]),
                array_class=GridShape
            ))
            return self._grid

    def consolidate_labels(self):
        """
        Pulls labels which are common across sections out of the sections and into the grid.

        Common column labels become an extra grid row at the top, common row labels become an extra
        grid column on the left, and the top left corner labels are placed where the two meet.
        Does nothing when ``label_consolidation`` is None.

        :raises ValueError: if ``label_consolidation`` is not 'object', 'string', 'str', True, or None
        """
        # TODO [#46]: reduce complexity and improve testing of label consolidation
        #
        # This code has grown quite a bit to handle all the cases, and it
        # still has not been thoroughly tested for every case. Need to expand
        # the testing to cover all the cases and then try to simplify the code
        if self.label_consolidation is None:
            return

        if self.label_consolidation == 'object':
            use_object_equality = True
        elif self.label_consolidation in ('string', 'str', True):
            use_object_equality = False
        else:
            raise ValueError(f'must pass object, string, or None to label consolidation. Got {self.label_consolidation}')

        column_labels: Optional[List[LabelTable]] = common_column_labels(
            self.grid,
            use_object_equality=use_object_equality,
            enforce_label_order=self.enforce_label_order
        )
        # Column labels may be modified to add top left corner label, but need to
        # track original so it can be removed from existing panels
        orig_column_labels = deepcopy(column_labels)

        row_labels: Optional[List[LabelTable]] = common_row_labels(
            self.grid,
            use_object_equality=use_object_equality,
            enforce_label_order=self.enforce_label_order
        )

        # Remove from the original tables the labels that were just consolidated
        removed_indices = remove_label_collections_from_grid(
            self.grid,
            column_labels=orig_column_labels,
            row_labels=row_labels,
            use_object_equality=use_object_equality
        )

        # When there are multiple sub-tables horizontally and
        # they have had their row labels consolidated, need
        # to remove any blank column labels which were there
        # for the row labels
        for loc in removed_indices['rows']:
            if loc[1] == 0:
                # First column, don't need to remove label
                continue
            section: DataTable = self.grid[loc]
            if section.column_labels is not None and section.column_labels.begins_with(' '):
                # Middle sub-table which had spacer for row labels
                # but now does not need it
                section.column_labels.split_bottom_left()

        if column_labels is not None:
            if row_labels is None and not self.top_left_corner_labels.is_spacer:
                # If there are column labels but not row labels, still need to deal with top left label.
                # Adding the top left label is handled in the if row_labels is not None block, but it will
                # not be reached as row_labels is None. Therefore create a blank label table for each grid row
                # except for the column labels. The top left label to go with the column row will be added
                # in the following block.
                row_labels = []
                for grid_row in self.grid:
                    left_value: TableSection = grid_row[0]
                    if left_value in column_labels:
                        # top left corner label, handled separately in row labels section
                        continue
                    section_height = len(left_value.rows)
                    if isinstance(left_value, DataTable):
                        if left_value.column_labels in column_labels:
                            # Adjust for when the column labels have been consolidated into the overall column labels,
                            # then don't need to add row label as this column label won't be in the output
                            section_height -= 1
                    # Build a fresh inner list per row so the blank rows never share one list object
                    label_lol = [[''] for _ in range(section_height)]
                    row_labels.append(LabelTable.from_list_of_lists(label_lol))

        column_labels_created = False
        if row_labels is not None:
            if column_labels is not None:
                self._add_column_labels(column_labels)
                column_labels_created = True
            # After adding column labels, there is an additional row at the top of the grid
            # Therefore we will need one additional LabelTable for the first row, which is the row of column labels
            # If top_left_corner_labels was passed on object creation, use that as LabelTable. Otherwise use a blank one
            if self.has_column_labels:
                # Determine whether to use panel collection TL labels or whether
                # they are already in existing label table from data table
                num_labels = sum(label.cell_width for label in column_labels)
                # Form a temporary grid skipping the header to determine the number of columns
                temp_grid, _ = _add_row_labels_to_grid(self.grid[1:, :], row_labels)
                _, num_columns = _create_panel_rows_from_grid(temp_grid)
                self._num_columns = num_columns

                tl_corner_label = self.top_left_corner_labels
                if num_labels >= num_columns:
                    # Must already be top left corner label included in table, because
                    # there are already enough values. Split this off the column label,
                    # as instead the top row label will be used
                    for col_label in column_labels:
                        # TODO [#70]: better label consolidation for multiple sub-tables horizontally
                        #
                        # In this situation, it is not clear from which sub-tables the indices
                        # were consolidated. Right now, removing column headers as if there
                        # was full label consolidation (same index on every table).
                        # Need to start tracking for which sub-tables were the indices removed.
                        first_col_label = col_label.split_bottom_left()
                    if self.top_left_corner_labels.is_spacer:
                        # If we don't have any top left corner labels at the collection
                        # level, then use the label which was split off the column labels
                        tl_corner_label = first_col_label
                all_row_labels = [tl_corner_label] + row_labels
            else:
                all_row_labels = row_labels
            self._add_row_labels(all_row_labels)
        elif column_labels is not None:
            # There are no common row labels, but if there are any row labels which are not being consolidated,
            # still need to add the top left corner
            # First detect the existence of any row labels
            any_row_labels = any(
                isinstance(grid_row[0], DataTable) and grid_row[0].has_row_labels
                for grid_row in self.grid
            )
            if any_row_labels:
                # Now add the top left corner
                column_labels[0] = self.top_left_corner_labels + column_labels[0]

        if column_labels is not None and not column_labels_created:
            self._add_column_labels(column_labels)

    def pad_grid(self):
        """
        Inserts spacers into the grid and rebuilds the cached rows.

        A ColumnPadTable goes between the sections of each grid row (but not directly after the
        consolidated row labels), and ``pad_rows`` RowPadTables go after every grid row except the
        last one and except the row of consolidated column labels. Rows are then padded on the right
        so that they all have the same number of elements.
        """
        row_pad = RowPadTable()
        grid_rows: List[GridShape] = []
        for n_row, grid_row in enumerate(self.grid):
            # Add first elem
            new_row = np.array([grid_row[0]]).view(GridShape).reshape(1, 1)
            # Add following elems
            for n_elem, elem in enumerate(grid_row[1:]):
                # Add pads between following elems
                if self.pad_columns and not (n_elem == 0 and self.has_row_labels):  # only skip first if there are row labels
                    new_row = np.append(new_row, np.array([ColumnPadTable(self.pad_columns)])).view(GridShape)
                new_row = np.append(new_row, elem).view(GridShape)
            new_row = new_row.reshape((1, new_row.shape[0]))  # reorganize into row
            grid_rows.append(new_row)

            # add row padding on every loop except last
            if n_row != self.grid.shape[0] - 1:
                # no need to add padding after column labels
                if n_row == 0 and self.has_column_labels:
                    continue
                for _ in range(self.pad_rows):
                    grid_rows.append(
                        np.array([row_pad]).view(GridShape).reshape(1, 1)
                    )

        # Before combining rows, must have same number of elements. Pad right with row pads
        num_grid_columns = max(row.shape[1] for row in grid_rows)  # find max number of columns
        out_grid_rows = []
        for row in grid_rows:
            pad_number = num_grid_columns - row.shape[1]
            assert pad_number >= 0
            new_row = np.append(row, np.array([row_pad] * pad_number)).view(GridShape)
            new_row = new_row.reshape((1, new_row.shape[0]))  # reorganize into row
            out_grid_rows.append(new_row)

        new_grid = np.concatenate(out_grid_rows).view(GridShape)
        self._grid = new_grid
        self._rows = self._create_panel_rows()  # need to recreate rows with new grid

    def _add_column_labels(self, column_labels: List[LabelTable]):
        """
        Adds the consolidated column labels as a new first row of the grid.

        Sets ``has_column_labels``; when every passed label table is empty the grid is left unchanged.

        :param column_labels: one LabelTable per grid column
        """
        assert len(column_labels) == self.grid.shape[1]
        if all(table.is_empty for table in column_labels):
            # if no consolidated labels, no need to add
            self.has_column_labels = False
            return

        self.has_column_labels = True
        # Form PanelGrid from labels
        column_label_grid = PanelGrid(column_labels, shape=(1, len(column_labels)))
        # Combine label PanelGrid and existing PanelGrid. Go through the property so that the
        # grid is created if nothing has accessed it yet
        self._grid = np.concatenate([column_label_grid, self.grid]).view(GridShape)

    def _add_row_labels(self, row_labels: List[LabelTable]):
        """
        Adds the consolidated row labels as a new first column of the grid and sets ``has_row_labels``.

        :param row_labels: one LabelTable per grid row
        """
        self._grid, self.has_row_labels = _add_row_labels_to_grid(self.grid, row_labels)

    @classmethod
    def from_list_of_lists_of_dfs(cls, df_list_of_lists: List[List[pd.DataFrame]],
                                  panel_names: List[str] = None, *args,
                                  panel_kwargs: Optional[dict] = None,
                                  data_table_kwargs: Optional[dict] = None, **kwargs):
        """
        To create a single panel table, pass a single list within
        a list of DataFrames, e.g. [[df1, df2]] then shape will specify how the DataFrames will
        be organized in the Panel. If you pass two lists within the outer list, then shape will
        apply to each Panel. So [[df1, df2], [df3, df4]] with shape=(1,2) create a two Panel table
        with two tables placed within each panel going horizontally, so that the overall shape is (2,2).

        Note: convenience method for if not much control over table is needed.
        To apply different options to each panel, construct them individually using
        Panel.from_df_list

        :param df_list_of_lists: one inner list of DataFrames per panel
        :param panel_names: list of panel names. Must be of same length as outer list in df_list_of_lists
        :param args: positional args to pass to PanelCollection constructor, following the panels
        :param panel_kwargs: Panel.from_df_list kwargs. Same kwargs will be passed to all panels
        :param kwargs: kwargs to pass to PanelCollection constructor. label_consolidation defaults
            to 'string' here when it is not passed
        :param data_table_kwargs: kwargs to be passed to DataTable.from_df. Same kwargs will be passed to
            all data tables.
        :raises ValueError: if the number of panel names does not match the number of panels
        :return: PanelCollection
        """
        _validate_panel_names(panel_names, df_list_of_lists)

        # None defaults instead of {} so that no dict object is shared between calls
        panel_kwargs = {} if panel_kwargs is None else panel_kwargs
        data_table_kwargs = {} if data_table_kwargs is None else data_table_kwargs

        panels = [
            Panel.from_df_list(
                df_list,
                name=_panel_name_or_none(panel_names, i),
                data_table_kwargs=data_table_kwargs,
                **panel_kwargs
            )
            for i, df_list in enumerate(df_list_of_lists)
        ]

        if not args:
            # With positional args, the first one is label_consolidation, so only
            # apply the default when nothing was passed positionally
            kwargs.setdefault('label_consolidation', 'string')

        return cls(panels, *args, **kwargs)

    def to_tex(self, mid_rule=True):
        """
        Builds the tabular content for this collection.

        :param mid_rule: passed through to build_tabular_content_from_panel_collection
        :return: result of build_tabular_content_from_panel_collection
        """
        # Imported at call time rather than at module level
        from pyexlatex.table.logic.table.build import build_tabular_content_from_panel_collection
        return build_tabular_content_from_panel_collection(self, mid_rule=mid_rule)
def _panel_name_or_none(panel_names: Optional[List[str]], index: int):
if panel_names is not None:
return panel_names[index]
else:
return None
def _validate_panel_names(panel_names: Optional[List[str]], df_list_of_lists: List[List[pd.DataFrame]]):
if panel_names is None:
return
num_panel_names = len(panel_names)
num_panels = len(df_list_of_lists)
if num_panel_names != num_panels:
raise ValueError(f'must pass as many panel names as panels. Got {num_panel_names} names '
f'and {num_panels} panels.')
def _concatenate_uneven_rows_filling_right(rows, fill_value=np.nan, array_class=None):
"""
Concatenates along vertical axis, filling right as needed.
Examples:
a = np.array(
[[1, 2, 3]]
)
b = np.array(
[[4, 5]]
)
_concatenate_uneven_rows_filling_right([a, b])
array([[ 1., 2., 3.],
[ 4., 5., nan]])
Args:
rows:
fill_value:
array_class:
Returns:
"""
max_len = max([row.shape[1] for row in rows])
concat_rows = []
for row in rows:
num_to_add = max_len - row.shape[1]
if num_to_add > 0:
add_array = np.array([[fill_value] * num_to_add])
concat_array = np.concatenate([row, add_array], axis=1)
else:
concat_array = row
concat_rows.append(concat_array)
out_arr = np.concatenate(concat_rows)
if array_class:
out_arr = out_arr.view(array_class)
return out_arr
def _add_row_labels_to_grid(grid: GridShape, row_labels: List[LabelTable]) -> Tuple[GridShape, bool]:
    """
    Prepend a column of row labels to the grid.

    :param grid: grid to add the labels to
    :param row_labels: one LabelTable per row of the grid
    :return: tuple of (grid, whether labels were added). When every label table is empty,
        the passed grid is returned as-is along with False
    """
    assert len(row_labels) == grid.shape[0]

    for label_table in row_labels:
        if not label_table.is_empty:
            # At least one real label, so the label column is needed
            break
    else:
        # Nothing was consolidated, leave the grid alone
        return grid, False

    # Single-column PanelGrid of the labels, placed to the left of the existing grid
    label_column = PanelGrid(row_labels, shape=(len(row_labels), 1))
    return np.concatenate([label_column, grid], axis=1).view(GridShape), True
def _create_panel_rows_from_grid(grid: GridShape) -> Tuple[List[TableSection], int]:
    """
    Collapse each row of the grid into a single section and pad all of them to the same width.

    :param grid: grid whose rows hold the sections to combine
    :return: tuple of (padded copies of the combined rows, number of columns of the widest row).
        Grid rows whose combined section is falsy are left out
    """
    combined: List[TableSection] = []
    for panel_row in grid:
        sections = iter(panel_row)
        # Start from the left-most section, then add each following section onto it
        merged = next(sections, None)
        for section in sections:
            merged = merged + section
        if merged:
            combined.append(merged)

    widest = max([row.num_columns for row in combined])

    # Pad copies so that the sections held in the grid are not modified in place
    padded = deepcopy(combined)
    for row in padded:
        row.pad(widest, direction='right')

    return padded, widest