Skip to content
This repository has been archived by the owner on May 26, 2022. It is now read-only.

Enable est column width calculation #866

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/Spout/Writer/Common/Entity/Options.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ abstract class Options
// Multisheets options
public const TEMP_FOLDER = 'tempFolder';
public const DEFAULT_ROW_STYLE = 'defaultRowStyle';
public const ROWWIDTH_CALC_STYLE = 'rowCalcMethod';
public const ROWWIDTH_FIXED = 'rowFixedWith';
public const SHOULD_CREATE_NEW_SHEETS_AUTOMATICALLY = 'shouldCreateNewSheetsAutomatically';

// XLSX specific options
Expand Down
114 changes: 114 additions & 0 deletions src/Spout/Writer/Common/Entity/Worksheet.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,22 @@ class Worksheet

/** @var int Index of the last written row */
private $lastWrittenRowIndex;

/** @var array Array of the column widths */
protected $columnWidths;

/** @var int Width calculation style */
protected $widthCalcuationStyle;

/** @var int Fixed sheet width for fixed width calculation style */
protected $fixedSheetWidth;

public const W_FULL = 1;
public const W_FIXED = 2;
public const W_FULL_ALT = 3;
public const W_NONE = 0;
public const DEFAULT_COL_WIDTH = 30;
public const DEFAULT_FIXED_WIDTH = 320;

/**
* Worksheet constructor.
Expand All @@ -36,6 +52,8 @@ public function __construct($worksheetFilePath, Sheet $externalSheet)
$this->externalSheet = $externalSheet;
$this->maxNumColumns = 0;
$this->lastWrittenRowIndex = 0;
$this->columnWidths = [];
$this->widthCalcuationStyle = 0;
}

/**
Expand Down Expand Up @@ -78,6 +96,79 @@ public function getMaxNumColumns()
return $this->maxNumColumns;
}

/**
* @return array
*/
public function getColumnWidths()
{
// Hands back the whole width map as stored. Entries are keyed by zero-based column
// index and are written by getMaxColumnWidth() and setMaxColumnWidth().
return $this->columnWidths;
}

/**
* Gets the calculated max column width for the specified index
* @param int $zeroBasedIndex
* @return int
*/
public function getMaxColumnWidth($zeroBasedIndex)
{
    // A column with no recorded width gets the default stored on first read,
    // so later reads and getColumnWidths() report the same value.
    if (!isset($this->columnWidths[$zeroBasedIndex])) {
        $this->columnWidths[$zeroBasedIndex] = self::DEFAULT_COL_WIDTH;
    }

    return $this->columnWidths[$zeroBasedIndex];
}

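/**
* Raises the recorded max column width for the specified index.
* The stored width is only replaced when $value is greater than the current one
* (a column with no recorded width counts as 0); smaller or equal values are ignored.
* @param int $zeroBasedIndex
* @param int $value Candidate width
* @return void
*/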
public function setMaxColumnWidth($zeroBasedIndex, $value)
{
    // A column that has never been measured counts as width 0.
    $knownWidth = isset($this->columnWidths[$zeroBasedIndex])
        ? $this->columnWidths[$zeroBasedIndex]
        : 0;

    // Widths only ever grow: a smaller or equal candidate leaves the entry untouched.
    if ($value > $knownWidth) {
        $this->columnWidths[$zeroBasedIndex] = $value;
    }
}

/**
* Automatically calculates and sets the max column width for the specified cell
* @param Cell $cell The cell
* @param Style $style Row/Cell style
* @param int $zeroBasedIndex of cell
* @return void
*/
public function autoSetWidth($cell, $style, $zeroBasedIndex)
{
$size = 1 + mb_strlen($cell->getValue());//ensure we have at least 1 space
$size *= $style->isFontBold() ? 1.2 : 1.0;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How did you come up with the 1.2 ratio?

Copy link
Author

@xwiz xwiz Feb 12, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's actually meant to be 700/400, but 1.2 seemed to fit better. That may be because narrower letters like 'i', as you rightly pointed out, occupy less space.

$this->setMaxColumnWidth($zeroBasedIndex, $size);
}

/**
* Gets the fixed sheet width or returns the default if not available
* @return int
*/
public function getFixedSheetWidth()
{
    // Any falsy stored width (never set, null, 0) yields the class default.
    return $this->fixedSheetWidth ?: self::DEFAULT_FIXED_WIDTH;
}

/**
* Sets the fixed sheet width
* @param int $width
* @return void
*/
public function setFixedSheetWidth($width)
{
// Stored without validation. A falsy value (0, null, '') makes getFixedSheetWidth()
// fall back to DEFAULT_FIXED_WIDTH.
$this->fixedSheetWidth = $width;
}

/**
* @param int $maxNumColumns
*/
Expand All @@ -86,6 +177,29 @@ public function setMaxNumColumns($maxNumColumns)
$this->maxNumColumns = $maxNumColumns;
}

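/**
* Set the width calculation style for this sheet.
* 0=None, 1=FullExpand, 2=FixedWidth, 3=FullExpand (alternate), matching the W_* constants.
*
* @param int $widthStyle One of the W_* constants
* @return Worksheet Enable method chaining for easy set width
*/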
public function setWidthCalculation($widthStyle)
{
// No validation happens here; presumably callers pass one of the W_* constants
// (W_NONE, W_FULL, W_FIXED, W_FULL_ALT) -- TODO confirm against callers.
// The property name is misspelled ("Calcuation") in its declaration and is kept as declared.
$this->widthCalcuationStyle = $widthStyle;
// Returning $this lets the call be chained.
return $this;
}

/**
* Get the width calculation style for this sheet.
* 0=None, 1=FullExpand, 2=FixedWidth, 3=FullExpand (alternate), matching the W_* constants.
*
* @return int
*/
public function getWidthCalculation()
{
// Returns whatever setWidthCalculation() last stored; the constructor initialises it to 0.
return $this->widthCalcuationStyle;
}

/**
* @return int
*/
Expand Down
48 changes: 48 additions & 0 deletions src/Spout/Writer/Common/Helper/AppendHelper.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
<?php

namespace Box\Spout\Writer\Common\Helper;

class AppendHelper {

/**
* Instead of seeking and re-writing from position, a better hack might be to write dummy empty data
* Enough to take care of any length, then carefully overwrite
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, that's the strategy I've used elsewhere

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@adrilo Benchmark result with overwriting empty spaces... (negligible again but may be useful)

image

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tested 5,000 rows, 500,000 rows and 50,000 rows... The script is included below..

<?php

use Box\Spout\Common\Type;
use Box\Spout\Writer\Common\Creator\Style\StyleBuilder;
use Box\Spout\Writer\Common\Creator\WriterEntityFactory;

require __DIR__ . '/vendor/autoload.php';
ini_set('memory_limit', -1);

function generateRows()
{
    $headers = [];
    for($i=0;$i<10;$i++) {
        $n = $i+1;
        $headers[] = "COL-$n";
    }
    
    yield $headers;
    
    for($i=0; $i< 500000; $i++) {
        $row = [];
        $rand = mt_rand(1, 30);
        foreach ($headers as $header) {
            $randLen = mt_rand(1, $rand);
            $row[] = generateRandomString($randLen);
        }
        yield $row;
    }
}

function generateRandomString($length = 10) {
    $characters = 'aaaaeeeeiiiiiiiioooouuuubcdefghijklmnopqrstuvwxyz';
    $charactersLength = strlen($characters);
    $randomString = '';
    for ($i = 0; $i < $length; $i++) {
        $randomString .= $characters[rand(0, $charactersLength - 1)];
    }
    return $randomString;
}

$data = generateRows();
$defaultStyle = (new StyleBuilder())
    ->setFontSize(36)
    ->build();
$boldtyle = (new StyleBuilder())
    ->setFontSize(42)
    ->setFontBold()
    ->build();
//$writer = WriterEntityFactory::createWriter(Type::ODS);
$start = hrtime(true);
$writer = WriterEntityFactory::createWriter(Type::XLSX);
$writer->setWidthCalculation(1)->setDefaultRowStyle($defaultStyle)->openToFile('spout_trunk.xlsx');
//$writer->openToFile('spouter.xlsx');

foreach ($data as $i => $row) {
    if ($i == 0) {
        $writer->addRow(WriterEntityFactory::createRowFromArray($row, $boldtyle));
    } else {
        $writer->addRow(WriterEntityFactory::createRowFromArray($row));
    }
}

$writer->close();

$end = hrtime(true);
$elapsed = ($end - $start)/1000000.0;
echo "$elapsed ms counted with truncate".PHP_EOL;


$data = generateRows();
$start = hrtime(true);
$writer = WriterEntityFactory::createWriter(Type::XLSX);
//width calculation style 3 just for testing purposes
$writer->setWidthCalculation(3)->setDefaultRowStyle($defaultStyle)->openToFile('spout_over.xlsx');
//$writer->openToFile('spouter.xlsx');

foreach ($data as $i => $row) {
    if ($i == 0) {
        $writer->addRow(WriterEntityFactory::createRowFromArray($row, $boldtyle));
    } else {
        $writer->addRow(WriterEntityFactory::createRowFromArray($row));
    }
}

$writer->close();

$end = hrtime(true);
$elapsed = ($end - $start)/1000000.0;

echo "$elapsed ms counted with overwrite";

*
*/

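/**
* Inserts content at the specified position of an open file.
* Everything from $pos to the end of the file is read into memory, the file is
* truncated at $pos, the new content is written, and the saved tail is re-appended.
*
* @param resource $fp Pointer to file only
* @param int $pos Position to insert
* @param string $content Content to insert
* @return resource The same file pointer that was passed in
*/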
public static function insertToFile($fp, $pos, $content)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can be very costly to rewrite the contents, especially with large spreadsheet

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

True, I will try both approaches and do a benchmark today. It didn't seem to matter for small files.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@adrilo So I ran some tests and this is not actually a bad solution at all... (performance differences range from 3% to 7% and seems to get better for larger amount of rows).. (Only tested 100,000, 500,000 and 1 million rows though)

The differences are quite negligible, most likely because the file is still in memory the whole time and the only costly operation here is really calling stream_get_contents.

image
image

{
fseek($fp, $pos);
$trailer = stream_get_contents($fp);
ftruncate($fp, $pos);
fseek($fp, $pos);
fwrite($fp, $content);
fwrite($fp, $trailer);
return $fp;
}

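/**
* Overwrites the data behind the file pointer starting at the specified position.
* Existing bytes are replaced rather than shifted, and the pointer is moved back
* to where it was before the call.
*
* @param resource $fp Pointer to file only
* @param int $pos Position to start overwriting at
* @param string $content Content to write
* @return resource The same file pointer that was passed in
*/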
public static function overwriteToFile($fp, $pos, $content)
{
    // Remember where the caller's pointer currently sits.
    $resumeOffset = ftell($fp);

    // Jump to the target offset and write over whatever bytes are already there.
    fseek($fp, $pos);
    fwrite($fp, $content);

    // Put the pointer back where it was before the overwrite.
    fseek($fp, $resumeOffset);

    return $fp;
}

}
7 changes: 4 additions & 3 deletions src/Spout/Writer/ODS/Creator/ManagerFactory.php
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public function createWorkbookManager(OptionsManagerInterface $optionsManager)

$styleMerger = $this->createStyleMerger();
$styleManager = $this->createStyleManager($optionsManager);
$worksheetManager = $this->createWorksheetManager($styleManager, $styleMerger);
$worksheetManager = $this->createWorksheetManager($optionsManager, $styleManager, $styleMerger);

return new WorkbookManager(
$workbook,
Expand All @@ -63,16 +63,17 @@ public function createWorkbookManager(OptionsManagerInterface $optionsManager)
}

/**
* @param OptionsManagerInterface $optionsManager
* @param StyleManager $styleManager
* @param StyleMerger $styleMerger
* @return WorksheetManager
*/
private function createWorksheetManager(StyleManager $styleManager, StyleMerger $styleMerger)
private function createWorksheetManager(OptionsManagerInterface $optionsManager, StyleManager $styleManager, StyleMerger $styleMerger)
{
$stringsEscaper = $this->helperFactory->createStringsEscaper();
$stringsHelper = $this->helperFactory->createStringHelper();

return new WorksheetManager($styleManager, $styleMerger, $stringsEscaper, $stringsHelper);
return new WorksheetManager($optionsManager, $styleManager, $styleMerger, $stringsEscaper, $stringsHelper);
}

/**
Expand Down
2 changes: 1 addition & 1 deletion src/Spout/Writer/ODS/Helper/FileSystemHelper.php
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ public function createContentFile($worksheetManager, $styleManager, $worksheets)
EOD;

$contentXmlFileContents .= $styleManager->getContentXmlFontFaceSectionContent();
$contentXmlFileContents .= $styleManager->getContentXmlAutomaticStylesSectionContent($worksheets);
$contentXmlFileContents .= $styleManager->getContentXmlAutomaticStylesSectionContent($worksheetManager, $worksheets);

$contentXmlFileContents .= '<office:body><office:spreadsheet>';

Expand Down
3 changes: 3 additions & 0 deletions src/Spout/Writer/ODS/Manager/OptionsManager.php
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ protected function getSupportedOptions()
return [
Options::TEMP_FOLDER,
Options::DEFAULT_ROW_STYLE,
Options::ROWWIDTH_CALC_STYLE,
Options::ROWWIDTH_FIXED,
Options::SHOULD_CREATE_NEW_SHEETS_AUTOMATICALLY,
];
}
Expand All @@ -45,5 +47,6 @@ protected function setDefaultOptions()
$this->setOption(Options::TEMP_FOLDER, \sys_get_temp_dir());
$this->setOption(Options::DEFAULT_ROW_STYLE, $this->styleBuilder->build());
$this->setOption(Options::SHOULD_CREATE_NEW_SHEETS_AUTOMATICALLY, true);
$this->setOption(Options::ROWWIDTH_CALC_STYLE, 0);
}
}
5 changes: 4 additions & 1 deletion src/Spout/Writer/ODS/Manager/Style/StyleManager.php
Original file line number Diff line number Diff line change
Expand Up @@ -151,13 +151,16 @@ public function getContentXmlFontFaceSectionContent()
/**
* Returns the contents of the "<office:automatic-styles>" section, inside "content.xml" file.
*
* @param WorksheetManager $manager
* @param Worksheet[] $worksheets
* @return string
*/
public function getContentXmlAutomaticStylesSectionContent($worksheets)
public function getContentXmlAutomaticStylesSectionContent($manager, $worksheets)
{
$content = '<office:automatic-styles>';

$content .= $manager->getWidthStylesContent($worksheets[0]);

foreach ($this->styleRegistry->getRegisteredStyles() as $style) {
$content .= $this->getStyleSectionContent($style);
}
Expand Down
Loading