From 1179278b7800633f40f940c3b2b4bdee1fdc1721 Mon Sep 17 00:00:00 2001 From: Alfonso Uceda Pompa Date: Fri, 8 Apr 2016 17:15:36 +0200 Subject: [PATCH] Removed hyperlinks when parsing files in streaming mode --- CHANGELOG.md | 4 ++++ lib/roo/excelx.rb | 1 + lib/roo/excelx/sheet_doc.rb | 9 +++++++-- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3d330b9c..c4f1848e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## [Unreleased] +### Added +- Option `no_hyperlinks` to discard hyperlink lookups, allowing streaming parsing without loading whole files + ## [2.4.0] 2016-05-14 ### Fixed - Fixed opening spreadsheets with charts [315](https://github.com/roo-rb/roo/pull/315) diff --git a/lib/roo/excelx.rb b/lib/roo/excelx.rb index e120d85a..369d5e38 100644 --- a/lib/roo/excelx.rb +++ b/lib/roo/excelx.rb @@ -36,6 +36,7 @@ def initialize(filename_or_stream, options = {}) cell_max = options.delete(:cell_max) sheet_options = {} sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false) + sheet_options[:no_hyperlinks] = (options[:no_hyperlinks] || false) unless is_stream?(filename_or_stream) file_type_check(filename_or_stream, %w[.xlsx .xlsm], 'an Excel 2007', file_warning, packed) diff --git a/lib/roo/excelx/sheet_doc.rb b/lib/roo/excelx/sheet_doc.rb index 9788c1d2..f8d0cae3 100755 --- a/lib/roo/excelx/sheet_doc.rb +++ b/lib/roo/excelx/sheet_doc.rb @@ -39,8 +39,13 @@ def each_row_streaming(&block) def each_cell(row_xml) return [] unless row_xml row_xml.children.each do |cell_element| - key = ::Roo::Utils.ref_to_key(cell_element['r']) - yield cell_from_xml(cell_element, hyperlinks(@relationships)[key]) + # If you're sure you're not going to need these hyperlinks, you can discard them + hyperlinks = unless @options[:no_hyperlinks] + key = ::Roo::Utils.ref_to_key(cell_element['r']) + hyperlinks(@relationships)[key] + end + + yield cell_from_xml(cell_element, hyperlinks) end end