From 1b7057412a35e0e5623212b7ce7bf70f6c1c2375 Mon Sep 17 00:00:00 2001 From: Benjamin Fleischer Date: Thu, 12 Feb 2026 20:34:04 -0600 Subject: [PATCH] Handle missing cell references --- lib/creek/sheet.rb | 58 ++++++++++++++++++++- spec/fixtures/sample-missing-cell-ref.xlsx | Bin 0 -> 6936 bytes spec/sheet_spec.rb | 16 ++++++ 3 files changed, 72 insertions(+), 2 deletions(-) create mode 100644 spec/fixtures/sample-missing-cell-ref.xlsx diff --git a/lib/creek/sheet.rb b/lib/creek/sheet.rb index 69d8497..64d3b3f 100644 --- a/lib/creek/sheet.rb +++ b/lib/creek/sheet.rb @@ -100,6 +100,9 @@ def rows_generator(include_meta_data = false, use_simple_rows_format = false) cell = nil cell_type = nil cell_style_idx = nil + last_cell_ref = nil + current_col_index = nil + increment_on_close = false @book.files.file.open(path) do |xml| prefix = '' name_row = 'row' @@ -123,9 +126,11 @@ def rows_generator(include_meta_data = false, use_simple_rows_format = false) row = node.attributes row['cells'] = {} cells = {} + last_cell_ref = nil + current_col_index = 0 y << (include_meta_data ? row : cells) if node.self_closing? elsif node.name == name_row && node.node_type == closer - processed_cells = fill_in_empty_cells(cells, row['r'], cell, use_simple_rows_format) + processed_cells = fill_in_empty_cells(cells, row['r'], last_cell_ref, use_simple_rows_format) @headers = processed_cells if with_headers && row['r'] == HEADERS_ROW_NUMBER if @images_present @@ -142,6 +147,33 @@ def rows_generator(include_meta_data = false, use_simple_rows_format = false) cell_type = node.attributes['t'] cell_style_idx = node.attributes['s'] cell = node.attributes['r'] + if cell.nil? || cell.empty? + row_number = row && row['r'] + if row_number && !current_col_index.nil? + cell = "#{column_index_to_letters(current_col_index)}#{row_number}" + end + else + col_letters = column_from_cell_ref(cell) + current_col_index = column_letters_to_index(col_letters) unless col_letters.nil? + end + last_cell_ref = cell unless cell.nil? + if node.self_closing? + current_col_index += 1 if !current_col_index.nil? + increment_on_close = false + cell = nil + cell_type = nil + cell_style_idx = nil + else + increment_on_close = true + end + elsif node.name == name_c && node.node_type == closer + if increment_on_close && !current_col_index.nil? + current_col_index += 1 + end + increment_on_close = false + cell = nil + cell_type = nil + cell_style_idx = nil elsif (node.name == name_v || node.name == name_t) && node.node_type == opener unless cell.nil? node.read @@ -170,7 +202,7 @@ def converter_options # Empty cells are being padded in using this function def fill_in_empty_cells(cells, row_number, last_col, use_simple_rows_format) new_cells = {} - return new_cells if cells.empty? + return new_cells if cells.empty? || row_number.nil? || last_col.nil? last_col = last_col.gsub(row_number, '') ('A'..last_col).to_a.each do |column| @@ -203,5 +235,27 @@ def cell_id(column, use_simple_rows_format, row_number) (with_headers && headers) ? headers[column] : column end + + def column_from_cell_ref(cell_ref) + cell_ref.to_s.upcase[/[A-Z]+/] + end + + def column_letters_to_index(column_letters) + return nil if column_letters.nil? || column_letters.empty? + + column_letters.chars.reduce(0) { |sum, char| (sum * 26) + (char.ord - 64) } - 1 + end + + def column_index_to_letters(column_index) + return nil if column_index.nil? || column_index.negative? + + result = +'' + index = column_index + while index >= 0 + result.prepend((index % 26 + 65).chr) + index = (index / 26) - 1 + end + result + end end end diff --git a/spec/fixtures/sample-missing-cell-ref.xlsx b/spec/fixtures/sample-missing-cell-ref.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..ac912a7f0c3dabfa0dafb9780a10e3a9db04f033 GIT binary patch literal 6936 zcmZ`;1yq#Xx~99kJEevY1f)Zd?(WWkp{2W|yGuc0s3E1KOO);u6r{T(?)d-b9?#D` zch>s8z1FPtKC|c9Z$4X90Um(>1_lNN`q0A2SyPzl!@|JeAi}`lL%%VQ0y?`}IlG%^ z`nXuR8F6|$If7FqlzRawNFRRo-1fG5OvL#xjcKK&7v?VWF|dv!uz*r=W7%M*g*akZrqZRvkPF6FDtqH0s`BlNY@MD-s>q3WmH zy$d}8MK~B3bm(dVvT}6e;{5#vCy%{`=f(*-3OU00u{zci62-Tqfcpl&92>v{5!+|N zPgZLN0&xh~8qe3cQ?h`aq}Cl-_wQC&z6XZ}TWN(Bn=W7$@gBDa>MjS5gmiE81!GXK z^p>`E>WfW;smFlC`l`?vE#W)l+UJCp@?ZXlp$|Wu0-dsa&k~}Xh@Vd~p1A-ki;<2~ zaL;@pl@o48XcUwF7&hi4th-q6e?hP%9cw}w{IAN4;eNuRlK zyM_!QPni_7c?NqcL5C5%;dml+rnEc{v5v3ud3vo zC(k8reUP?nvc9Q4JB~ahlIA`?FL2b+iYv7#6BDZUE0RQ_TD*o)E3Ano%yq|e6F08z zGH6^aEIW`!sXH#N+$p=N;^7KQwchTk9{k44+VSjeHRojimLdEMvtgrRWO;qW zi}FL}Gw;T)%aOkt1w1J&?6g#io((PA01hwi?%NjBe)ejYhlawW4KGhFzIh#G#wcfB zt%*r^xSyInZC-xWH%(@1_8Nu_5!GLeHu-eq0Jrp(gT%gh69d#whMIOD!G~g>)XD1O zgv2+z3ddY7%GWrUT7_#qirD;>^244S|H?kw`=8o{`x-ddLz$f+>A7}R(_f5fr@4}A z&XGh>LyS-Tnl|C6e3vNo=L$YJ0&%P9Kh#x*7_jgV+o5=bWgs#X<=mETz{LWT474o+ z4*S8LwiYvZ-)#Xu(s`FmlUoD4Fc>+)5RKH(6}0r+kHuPN%(xW7>+^)K7mp8TXB%!X zx>A*X&MvDP2*nX)DNtd&e$~#!5DBEwT=6&)Wmbfz7#B3P98{t71)06{A3Ni!DyxD7 z=eAzn6y$%l0cRvo#Q*4DY;sptX@53dcXsA8RDw`xq4V;jCw)HiWL~VsAEuLwLln#q z?=mrYU?dDaD6cJZb2S+Bj#8e%?BC|cJ|=8(4fP*1R1su)qk$mOcT8OpvVqf3Bux6N zGvQTcg>K1ndxLLZ09Y>d)Nu!NP2o0&SMK%kCGy$c5}i>UET{vkN;vP08$Rnyj4@Dg z$Zk?|9Yj5VNgu)#BW!|brec3`n7$%66G%R!tx1|>QA}x=i{cC1B#SK3%1vko7$>yL zw4Zj+kcJ!MfqP%*N0<>r+EkyO^H(JEv`0$3a(kJWTJ}>C0!k5fkhpp=zkx{4GcSik zc~?jovopY|6WE_?gJxmp+P)XD}-PCxE0YkSpcmC&^axanC6`#Q8beP1c>c zjP0B}?LO20tv|=Oi0VUK(e7 z5QNzbRxP(2YkH47s%!iMHF{$qDi(z;Gv?DJq9{x!E@ZE9F^1BN%z$M8x3+_qOf8{M zVq{Xkfai$&Iwnn#_$8BPhhnjElG(io^L-Njf)(6>#cY#!{8wzGEmgfJv+{L`r3x$R zQR6QA3~5D$6QwW(wjvQ^ETV0OPMijC2 zV(cLH7TpPwa(PMFd3ZzOS5gwL&tPhBt3-w~Ubfbzjz~`8^Qk%QZ6Zj?e5R?%f4ttk z(Kz3|Nhyi^8c+2AI^UU?=q{IMBvHF*7cN9S98meXmAxbFIK&MdxUFF)23qAUt=(hZ+My{dBwVG zYVSTAgq4=HgA_-@iJuaJvl|0!IxNDnVY`W-&l8GT!}eOa3aE6&^U2kkkfb1Fl&al z(JG6-O_4{j>=sI7*}!uqo>gC6AWRDxTgVpf(19bXkqdZ_-}*=s=6j?Bsy(iit!P^*F_@teZV*DZLB? zuTtGF7^^d;_kDWV3Q0$rL9#-Ly*KxnFQ0!7-q-V*wy|k!VvC@ntX$<-+0h&9#@cNu z1zGxU<&10)yOXyGgt%=*fk6?D?4E#~mCumTGCU!Fe<9q&r7^mbig1JJfjfhoVrzhc zq@ks+Laaa5*Mav~3yKzM*A_)nx3O^cpX(whWwY22uH>pi4GN!85S93lXA8Hffn{qkhi)*eRk+ zO6Xszsl#-0l(>)2FIVZs*tjOS(U3zBKjd;I0a2QHj&Tb->3;819cRPz!6$vJru@0T z6PvET2%$0r*5TW(b?n~ZWS&hxrP6wO@FbV*vZlAA=S#|C$|%uiS3U^Q-sM`UCao_J z@(i+FSt|qg2&VP%j$&U5+kbIpcfCCM_T|WNZr$O^TW2Yj(ca-I(Xr{+gn(KUW+BI= zgB0Q?!vq*bUZUgm9=ez3w&Ve#w$hh!OqD z0xkk@g1S@|18eW{nBLXG(rF@+m8DHqXTH^QTu*2khV8$CgT)~f<56(*&US-Vey zB6ZqS{U*zlaFT&Bbq8!A@u_~;YB*e)^?AATEqv<1eDw*+S2?C|yl_$O7I6Z^;p0vA z!R{A;cMAWjo3=5UI8c%4gcqTLT1md<0J6ZJ9nKil^IQq%dH-`te9JkM}6O z=1?nX7#zMX_ycXb2J6sxThdKXqt@669XuBZU&IRpKs@4T@%t6vlQ{&eS z2d{c{yjdjVozGZ^SITR<##Ll)9aYAQ4mVpfNbR0kI>$$?=sG3&&N#kyEdqBjd|yP> z*W^qd*9WkDM@S2y-!0x>ChM-{Rs1=U&nOlX3X=nOoXx0c@`IaCa8Z6OnyFIuJhRjc z-rbdIbtC>Eu8nmO=4L+#FsOdx3i^TfSQbx?`X z9D6^Ht5=T#DmA8vqW2-j(xo6Hqm8#tuI(Ix1MP5!Yhzj168!jTX||nznOCk zvb-#i@3fHh^ozF8yqqsbEwUL#4OrkHaVlF?=~^RaNc>ExEzqyIX>QMqVk&79Zbox$ zU)W-P#Kc9}o`2QApV3QFKpVUdM&d+S-C=vbLOz|`cYM`qXrrb49TBi$j#Qu%H0jxXF`|1dMmKD0Wb$Z5Kz$YXnInc z0d|#hbCAsh=}n7PRV+HE-2TXvGA9H;lz?3U$Wsgvr5a^OGTqn04_~}EO_A5ACc~D4 zCnc>dz5*LX;7Bu)hO{k?wlBWf?%+D<64gmt11_l~eAjvss6%|qoiE5f;aCox&l`}Z zB(Ti*VX{tfWJyKHv1^TBh*l8Jw89ZB$k<5=A9xm&Dq0%%1Jr>ViLUG~J~MKu4=KtN z)0iP$>@^NuL~wiu7Aa(XT0nwlGsw8a+A)$>Qzp5g+y~uJX)- z)Z5&tVbADdwVz3nXF2Pc&||_YyHvTl5I4cUiG)1{;g#F8*vH;@?6<{wG?BTrA2Ia3<6=mNWXHm_LO*-k%Ci^x@_RF|}xB-relhswS;betbbip}Gz z*f9?%xr-&+Ir?gPMB}=4Zm5Hqlp=Kd8ghZH5flJf*t?JGdXalBrc(F9wS(5d9w|~y zV^SAlbR8#Ps-M-;aVTIMre_7(&psy;QO}x2TV11GI0kTCj0^3zVdaQ)JTD?oLcpX`TEy*rf+|ddvBJ?oYZG%UP(T~g8(m8>)cUAo#q_^qGrs7yd#rm1gu?pb6vw9~MsLN4=^eb;N>32%JmTk>|+shIbOTok2=Zhah!{+wL% zVD~ZCKf`d13?5wqsxjI@YZ5Lr3@w2csvw|?8^clgqslM zjqp$id-M(D{aY3>`BVMJG1uQFvZk-ImgRp_CdrwILSnQjne&Shd1 zt)r@PCq}>BdYjrqjbSLAupxJpN7CKol;SyAQuIjB#X)Uyt7cL3(TH1ItGOUTs0*XA z3h}s2s*&KceVGHb$V{Ut4w!Nt?h&y~A6WkRU`{%8-(b zTHGwq;&QfevTb>=x&@A(a_AnSPN?+NVY!KCrIB3RL3w{*g*(G-2 zB-e!fUCsOxXQ^!b0SSjX3k@`H|K9w+oyE+><@e_El9hl<0E~zu_)BVx74n4@i6zhJclLAUQ z-B%MN%v@D+(a|HBgrZX^xQgQm+};Z}Mi&hZ5wR)|<-eZbxpH6~ZkUG!t1p!)jccf@qJt%S7vM?u1XXkVm&n~;b6P!%TP*g8PRA};;?XjI#{y2-t_0C* zo*JrNp=|04n9|!lKYk59YbE^f~dq;kp8|uFPWN&jE^nrsRo%Ri3r8nR2dv9&t z>{q%Vzb#uaBARImkMhvJYF}zCJj9oFFL`J}?CQOJ=d(ltd~m-z{&?LmBDQtozOJD( zd^R^52<MLYW!5h`ePgf6?Oibr1ccxsQ~l`Ap-d?gr{=Q zQ-G&E{~v&9=m_w83;&(~o|-=GlKz;kKm+h!fBpaTOixjsCiy=omQY>q-%