From d7b3cec727457e1f75d1e67d385c9c5d962cca64 Mon Sep 17 00:00:00 2001 From: Christian Zosel Date: Mon, 22 Sep 2025 15:58:22 +0200 Subject: [PATCH] fix(docx-template): corruption of libreoffice-originated templates Certain versions of python-docx and python-docxtemplate cause corruption of files that were originally created with LibreOffice. One effect of that corruption is a duplicate entry in the document-internal files; there are two docProps/core.xml files in the resulting document. Co-Authored-By: David Vogt --- document_merge_service/api/apps.py | 55 ++++++++++++++++++ .../api/data/created_with_libreoffice.docx | Bin 0 -> 4989 bytes .../api/tests/test_template.py | 37 +++++++++++- 3 files changed, 91 insertions(+), 1 deletion(-) create mode 100644 document_merge_service/api/data/created_with_libreoffice.docx diff --git a/document_merge_service/api/apps.py b/document_merge_service/api/apps.py index 68a27710..78326e0e 100644 --- a/document_merge_service/api/apps.py +++ b/document_merge_service/api/apps.py @@ -10,3 +10,58 @@ class DefaultConfig(AppConfig): def ready(self): if "sqlite3" in settings.DATABASES["default"]["ENGINE"]: # pragma: no cover TextField.register_lookup(IContains, lookup_name="search") + mitigate_docxtpl_corruption_bug() + + +def mitigate_docxtpl_corruption_bug(): + # This is basically monkey-patching this PR: + # https://github.com/python-openxml/python-docx/pull/1436 + + # Hold my beer! + from docx.opc.constants import RELATIONSHIP_TYPE + + if hasattr(RELATIONSHIP_TYPE, "CORE_PROPERTIES_OFFICEDOCUMENT"): # pragma: no cover + raise Exception( + "The docxtpl mitigation is no longer required, please remove the monkeypatch code" + ) + + RELATIONSHIP_TYPE.CORE_PROPERTIES_OFFICEDOCUMENT = ( + "http://schemas.openxmlformats.org/officedocument/2006/relationships" + "/metadata/core-properties" + ) + + from docx.opc.package import RT, CorePropertiesPart, OpcPackage, cast + + @property + def _core_properties_part(self) -> CorePropertiesPart: + """|CorePropertiesPart| object related to this package. + + Creates a default core properties part if one is not present (not common). + """ + try: + return cast(CorePropertiesPart, self.part_related_by(RT.CORE_PROPERTIES)) + except KeyError: + try: + office_document_part = self.part_related_by( + RT.CORE_PROPERTIES_OFFICEDOCUMENT # type: ignore + ) + rel = self.relate_to( + office_document_part, + RT.CORE_PROPERTIES_OFFICEDOCUMENT, # type: ignore + ) + self.rels[rel].reltype = RT.CORE_PROPERTIES + return cast(CorePropertiesPart, office_document_part) + except KeyError: + core_properties_part = CorePropertiesPart.default(self) + self.relate_to(core_properties_part, RT.CORE_PROPERTIES) + return core_properties_part + + OpcPackage._core_properties_part = _core_properties_part + + from docx.opc.rel import _Relationship + + @_Relationship.reltype.setter + def reltype(self, value: str): + self._reltype = value + + _Relationship.reltype = reltype diff --git a/document_merge_service/api/data/created_with_libreoffice.docx b/document_merge_service/api/data/created_with_libreoffice.docx new file mode 100644 index 0000000000000000000000000000000000000000..39f4956e5193407484a1537d912f1d19b4fd09ae GIT binary patch literal 4989 zcmaJ_XIN8P7NvJ2ND+usK~OM&^e#a#gc862BGMBjbO=qU(m{HaNUwraks?JDm8$e+ zFw&%{7y;=ZlQ{2ZUYJ6>ZX97sdUX7z^V<}?-+cFxO>H zUUjjZGZ5Bbg%zh?v=u-XCAm*EOo*tdEW#QNfth?X$L>fv3f^r$ISGUvs2#s+?2|8 zR?hyIr#GOuk=of;NQ#w`fBn*MjJp}az74bQ(=C+_(dvbQBjT+G#oD#Yp!UVHnx=G5 z7Oa(T3F?83BO1obDeN>F`J!mEz=|GMQR)?;&Cg>7#f?aK!e?3OYr>dy-0YSgypmRx}tAhG@81ORihIv^~d%cHR95Dx) zD~nGV27n$I8(zA7$3ZZkD~fVaA+$(zLaKUge#9Yu-B2F0Guy_iXFBA3F+N2;5Vlsm z9cOdORMB-(%1x47L&kI*S92yv@H8k|%-dyF0TR^aB&1QUfYgq)XexA-uZb+&XiEO9 zbh7k)kQNsVr1gmr+sn1py6bm)l%3mpr2buiKUqBvLhe?(&=3Z~MF#z_`@u-8&%D$C zl?^Eg$1*rlbIX17T@UxYj`_BHfKe-9& z)Mp834PZo#JnY(gQm3@nX<0W`=2&<}zB;zQM@%0=aUBC)Q51H4z2T`YkN`Fuv_lGJ2fc^Pn%z?M|vK zaVc(95xG25QC!NolGsyDw{Cci1-#$^&W63RUg9h~BZK+EDqC#=*_r3u)QD%PXKC0y z0fYlyUil_Ygc|+2;P3(gYMwtreVPEZkE;hl{NE};*cD#RNEc5c;{R3*qdqzU?5+E3 zHh9^}m?-H@fe_O&UcksI`IMmGqvdHJ{7FbuB+o5bB`IB53ybFT?4jLGds1yya*aye zp=IcE)#=z%HN9FZC(N~HqpN|R%_fH|FXQ-*JU5HSS~o?@r1IdE9xb-tP&*HLd+dO{|kb|$Cl!d{+;MY!P1-NHQbgO)=C zWPjMccOn0^PspiF(N~^>ClW;tknQ2Z8=@Uf-88J;e%$Y8Lv?dFway*M*L!q)5QX9=I>&J-PJQd~KmyQ(51`ZX0YZjj`zV&?8b&mva%%TUz%&M;C>c>3 zaPG6@U616<_^BHJY{K*ppKxDaf0K)f?oY@(^@qK{0#%%;;0c9Y_k+jS#BRlMo%8A& zS>N9$cdbT7f<*7u$2gw49n!%0u}Cx9Y}`$A6-pano$nz^bw0HGWNZRGr<8k}E(oYL!pJz(FkxA1j!fEzofW1S}{8+hevrcRuN2yPVuO+5+j+@J22mEOI z%6`lsCc0!&26sND{W{Cpn+MVCV7iH1pw;DKANkdk3$N~wgR_8j_&$ZG_QY6K?Bc@U>5 z#~~R!hva^-(>Z~3@jLLBrwRqW(Ez)d^}pvBePEWbIN$_TF1TDAu@~9X#u4g?%AySC77vSkmyYmb9f*8d`4LD?PJp ze|80)X#_ORs~>&FyrSM7lq&mN=$`WylPQ->t3+>k?gnVM?=$Fu^nA`9 zcbWKX$Rg@SgZ6piiquqsyC+|qeheFUZgbhAC6+cZB`W_54%b?;4&Po+itKC^{2+s8 zyz;{qtCtOO%+G-RkX$InEEdHZ+V&TXVGZc|%}c9pHf!zbzR6^75nujFe@|tEO&rXy z((nLvQ8)B%G^vH(aK5xyo7=}W8hxm}MBt-P@?xtDe%=*XtYw&QV>@KMz57yr*u^BC z#K%Xa80`Ig?t_zxc}oUJ4_eB&~^g64bb;`>T=!0QFy*v%Ew zED>xs<_5;aClZHF@R#H!Oon`_mR7q_y6YF&yy)$fM^g1*!7G*BWLOd3sIY0-tYDv; z#@5MvltSt_%sYXXp;dDL#uVZb19G?I5J7%My9aWOO5*w34zj62s#PMgSJZT?6Jpf; z>q^^y+N2~+#nL!}^2O2qiODDbX7Wu}7f-m2t~xxCNs z);NbwbZ6nRjYkT64Nfrh)amU+$5lHPuD$!*rV-PoeTiNY_B;+{)<57WkVXO0sE*TM z*U_DOtCR;)mjiIX6sT*`#MWm50G_XEI_4+4oE_OOgJ+A%+a+XR1~a|Djs-5D zHF64a;5}jOO%YSbqrxd)C5Xp{B}@U?G{zgTh`4`5v+f|v(HwbFl%eG7{pX!X6LHn) zX(*#Rtm^4@ZGX^a=YxGZhF2P`haN|IyN~K~n3Rtwhzim#78fp0P>^==KjHqLu7gH; zdZJuz{nQX-qKeB)35M|PoQ}Z{5ZXbtGu{maI;BO_)(Ay*zUYw4U0yJ&60Mnh;A?zR z0&M@K&$lr)v^y)X9>-y9^JMg?FKTpR<#5f|IGo>m=A3QeO|egds$WW*`RX1*g@CqY zrCN+{%Ep@~Ny$?gg$<@Cc-8x7VRnW~G@))eWdefn>|(zHG&I8(0(YP=@_PK4`B zWM}&*2N^{|2k+zBbzi*G4ZriDJF4WF5Brf(S@e!sH3AD= z37GwVJJ0qvPd)9C&Pef}S4pDo;?ne7rz996@cV2bUmwji(GK(JON!BZi_~Z|;5aiI z#vqUR%U0L%rQP~-)%hWJ%SW-e1`0oWbMb}y)pg-;?3e(RkbG|Qh0DluLwWx3R%DQgB|$Z9WzETy zP@TGY{tZk3z%R5i_&Xpwu%c(1x0ru`k42G|3Hh0=g=CoVshBe31j&lc9_GW7aiE7v z8wttMv((vlLu7N>?`tkRw2c1<7{KTCHGZEbWpa;IIo#=;KCu&C>bKWu8Ynxni{oy8 zuT5W`vB4b3r8zlE4u|A370Sim+6zGxA1KER++-HpVhk+4=IH;)cUAG_DCP`hLh-vh ze9{vG56^zRW<&KJ%^ORitu1s~ydf`4d5`tnEG5Uiez^@#X))&Ol0g43JPbjNea6$X znydNPat^vIcm1`D`?w3-{(%D!+?%Zg^{>f?m_w6X+N3y0TVgehcZ3*YGP1FsC=St- zvSK=)sx&Q6Dkj`2eH7Z;j`DXsG%`kkEBnupX6_G=`kZAzBP*tYFMML{(DsTYE1Qns#{>#nf_QJ*Wv*_l9x1dRc3fL>?W5w|s=IZ_LL}H$%~}Z2 zAP>>}iC`=Qf|+X&M3CT&t>CxakZ21cz1mXy^j}HdLG818!9K5Inv0(dndo9s2 z<(+4$cmunCNE`MPf-|Hoe{@4gWKqga-L6<#zc#qBsvv#M$Aq-)bV9X=oMP|^R1^;a zGp*iZ;syCCx6`yxkR{mYY;~C}%*IL~isc1V)lceVc=ud{%c=yPt8CxN zvG%H`PCT#Ap1UpjLF~H@hRh+y4W~~BS$q?IrKINWvsc>o-ix>QARXVHN{!Dux+ij5 z9*|nXOWEU|Q_^-Z5A7{ox|*I$ey4&JdNJxAJ6}8^h(ow};j389p&C0buN@4-C^gS^ zpKk54ViBG+W=**x5y>>gb+CK&$+OU%lS%Yj*N^5xfzX;do)4s9!I<_*@RBkI|4cOP zs+WgHY-fo^ya)F5&uD@p=A3ccU*RSBG`13IK`;(3v(tj>P zjeypF4KBZ@A9n#n*ZM2a)PJS_^02=rA8%zuqwp(K=>AInhi&*h|G2dO^V{nQrTqEr z$J^iU>Bl!NQU8C%0>fYFzjuS*A90*n#2x-Cv 1 + } + + assert problematic_names == {}, "Duplicate entry in docx file's internal structure"