From 447c047731974efc82257770b4e9308a6607425b Mon Sep 17 00:00:00 2001 From: afourney Date: Mon, 20 Oct 2025 15:54:05 -0700 Subject: [PATCH 1/2] Test if mammoth resolves rlinks. (#1451) --- packages/markitdown/pyproject.toml | 2 +- .../markitdown/tests/test_files/rlink.docx | Bin 0 -> 13708 bytes packages/markitdown/tests/test_module_misc.py | 48 ++++++++++++++++-- 3 files changed, 46 insertions(+), 4 deletions(-) create mode 100755 packages/markitdown/tests/test_files/rlink.docx diff --git a/packages/markitdown/pyproject.toml b/packages/markitdown/pyproject.toml index 0d9bb616..3056c1c1 100644 --- a/packages/markitdown/pyproject.toml +++ b/packages/markitdown/pyproject.toml @@ -50,7 +50,7 @@ all = [ "azure-identity" ] pptx = ["python-pptx"] -docx = ["mammoth", "lxml"] +docx = ["mammoth~=1.10.0", "lxml"] xlsx = ["pandas", "openpyxl"] xls = ["pandas", "xlrd"] pdf = ["pdfminer.six"] diff --git a/packages/markitdown/tests/test_files/rlink.docx b/packages/markitdown/tests/test_files/rlink.docx new file mode 100755 index 0000000000000000000000000000000000000000..5afb49d2195393466d82f8732ac4c67a3669c671 GIT binary patch literal 13708 zcmb801ymi$wuW&H?(XgyG$eR%cXyYAySr;}2=49#x8Uv?+&wtqk&!#e+_^LJ&RW%{ zt-jt>yQ_9}_1`k$pkOdS-+#!O3hKYC{I+-ngbQR~tuJY4uWO)dugjpQ01X66>U(YC zS9ERS;0y}{47LXf1oX>7=7oa#k`L-75dqa+h8mu{tO)=E0b%`#BB85iXz||4$eP~S z(&87S4b&L2@C)`&NX6(Ovu;L2A+KH&h9S9h74~}26|_mk_P0^8wBsZyQ{!^?rwJ`) zP9I?HIGlEdL|%=6UT@f`L0NN8zGYsxw^#qn%|-oQUWTlMU#c=%nsTdplzfj7Jda4j zqcfjhE;$lFgtOtHSDFNuO>KE)G)Bg4AsI=_y$i=U5fUln)sFA4{MHbS`T}Fgxh0xm z4mK~t_1*S(x+z0$?jk_yow*BkzdS|m-J9?Pco_r^Q{weRbN7dYRm+(hClmTd>FE#zu(f3h!dY-sfDGOXR&r5Rz-z)I1-2nY- z18H<@TIfl6F?mhw9c zGb=Na8NH;X$bG7`mn41eBMb~W7-{{{tbO667POb_WbJyigXr8y#?$rFMVfnp#F*GS z2WsNWTz9JYgBj9RHJ*6$_F$Bq_}gyd`=4QTfrpry)6yxDjoiMME2;s^Niu=h64i+C2RKLw9^--A^qqyeQSW> zKY1-8R#V!K5jF5s@)05TrnDnl7KGN*K$)J&R(Mm>a;%=~KBgXLb4^``aYEDCNowte z2X~b{6;BZBs8glfAadiqKS;C!6a5^gtLA7o4yuX>DM?;zbPhIc3fcz@tQjgrh>VCy z9b8Bp=h#}R8O}wH0{L*d6>mbv4ZO)P8bdJKHa7E8=#^Nub<)uCDj*R|Td>z1G6;h9 zg3MfmUq|EVyk%dJK}C;lI3sVy&G{f*AH6Mkw1GJ>gEKB`aiO+64zx9Byt@+W?XP&)MmHsrqfIrmfD+*NW4}nN%cDx7APK#HM%xsd)M) zj-cqb&Xe@RS}v(?c<6TvMYFf4!*duz63-=5B&I0uO=fXMGQBq%JDx)t^z{(-kg@qD zHgNuTV+DV4nv*ra;Q!H3&Hd(0E5Lw&b`XE{`7haD`dtT0Lo55Ad@l0eon&v4ru8K` zYX2$4F*l*~qUPZTj>yA0kN5(~VzCv4ulwZ`zP7_=evqV+*ZWQ#47NLEc9UT%I$B=4 z2$1lkbJ~uEKJ^OJU;(26`Yb59;%UJ1_Uz( z;!_N4>WaTlK#q|v0uz|K;F3EKjhZQfZBYPd>=Y8>WHsVXR1uIZUqK#fkaPfEhcU0$ z1D|r!ag`103~x{W2qo;`4~Wf>NNUOxHD!9Q9@=Qj9!oKD8)o$>CS!UtRr<9r`sT22 z6U~Pc`;YeAN|CB9uD2d2Do4dlvFMeNz9ngjn)Z}qdGGXB`RfVjI_hcD=&SEYq88f& z>K_*2AlEVPB2EXhmA)}e+z2~Q5jG>~lj1H}B*a(xAq0-8rL1n;llV!)LP2;3kH>rv z8Vbe<6o)TZvYG#^KLf~)?4HEm=6u{IyK-O7_I64udAg8N4gOv+5 zzg~mE?#k^#=Ig?`DA3l`9>a(%3!qU*RkJ=B$s+Pz1jA3;hgEQ?>7CAW)12WQIm;i< zH}u5ly<7}s4Qcbp6YG{3M_CSF6^bI$l6N>xA&(PFRTAJ_1)z&4hqdT)#yyOO8HX*z zr1kAVab7HVHaaKr8(5WPm75d4HYo8w_q7d`%fF)0!A*Tw+|FoT4jV}hkYuE2iWb(d zO&i;+m@=GtD{qtsImD-VvGWq2xIx#))7~%BnY}2tL>;!!Zo+;To;0Bs7dJ2`neo&h{+zMv zNVn7-z?xfk1`Ls%@3XskBguohz#XgD9gknCfMkCj1viA)zEJ22p zq{ZZcg&9fOR-er9ygQOPR^JORscq|MY#d_Jo@y%Y;255+l6su~W?XhHbAzZlAq=lS z?3F_;d?hTG^^U%XOEvm|HNrX*i^+~1TqX3`l%RGt0)6v?Ppx8SF&Pa^O*VOyf!zED zHr%vvM4Rl|+>|Zy)OTYTIyedyoxJ49vBhRVr_8D;4E~C>H6jMqa`*#4E8!u=zZhFUm;;X~@s+lS){lPu*odqab|FJ=GHX6UB}HWs zsj3r}jM%KKVO>kdz7tG|I$aEdqIOd?gBXLK+msgmVV?!`CYh|_LrZ%0$gFa#+pXWm z^`D~M-H}Ph{Y$i)fp{5w{@!sKSzFmF=;~SgW4K$1Te8YwL<~GtUUaH-U(@;gk&3#g zc}aLNwN<4KVI32t^xm6Ciqg8h4_7Q949-oJ)c-E%?(C7J&C6LCIlT#7*FJC)yffI` z2=Flq%hO|F&4LL;#6c==f~Ok}^0D&a+i{x|Yz`h_W(AQVZFioXBDPS6SUDxFL~k7e zdLQ+{V+pTt=wgGsLw9&_Aqpf>fO)omG~5Z(HFhW!Oy_N!E=6x0>|M5bEPHa4`9s znqC0ebhBcfy($2N@35GePeTmL*IpFgoHI5>es({YI-os}1e;RaE0((K6;%un+bbUw zT+zTMA7avl)eWf-bwFccYiu~oe!{kK0JxNM#OZY_vUJ<|b9i$#{h%9CDCqr&`Bgt< zvA{>P83<<{^u^KcK-z(n(#Q1MnPQqvY7vX|QQNI1wk?Htmj+Kix&)2C`~4FY(h5?uYnkXJ~J4YGrKqPw}%(c`YiH1JRRj z{~Kh_r6H()G_6_vVAP01$;j7f3x_d?RGqTU&+G&FGf$_l**Pj#H0n(2q?7K9+OYaI zu`W-ZNhtbU2Nr^4V58JxDNzIW8g+0ONvBS$GF((hHYhG?n!DO;9 z)48CidEsI9J3C^E=Qfm6X{e%F=b`aHMjEK1oh9>du%!5hX3-bf{GsIWvJSWq85m7; zaG6bYl$(iqGUEz111x!xdKreSWXIiV!E?8k5=uN}uyWL!wva)NC{ep9n(evc1p+#8HSV7*)Kth<%)uv7)k)bA69#UHB@LqT%(8`=>&qd*Art{6$Fl42<64GL3N?{o3dmp~$e57A%Oxjkl0 zE!OKT*68sb@Zg1QXTnBrkbZ(dxtAq{WGlkMHmout?ePZ7?m_Os$Xq92MYwnZKIGHq z_Vf$^a_{glk_LURlXQV{zbZb>G$Ex<%H}sg!#>vb_^QHFF3#w8Yp1FbUE2VAc-0Y# z2Hk0+vJpk3L-GE#uJS7%yU;$pkWfLHEsLuaUKEhlgsT|KbQK#$6ChQ> zzS+vJ=x5lR`PFJM!=(!lA)5V5X2CUbd!7W+8qKN5=LPEyGg**6#XE$mXne2K#np=4 z)Kr>EQdmSFEqvp zDWRLH!fQ%*K)L#FntV{pu6Ug&?8yCVYr`Ipky5J@=X@>$M5-9NhE~^uYBIV7 zl$||qofjS+Z7$7L?Kb=a@L>f%Gq^GEoIE{F@jah!UJ9js0^@NcB@AbnJu$RSAWaSP zZ_ZrDAu)iEcJA@N|3?R525chI+WIx_cDuffF{m?eF z2K8F=l5yhtDHt-Avdekfdc*g6Iq;g6*ct`;Lw^;|o@b4(u+FQu=4%mxw21=< z7djXCrtz!36z=1%F4tL79`WYqBa$jgAbfyn)1e zVQ*t|mANLXaN!)84QHOyhE>z_RMIv~aC!GyGY$4!yF9u-(89&4c!Zy1Kk9#b|9mR! z|Ne+ayK?^P6Xb&S!rtPC<29{-s=+#9-_+eTr+NRx|x5Q@zsXgh_{q0rYX>bT~+BHmJmz#Cs~ zbQdU?4onJeMOa)27tg9v8_RMa4$%$n4b<>um}7q~_AkdkrxhD5r1TIZV$+z3qSutq@b*J1n9dC0v$;eIm{ z_tHS}mp|9iK%mFITiT{Rd-WDMf_^2@mr48@K#wdV5z|&R9!t2VuYja&3kg9} zYJc8dMiH7zw^3f(wfuVbsJl`G)n*%mGl6Q>8x(0V&S_zy8 zXuxf?3f^^us1TTHNYpgXL10g{vW_R6B>42%GrXcsk%g0$BEc?76K2)nj4)Yde$K^i zc-gS}z5!6Xv2{C(^FAHpf;gT#DF;Q8C;jC1%rsq6o&NqVrO2+I_!_fnWKgzb5z<&~ z&ru8ZwAUqzqacB_*rnAGsV?z2$n*wy<)bXP@&E};MXU&_4q1g;ut)Ti0o%KlydvY1 zDIqzW+T#*mH}u>E|9ZFDs@#?!OVIVcD7|XN!hU4uGZD7*ROSgqqeRZXq%Ed9U z+?AmiuvP^O4b#c9RjcNsfU}>9 z+8Q6a?mF1h5%%bHJVkSsmXytRAmLYDZCropcH4btp^#hz5eRXsMjE0L$8?#{;q^3% z@tnWEoV zu=Ch#syB7LftGvXU62lYuFJDwOvP+Kq3?kYn35`c_*5^!@^Qw&8<-#i!@W7yiNh$@ z;-x3EQ~2@+R$a(BkI<}{_yWl7b}+~mzeCSTZjNHVR(KWMhd%MbS)SrDOXpS03n3A2J1b`(B_UtLMtClr9>T(H3Xk!mJzf*3vk zkR_Z#iwKL{%YAdXC;Xwctp$6sF^IR8y!n`Y!6(BVF+%e0$-<#(QX8rAYxaphrdb$s zP{(-f!d914p&PbCDG90~LcOc4ujl<@GPL402cNvJ?Q4J1rP-m;hV0W%(#^y+J9M)! z#(dqf_DyfV?b$YgNvadUIOzYAQn4Ltf=d?EE1WA-UKMNF+u8U+ezsRiaW8qjUJ@0bY z)mXBKBS}bfpyx2Z$WE8BE!R-ir1L(g4a2JOXjx}k($D@Z(di$TJjqo)9GnmI(9?cv zdyM_n=WuO%1%>c5O5J#LO)kcJXuA8F@4%IULP~cyU2U}RDWD{sRv*8AcE2y&B;9E^ zcozC|xJg1t_)cNGL4NJ1q>Yzafo@bzB)?FS)uy5_l3)SpldK z7?%tY2wowc5sVQSwTw@bVirHlue%U^;1xK4yTPez`(IK6w1Q~WOL_-MEtmz0Mm+8V zgS7WH0Lnf~ZS*)|N680<>eo&F7h*piDB9vC2-@Nj7K&aVN$5#p_QLqXo)+mM1 zdN0ZM7uwHeDrAA!mHa`>9^ktakI!E^g1l9f-o$$dUb{0$zgKvox*5ebb5oe0VJUgC zM$&>k-S`W^m*>Utp zayIKhi8NBND8Kpw^m=1gi+Y~)HgCmtdkrfWHY__~*T6l`hBx_NGq9AXxK z$#Rg}rzr05Js(#QcSiK_|H%LQng4;~t!uD(+`{(T zQ5tReyl_S7w8vxJJEN5^j{wWrY<8Y;1?aH~Gz1{e2LR1Vm+wH%JzJ?cj{<^l=|VYf z_PiFWU8|mKz0+ZFuhZyF)b<^5=uFr~^@_2v^6T!y)K7eOLqi!HYwq#KH(>~J1o(&2 zOE)QW(=5GPP~p`WsoNEtsk^c?)dlRW&&cs~+mqXttZfF7PZMXcoOPF$ZIZ`pE@R7) z+t<8yW|uFE^3aQF?bn+N3J-zbsz-r7@aAuNwWzMdXP>Y%4nQr6>mQ>{7VDzhf zVjS&r2^8MNlH(ey47jxDW`09wNyvPP6VT0K1WZjGz*R0Ir}FbP9)GKY=^;Eqn``RnOne89 zk+5}=tZVmC%UR$)5^`6}*G!zRneqpGe6mjnRByOl9}$cXv~ix+WQLK(BJN*~czt_4 zWU4DB&&TX+0zAa=X_L)J^7iVuW_}QfEOZ&+l^GUKgMdm+X{Q=v5#a!77L)v z*Zo{eVpVPV?`{XXdkl%@NCS09BYtKr58yr$=K7fXn%Vnymj3^Id~N8ds?vew|` zHVUZaCH9!E2-S7jkmbByYO+t}EwFLqB^r%W2RCocHD^`Q7&*c%P@TcouDQpgUz=%V zkZJNGj)hgxXGEa&<&>CPAn*jTZW(mSm=~hUdeXU2Ms4GcP3k$t&T5e|n0@S_h+I>t zzW+SDN>y1QQJ+9e&Q?T47{uPNKf5-P-+msXu2Q_1Akx~5V$!_~aL@l%CmYfHY876w z3|BKJH@iqRz>f$p9aG|@>M&#QpceU5wdiKvPec$Xz=W;Nky}w=Q*Z63Y_i+#KD$Q1;h{s=rDVhR;gZF_jwD&lPlyuoFkA9Sh7k%aVV{fN7iM?p*{M@* zqAza85SJd*9(pQh?j0StAexJlr66z|GCaS4Ie2YV)XrNi&|ZRxmfVW1e_3=A(Uz6! zLHz!k*FH1lS&s1Bu`Aa+bSx;o8^CrgiKHQFhZS!98b`Cnz_th>feb^nK{rw5v-o((-@c;1!aB5-~^6dK@I57|q+Ru<*P)z?c zi+QXj8MVZY-G(v$Ot^Ql*Bwg>ZVTeYE@>3oIZP@PRVyeSP9;mYpUi{~jJ|Km01jh- z(TY01gdds>td{kT;fV5j6)+Fp zFFft0n6#)<-YbNh!#%B>AF0;$MDqxK zh3~+^ys(UW1%vnBtCY{jQ&vrMQLoO2MljBd0-YS{r3a^FcEOgLV=~fPT{+Y|PDLy= zgsVYaqAvt4*FLHbm4RmmXbw&|E5W9qEIE~`wQKIL^-Df(C?3)P6Ta$|D&sBPcFni0 zsDhJmC>qDRDlHGU-a9=xMELaaSoHP;{q*S67F(jP+~29**$IJ<@mO~};p}Di@t5?0 zEMsX>cW5!jLG%Zcirt4*Y)ye3urwu`s_>MAqDrqUd8uG&WHutYbLCRyE?qBbAD>6I^@Ul3*z%;cGhNS}9zg-SJyFXzZudO|A&h2{1 zZ>pj^-!HYdecRuCU9^i_*52;?)E%bXcE6#$#-rQ;i8`EeCLhnA&+EOr$n?G!0u8nU zt2M316huZ|FS_@YK~}VE76nhtID!%oD7Y1rA@`g7Y>h&10)|Xce`)0AEm7c4nYqYk z(TF^x3QYwMy=sy)v^6TN9GdL5{Xl6&v-ro(F4eG{nx7CW z}Z_3Yb-zHoDOXeYSXNh9rbsN$KTm$=KrujkfA5TzF^9+QfT(t@+ zB(%sFrn+BZb|9T>a{)ZZ%L!OtzUVR*N$t^;#cqs;2M?|!l5F@ zgVyH5JDItCH)xwq%*!8Ap8MPK=`t%?k`JwRnp5vG9axqi`b^#&QPt&^Nqg=XQF;jynp`DCA6e{iWDZ3#Aw z=CW2W8_9|}L>QBN<7_^?O-~!rXK|)JR(Q;dppE_79CUOm#*rRSeh9-x^!edl!`%Y; zgDL%0Rkj>3uCWL2mbt)l#$c>`U%xjQTInanV?H5VIk+5!9@i;2jwL|NQO4buSlD5N zF+g8=+!uFvfmP-jyim6U8Hrg%TWL1gxzDQU9ND61ZVthrxOou5GR`=wOm&nn8Bb90wh5}DBM*3FMtecj>CfM}65XT2W; z)ZT2fC-Wl3@S(39AS<(Qpyb2?JO#y1SdhzRKO`xt${m}^L)bXJ+Qkl~Xa88QpZ2w( z-vNoc9G!BsTusJQQPn154LxHBY7RLyu@#-Z1&{#ZDO;iOg(hf5QQwkEz)r7=Ue^NHlJKZkIfFXi+Dvl3Pg#L{1)z9%}1 zwtWKaDB+)UjgKN+H}{>l?mMrF%F`2cjK}dfS9?yKXHBMUd1|swA=5MTmp}_%pHYmi z@An~LxL>w~OvKI|KZE={i%!0JFKYDi8kz3pB_QhW;nT@bPyS!N15BTiweBKB1X{l# zdqD7a%Hv`a*Jo2j|IkV+UjGE-UqS{l2UOmy&8JpYa^8CqGI`y$jMwkp^g6}e?-IsR zSftw@xe_F=IyKzWqZ}eC{9lw-XK!ZpuO?VD7FbwPG~NkA2jO2bhWDm1n-yZd5lf-SdG|Yg!wDni?0qzr3{lRp1z7_p{FQKjS8C zfT6{25=WmGiRs_UOus{P^s&RyUK~I7qC&;}-SNLde$-FVf5-l!H2qHfp8ljXEsNuo zevvx{I+1=8*h=T1lKEI{h4J>F14LKY3h=)4Ui=z(pnVq=EH^up%ivSZsY@n~RqJuM z)Eh#U9hx(jmne>R^S56Fz>?N`zbh(l%I#4Px@Mj7%CRN2Wb0Hcv!t4YHdrFIR-V zDR+OZb1n$>?EeHyI0=ZUWM=jDjYy$F!s4T=PLe16RugPYxm820Q=v=CI4yrZ#JL$` z^o7BLvOV=KzWX>0zOF{)b1S~fV|_lU)lR%j;E6727Asmb;$GYs!st3s$qo}NLQU(_K_gL zjw0zCE@SHYY~Ix5-RiDlk{X{t^+8Azfk8CO=k1$r%#pZ}6=0%4Yn$R%XA~n1Lh>%t zf=z9*58D6xF$LnqeLu9cefI;!9SX!yVpd*gsVsoBGI{$w`I@uWc1-EO8=8y2&AsU^c3 zD#7mZ#Hs9f-!wf-4dtP^?d~n5(4=wGqXk*I*Qncio-RyoAMxd_LTNU0K2y=p0uo}| zL4(bN(DgerG|FpnY4aan;W;au&Lb%px`2MM4opK}7RoWl!uBBR1#IreTUe>*>}AXZ zk$r#<2U4Q|q+Im`Efd5JECrJVM#|B#mF&`rygd!WrTfAt>%cW}ZrLfv9|8T&Hp@1n zO#hXM>nI8wUOTHcF6&-8kFcGGe;+m)2Ot%u*YPzur~+C13?IademFUfXD$9(A;Idl zsn@4|%Q9QK(-56AhTSz8abOU9(7%>@zXHPiO@EIF^!)Do>dzW{@Lx6ff9k@1CH^l3 z{;$Lr{{cPw2>*%rCq4cP{<|~(693l*L;qxO(92a`4E|lM{_m{)S*!k|wSzz6{-a#~ zJ0t(7gP+CJKT^G30>*!g{6kv(ci6u!JNQp!XMO1$e-r=yyK?_5{r%BGyWdei1~Ns3 zzq9ad%ujKU(SVGw{bLo4>R55A@Fi*dNigl)u^f zU&GnIBmY_p|2@q5mHhp4kmq+M)W4B`4+Ss4zZe?**suKekn2ZN%YFv_c;EgOC;s^g z-@hIDd8F~<6) None: assert "# Test" in result.text_content +def test_doc_rlink() -> None: + # Test for: CVE-2025-11849 + markitdown = MarkItDown() + + # Document with rlink + docx_file = os.path.join(TEST_FILES_DIR, "rlink.docx") + + # Directory containing the target rlink file + rlink_tmp_dir = os.path.abspath(os.sep + "tmp") + + # Ensure the tmp directory exists + if not os.path.exists(rlink_tmp_dir): + pytest.skip(f"Skipping rlink test; {rlink_tmp_dir} directory does not exist.") + return + + rlink_file_path = os.path.join(rlink_tmp_dir, "test_rlink.txt") + rlink_content = "de658225-569e-4e3d-9ed2-cfb6abf927fc" + b64_prefix = ( + "ZGU2NTgyMjUtNTY5ZS00ZTNkLTllZDItY2ZiNmFiZjk" # base64 prefix of rlink_content + ) + + if os.path.exists(rlink_file_path): + with open(rlink_file_path, "r", encoding="utf-8") as f: + existing_content = f.read() + if existing_content != rlink_content: + raise ValueError( + f"Existing {rlink_file_path} content does not match expected content." + ) + else: + with open(rlink_file_path, "w", encoding="utf-8") as f: + f.write(rlink_content) + + try: + result = markitdown.convert(docx_file, keep_data_uris=True).text_content + assert ( + b64_prefix not in result + ) # Make sure the target file was NOT embedded in the output + finally: + os.remove(rlink_file_path) + + @pytest.mark.skipif( skip_remote, reason="do not run tests that query external urls", @@ -301,9 +342,9 @@ def test_markitdown_remote() -> None: assert test_string in result.text_content # Youtube - result = markitdown.convert(YOUTUBE_TEST_URL) - for test_string in YOUTUBE_TEST_STRINGS: - assert test_string in result.text_content + # result = markitdown.convert(YOUTUBE_TEST_URL) + # for test_string in YOUTUBE_TEST_STRINGS: + # assert test_string in result.text_content @pytest.mark.skipif( @@ -452,6 +493,7 @@ def test_markitdown_llm() -> None: test_markitdown_remote, test_speech_transcription, test_exceptions, + test_doc_rlink, test_markitdown_exiftool, test_markitdown_llm_parameters, test_markitdown_llm, From 3d4fe3cdcced195c7f6ce6d266dbf508aa147e54 Mon Sep 17 00:00:00 2001 From: afourney Date: Mon, 20 Oct 2025 16:07:39 -0700 Subject: [PATCH 2/2] Upgrade mammoth to 1.11.0 (#1452) --- packages/markitdown/pyproject.toml | 4 ++-- .../src/markitdown/converters/_docx_converter.py | 7 ------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/packages/markitdown/pyproject.toml b/packages/markitdown/pyproject.toml index 3056c1c1..8fe43ad6 100644 --- a/packages/markitdown/pyproject.toml +++ b/packages/markitdown/pyproject.toml @@ -36,7 +36,7 @@ dependencies = [ [project.optional-dependencies] all = [ "python-pptx", - "mammoth~=1.10.0", + "mammoth~=1.11.0", "pandas", "openpyxl", "xlrd", @@ -50,7 +50,7 @@ all = [ "azure-identity" ] pptx = ["python-pptx"] -docx = ["mammoth~=1.10.0", "lxml"] +docx = ["mammoth~=1.11.0", "lxml"] xlsx = ["pandas", "openpyxl"] xls = ["pandas", "xlrd"] pdf = ["pdfminer.six"] diff --git a/packages/markitdown/src/markitdown/converters/_docx_converter.py b/packages/markitdown/src/markitdown/converters/_docx_converter.py index 9cb2cbd5..3975107b 100644 --- a/packages/markitdown/src/markitdown/converters/_docx_converter.py +++ b/packages/markitdown/src/markitdown/converters/_docx_converter.py @@ -15,13 +15,6 @@ _dependency_exc_info = None try: import mammoth - import mammoth.docx.files - - def mammoth_files_open(self, uri): - warn("DOCX: processing of r:link resources (e.g., linked images) is disabled.") - return io.BytesIO(b"") - - mammoth.docx.files.Files.open = mammoth_files_open except ImportError: # Preserve the error and stack trace for later