__init__.py 896 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211
26221262312624126251262612627126281262912630126311263212633126341263512636126371263812639126401264112642126431264412645126461264712648126491265012651126521265312654126551265612657126581265912660126611266212663126641266512666126671266812669126701267112672126731267412675126761267712678126791268012681126821268312684126851268612687126881268912690126911269212693126941269512696126971269812699127001270112702127031270412705127061270712708127091271012711127121271312714127151271612717127181271912720127211272212723127241272512726127271272812729127301273112732127331273412735127361273712738127391274012741127421274312744127451274612747127481274912750127511275212753127541275512756127571275812759127601276112762127631276412765127661276712768127691277012771127721277312774127751277612777127781277912780127811278212783127841278512786127871278812789127901279112792127931279412795127961279712798127991280012801128021280312804128051280612807128081280912810128111281212813128141281512816128171281812819128201282112822128231282412825128261282712828128291283012831128321283312834128351283612837128381283912840128411284212843128441284512846128471284812849128501285112852128531285412855128561285712858128591286012861128621286312864128651286612867128681286912870128711287212873128741287512876128771287812879128801288112882128831288412885128861288712888128891289012891128921289312894128951289612897128981289912900129011290212903129041290512906129071290812909129101291112912129131291412915129161291712918129191292012921129221292312924129251292612927129281292912930129311293212933129341293512936129371293812939129401294112942129431294412945129461294712948129491295012951129521295312954129551295612957129581295912960129611296212963129641296512966129671296812969129701297112972129731297412975129761297712978129791298012981129821298312984129851298612987129881298912990129911299212993129941299512996129971299812999130001300113002130031300413005130061300713008130091301013011130121301313014130151301613017130181301913020130211
30221302313024130251302613027130281302913030130311303213033130341303513036130371303813039130401304113042130431304413045130461304713048130491305013051130521305313054130551305613057130581305913060130611306213063130641306513066130671306813069130701307113072130731307413075130761307713078130791308013081130821308313084130851308613087130881308913090130911309213093130941309513096130971309813099131001310113102131031310413105131061310713108131091311013111131121311313114131151311613117131181311913120131211312213123131241312513126131271312813129131301313113132131331313413135131361313713138131391314013141131421314313144131451314613147131481314913150131511315213153131541315513156131571315813159131601316113162131631316413165131661316713168131691317013171131721317313174131751317613177131781317913180131811318213183131841318513186131871318813189131901319113192131931319413195131961319713198131991320013201132021320313204132051320613207132081320913210132111321213213132141321513216132171321813219132201322113222132231322413225132261322713228132291323013231132321323313234132351323613237132381323913240132411324213243132441324513246132471324813249132501325113252132531325413255132561325713258132591326013261132621326313264132651326613267132681326913270132711327213273132741327513276132771327813279132801328113282132831328413285132861328713288132891329013291132921329313294132951329613297132981329913300133011330213303133041330513306133071330813309133101331113312133131331413315133161331713318133191332013321133221332313324133251332613327133281332913330133311333213333133341333513336133371333813339133401334113342133431334413345133461334713348133491335013351133521335313354133551335613357133581335913360133611336213363133641336513366133671336813369133701337113372133731337413375133761337713378133791338013381133821338313384133851338613387133881338913390133911339213393133941339513396133971339813399134001340113402134031340413405134061340713408134091341013411134121341313414134151341613417134181341913420134211
34221342313424134251342613427134281342913430134311343213433134341343513436134371343813439134401344113442134431344413445134461344713448134491345013451134521345313454134551345613457134581345913460134611346213463134641346513466134671346813469134701347113472134731347413475134761347713478134791348013481134821348313484134851348613487134881348913490134911349213493134941349513496134971349813499135001350113502135031350413505135061350713508135091351013511135121351313514135151351613517135181351913520135211352213523135241352513526135271352813529135301353113532135331353413535135361353713538135391354013541135421354313544135451354613547135481354913550135511355213553135541355513556135571355813559135601356113562135631356413565135661356713568135691357013571135721357313574135751357613577135781357913580135811358213583135841358513586135871358813589135901359113592135931359413595135961359713598135991360013601136021360313604136051360613607136081360913610136111361213613136141361513616136171361813619136201362113622136231362413625136261362713628136291363013631136321363313634136351363613637136381363913640136411364213643136441364513646136471364813649136501365113652136531365413655136561365713658136591366013661136621366313664136651366613667136681366913670136711367213673136741367513676136771367813679136801368113682136831368413685136861368713688136891369013691136921369313694136951369613697136981369913700137011370213703137041370513706137071370813709137101371113712137131371413715137161371713718137191372013721137221372313724137251372613727137281372913730137311373213733137341373513736137371373813739137401374113742137431374413745137461374713748137491375013751137521375313754137551375613757137581375913760137611376213763137641376513766137671376813769137701377113772137731377413775137761377713778137791378013781137821378313784137851378613787137881378913790137911379213793137941379513796137971379813799138001380113802138031380413805138061380713808138091381013811138121381313814138151381613817138181381913820138211
38221382313824138251382613827138281382913830138311383213833138341383513836138371383813839138401384113842138431384413845138461384713848138491385013851138521385313854138551385613857138581385913860138611386213863138641386513866138671386813869138701387113872138731387413875138761387713878138791388013881138821388313884138851388613887138881388913890138911389213893138941389513896138971389813899139001390113902139031390413905139061390713908139091391013911139121391313914139151391613917139181391913920139211392213923139241392513926139271392813929139301393113932139331393413935139361393713938139391394013941139421394313944139451394613947139481394913950139511395213953139541395513956139571395813959139601396113962139631396413965139661396713968139691397013971139721397313974139751397613977139781397913980139811398213983139841398513986139871398813989139901399113992139931399413995139961399713998139991400014001140021400314004140051400614007140081400914010140111401214013140141401514016140171401814019140201402114022140231402414025140261402714028140291403014031140321403314034140351403614037140381403914040140411404214043140441404514046140471404814049140501405114052140531405414055140561405714058140591406014061140621406314064140651406614067140681406914070140711407214073140741407514076140771407814079140801408114082140831408414085140861408714088140891409014091140921409314094140951409614097140981409914100141011410214103141041410514106141071410814109141101411114112141131411414115141161411714118141191412014121141221412314124141251412614127141281412914130141311413214133141341413514136141371413814139141401414114142141431414414145141461414714148141491415014151141521415314154141551415614157141581415914160141611416214163141641416514166141671416814169141701417114172141731417414175141761417714178141791418014181141821418314184141851418614187141881418914190141911419214193141941419514196141971419814199142001420114202142031420414205142061420714208142091421014211142121421314214142151421614217142181421914220142211
42221422314224142251422614227142281422914230142311423214233142341423514236142371423814239142401424114242142431424414245142461424714248142491425014251142521425314254142551425614257142581425914260142611426214263142641426514266142671426814269142701427114272142731427414275142761427714278142791428014281142821428314284142851428614287142881428914290142911429214293142941429514296142971429814299143001430114302143031430414305143061430714308143091431014311143121431314314143151431614317143181431914320143211432214323143241432514326143271432814329143301433114332143331433414335143361433714338143391434014341143421434314344143451434614347143481434914350143511435214353143541435514356143571435814359143601436114362143631436414365143661436714368143691437014371143721437314374143751437614377143781437914380143811438214383143841438514386143871438814389143901439114392143931439414395143961439714398143991440014401144021440314404144051440614407144081440914410144111441214413144141441514416144171441814419144201442114422144231442414425144261442714428144291443014431144321443314434144351443614437144381443914440144411444214443144441444514446144471444814449144501445114452144531445414455144561445714458144591446014461144621446314464144651446614467144681446914470144711447214473144741447514476144771447814479144801448114482144831448414485144861448714488144891449014491144921449314494144951449614497144981449914500145011450214503145041450514506145071450814509145101451114512145131451414515145161451714518145191452014521145221452314524145251452614527145281452914530145311453214533145341453514536145371453814539145401454114542145431454414545145461454714548145491455014551145521455314554145551455614557145581455914560145611456214563145641456514566145671456814569145701457114572145731457414575145761457714578145791458014581145821458314584145851458614587145881458914590145911459214593145941459514596145971459814599146001460114602146031460414605146061460714608146091461014611146121461314614146151461614617146181461914620146211
46221462314624146251462614627146281462914630146311463214633146341463514636146371463814639146401464114642146431464414645146461464714648146491465014651146521465314654146551465614657146581465914660146611466214663146641466514666146671466814669146701467114672146731467414675146761467714678146791468014681146821468314684146851468614687146881468914690146911469214693146941469514696146971469814699147001470114702147031470414705147061470714708147091471014711147121471314714147151471614717147181471914720147211472214723147241472514726147271472814729147301473114732147331473414735147361473714738147391474014741147421474314744147451474614747147481474914750147511475214753147541475514756147571475814759147601476114762147631476414765147661476714768147691477014771147721477314774147751477614777147781477914780147811478214783147841478514786147871478814789147901479114792147931479414795147961479714798147991480014801148021480314804148051480614807148081480914810148111481214813148141481514816148171481814819148201482114822148231482414825148261482714828148291483014831148321483314834148351483614837148381483914840148411484214843148441484514846148471484814849148501485114852148531485414855148561485714858148591486014861148621486314864148651486614867148681486914870148711487214873148741487514876148771487814879148801488114882148831488414885148861488714888148891489014891148921489314894148951489614897148981489914900149011490214903149041490514906149071490814909149101491114912149131491414915149161491714918149191492014921149221492314924149251492614927149281492914930149311493214933149341493514936149371493814939149401494114942149431494414945149461494714948149491495014951149521495314954149551495614957149581495914960149611496214963149641496514966149671496814969149701497114972149731497414975149761497714978149791498014981149821498314984149851498614987149881498914990149911499214993149941499514996149971499814999150001500115002150031500415005150061500715008150091501015011150121501315014150151501615017150181501915020150211
50221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211
54221542315424154251542615427154281542915430154311543215433154341543515436154371543815439154401544115442154431544415445154461544715448154491545015451154521545315454154551545615457154581545915460154611546215463154641546515466154671546815469154701547115472154731547415475154761547715478154791548015481154821548315484154851548615487154881548915490154911549215493154941549515496154971549815499155001550115502155031550415505155061550715508155091551015511155121551315514155151551615517155181551915520155211552215523155241552515526155271552815529155301553115532155331553415535155361553715538155391554015541155421554315544155451554615547155481554915550155511555215553155541555515556155571555815559155601556115562155631556415565155661556715568155691557015571155721557315574155751557615577155781557915580155811558215583155841558515586155871558815589155901559115592155931559415595155961559715598155991560015601156021560315604156051560615607156081560915610156111561215613156141561515616156171561815619156201562115622156231562415625156261562715628156291563015631156321563315634156351563615637156381563915640156411564215643156441564515646156471564815649156501565115652156531565415655156561565715658156591566015661156621566315664156651566615667156681566915670156711567215673156741567515676156771567815679156801568115682156831568415685156861568715688156891569015691156921569315694156951569615697156981569915700157011570215703157041570515706157071570815709157101571115712157131571415715157161571715718157191572015721157221572315724157251572615727157281572915730157311573215733157341573515736157371573815739157401574115742157431574415745157461574715748157491575015751157521575315754157551575615757157581575915760157611576215763157641576515766157671576815769157701577115772157731577415775157761577715778157791578015781157821578315784157851578615787157881578915790157911579215793157941579515796157971579815799158001580115802158031580415805158061580715808158091581015811158121581315814158151581615817158181581915820158211
58221582315824158251582615827158281582915830158311583215833158341583515836158371583815839158401584115842158431584415845158461584715848158491585015851158521585315854158551585615857158581585915860158611586215863158641586515866158671586815869158701587115872158731587415875158761587715878158791588015881158821588315884158851588615887158881588915890158911589215893158941589515896158971589815899159001590115902159031590415905159061590715908159091591015911159121591315914159151591615917159181591915920159211592215923159241592515926159271592815929159301593115932159331593415935159361593715938159391594015941159421594315944159451594615947159481594915950159511595215953159541595515956159571595815959159601596115962159631596415965159661596715968159691597015971159721597315974159751597615977159781597915980159811598215983159841598515986159871598815989159901599115992159931599415995159961599715998159991600016001160021600316004160051600616007160081600916010160111601216013160141601516016160171601816019160201602116022160231602416025160261602716028160291603016031160321603316034160351603616037160381603916040160411604216043160441604516046160471604816049160501605116052160531605416055160561605716058160591606016061160621606316064160651606616067160681606916070160711607216073160741607516076160771607816079160801608116082160831608416085160861608716088160891609016091160921609316094160951609616097160981609916100161011610216103161041610516106161071610816109161101611116112161131611416115161161611716118161191612016121161221612316124161251612616127161281612916130161311613216133161341613516136161371613816139161401614116142161431614416145161461614716148161491615016151161521615316154161551615616157161581615916160161611616216163161641616516166161671616816169161701617116172161731617416175161761617716178161791618016181161821618316184161851618616187161881618916190161911619216193161941619516196161971619816199162001620116202162031620416205162061620716208162091621016211162121621316214162151621616217162181621916220162211
62221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211
66221662316624166251662616627166281662916630166311663216633166341663516636166371663816639166401664116642166431664416645166461664716648166491665016651166521665316654166551665616657166581665916660166611666216663166641666516666166671666816669166701667116672166731667416675166761667716678166791668016681166821668316684166851668616687166881668916690166911669216693166941669516696166971669816699167001670116702167031670416705167061670716708167091671016711167121671316714167151671616717167181671916720167211672216723167241672516726167271672816729167301673116732167331673416735167361673716738167391674016741167421674316744167451674616747167481674916750167511675216753167541675516756167571675816759167601676116762167631676416765167661676716768167691677016771167721677316774167751677616777167781677916780167811678216783167841678516786167871678816789167901679116792167931679416795167961679716798167991680016801168021680316804168051680616807168081680916810168111681216813168141681516816168171681816819168201682116822168231682416825168261682716828168291683016831168321683316834168351683616837168381683916840168411684216843168441684516846168471684816849168501685116852168531685416855168561685716858168591686016861168621686316864168651686616867168681686916870168711687216873168741687516876168771687816879168801688116882168831688416885168861688716888168891689016891168921689316894168951689616897168981689916900169011690216903169041690516906169071690816909169101691116912169131691416915169161691716918169191692016921169221692316924169251692616927169281692916930169311693216933169341693516936169371693816939169401694116942169431694416945169461694716948169491695016951169521695316954169551695616957169581695916960169611696216963169641696516966169671696816969169701697116972169731697416975169761697716978169791698016981169821698316984169851698616987169881698916990169911699216993169941699516996169971699816999170001700117002170031700417005170061700717008170091701017011170121701317014170151701617017170181701917020170211
70221702317024170251702617027170281702917030170311703217033170341703517036170371703817039170401704117042170431704417045170461704717048170491705017051170521705317054170551705617057170581705917060170611706217063170641706517066170671706817069170701707117072170731707417075170761707717078170791708017081170821708317084170851708617087170881708917090170911709217093170941709517096170971709817099171001710117102171031710417105171061710717108171091711017111171121711317114171151711617117171181711917120171211712217123171241712517126171271712817129171301713117132171331713417135171361713717138171391714017141171421714317144171451714617147171481714917150171511715217153171541715517156171571715817159171601716117162171631716417165171661716717168171691717017171171721717317174171751717617177171781717917180171811718217183171841718517186171871718817189171901719117192171931719417195171961719717198171991720017201172021720317204172051720617207172081720917210172111721217213172141721517216172171721817219172201722117222172231722417225172261722717228172291723017231172321723317234172351723617237172381723917240172411724217243172441724517246172471724817249172501725117252172531725417255172561725717258172591726017261172621726317264172651726617267172681726917270172711727217273172741727517276172771727817279172801728117282172831728417285172861728717288172891729017291172921729317294172951729617297172981729917300173011730217303173041730517306173071730817309173101731117312173131731417315173161731717318173191732017321173221732317324173251732617327173281732917330173311733217333173341733517336173371733817339173401734117342173431734417345173461734717348173491735017351173521735317354173551735617357173581735917360173611736217363173641736517366173671736817369173701737117372173731737417375173761737717378173791738017381173821738317384173851738617387173881738917390173911739217393173941739517396173971739817399174001740117402174031740417405174061740717408174091741017411174121741317414174151741617417174181741917420174211
74221742317424174251742617427174281742917430174311743217433174341743517436174371743817439174401744117442174431744417445174461744717448174491745017451174521745317454174551745617457174581745917460174611746217463174641746517466174671746817469174701747117472174731747417475174761747717478174791748017481174821748317484174851748617487174881748917490174911749217493174941749517496174971749817499175001750117502175031750417505175061750717508175091751017511175121751317514175151751617517175181751917520175211752217523175241752517526175271752817529175301753117532175331753417535175361753717538175391754017541175421754317544175451754617547175481754917550175511755217553175541755517556175571755817559175601756117562175631756417565175661756717568175691757017571175721757317574175751757617577175781757917580175811758217583175841758517586175871758817589175901759117592175931759417595175961759717598175991760017601176021760317604176051760617607176081760917610176111761217613176141761517616176171761817619176201762117622176231762417625176261762717628176291763017631176321763317634176351763617637176381763917640176411764217643176441764517646176471764817649176501765117652176531765417655176561765717658176591766017661176621766317664176651766617667176681766917670176711767217673176741767517676176771767817679176801768117682176831768417685176861768717688176891769017691176921769317694176951769617697176981769917700177011770217703177041770517706177071770817709177101771117712177131771417715177161771717718177191772017721177221772317724177251772617727177281772917730177311773217733177341773517736177371773817739177401774117742177431774417745177461774717748177491775017751177521775317754177551775617757177581775917760177611776217763177641776517766177671776817769177701777117772177731777417775177761777717778177791778017781177821778317784177851778617787177881778917790177911779217793177941779517796177971779817799178001780117802178031780417805178061780717808178091781017811178121781317814178151781617817178181781917820178211
78221782317824178251782617827178281782917830178311783217833178341783517836178371783817839178401784117842178431784417845178461784717848178491785017851178521785317854178551785617857178581785917860178611786217863178641786517866178671786817869178701787117872178731787417875178761787717878178791788017881178821788317884178851788617887178881788917890178911789217893178941789517896178971789817899179001790117902179031790417905179061790717908179091791017911179121791317914179151791617917179181791917920179211792217923179241792517926179271792817929179301793117932179331793417935179361793717938179391794017941179421794317944179451794617947179481794917950179511795217953179541795517956179571795817959179601796117962179631796417965179661796717968179691797017971179721797317974179751797617977179781797917980179811798217983179841798517986179871798817989179901799117992179931799417995179961799717998179991800018001180021800318004180051800618007180081800918010180111801218013180141801518016180171801818019180201802118022180231802418025180261802718028180291803018031180321803318034180351803618037180381803918040180411804218043180441804518046180471804818049180501805118052180531805418055180561805718058180591806018061180621806318064180651806618067180681806918070180711807218073180741807518076180771807818079180801808118082180831808418085180861808718088180891809018091180921809318094180951809618097180981809918100181011810218103181041810518106181071810818109181101811118112181131811418115181161811718118181191812018121181221812318124181251812618127181281812918130181311813218133181341813518136181371813818139181401814118142181431814418145181461814718148181491815018151181521815318154181551815618157181581815918160181611816218163181641816518166181671816818169181701817118172181731817418175181761817718178181791818018181181821818318184181851818618187181881818918190181911819218193181941819518196181971819818199182001820118202182031820418205182061820718208182091821018211182121821318214182151821618217182181821918220182211
82221822318224182251822618227182281822918230182311823218233182341823518236182371823818239182401824118242182431824418245182461824718248182491825018251182521825318254182551825618257182581825918260182611826218263182641826518266182671826818269182701827118272182731827418275182761827718278182791828018281182821828318284182851828618287182881828918290182911829218293182941829518296182971829818299183001830118302183031830418305183061830718308183091831018311183121831318314183151831618317183181831918320183211832218323183241832518326183271832818329183301833118332183331833418335183361833718338183391834018341183421834318344183451834618347183481834918350183511835218353183541835518356183571835818359183601836118362183631836418365183661836718368183691837018371183721837318374183751837618377183781837918380183811838218383183841838518386183871838818389183901839118392183931839418395183961839718398183991840018401184021840318404184051840618407184081840918410184111841218413184141841518416184171841818419184201842118422184231842418425184261842718428184291843018431184321843318434184351843618437184381843918440184411844218443184441844518446184471844818449184501845118452184531845418455184561845718458184591846018461184621846318464184651846618467184681846918470184711847218473184741847518476184771847818479184801848118482184831848418485184861848718488184891849018491184921849318494184951849618497184981849918500185011850218503185041850518506185071850818509185101851118512185131851418515185161851718518185191852018521185221852318524185251852618527185281852918530185311853218533185341853518536185371853818539185401854118542185431854418545185461854718548185491855018551185521855318554185551855618557185581855918560185611856218563185641856518566185671856818569185701857118572185731857418575185761857718578185791858018581185821858318584185851858618587185881858918590185911859218593185941859518596185971859818599186001860118602186031860418605186061860718608186091861018611186121861318614186151861618617186181861918620186211
86221862318624186251862618627186281862918630186311863218633186341863518636186371863818639186401864118642186431864418645186461864718648186491865018651186521865318654186551865618657186581865918660186611866218663186641866518666186671866818669186701867118672186731867418675186761867718678186791868018681186821868318684186851868618687186881868918690186911869218693186941869518696186971869818699187001870118702187031870418705187061870718708187091871018711187121871318714187151871618717187181871918720187211872218723187241872518726187271872818729187301873118732187331873418735187361873718738187391874018741187421874318744187451874618747187481874918750187511875218753187541875518756187571875818759187601876118762187631876418765187661876718768187691877018771187721877318774187751877618777187781877918780187811878218783187841878518786187871878818789187901879118792187931879418795187961879718798187991880018801188021880318804188051880618807188081880918810188111881218813188141881518816188171881818819188201882118822188231882418825188261882718828188291883018831188321883318834188351883618837188381883918840188411884218843188441884518846188471884818849188501885118852188531885418855188561885718858188591886018861188621886318864188651886618867188681886918870188711887218873188741887518876188771887818879188801888118882188831888418885188861888718888188891889018891188921889318894188951889618897188981889918900189011890218903189041890518906189071890818909189101891118912189131891418915189161891718918189191892018921189221892318924189251892618927189281892918930189311893218933189341893518936189371893818939189401894118942189431894418945189461894718948189491895018951189521895318954189551895618957189581895918960189611896218963189641896518966189671896818969189701897118972189731897418975189761897718978189791898018981189821898318984189851898618987189881898918990189911899218993189941899518996189971899818999190001900119002190031900419005190061900719008190091901019011190121901319014190151901619017190181901919020190211
90221902319024190251902619027190281902919030190311903219033190341903519036190371903819039190401904119042190431904419045190461904719048190491905019051190521905319054190551905619057190581905919060190611906219063190641906519066190671906819069190701907119072190731907419075190761907719078190791908019081190821908319084190851908619087190881908919090190911909219093190941909519096190971909819099191001910119102191031910419105191061910719108191091911019111191121911319114191151911619117191181911919120191211912219123191241912519126191271912819129191301913119132191331913419135191361913719138191391914019141191421914319144191451914619147191481914919150191511915219153191541915519156191571915819159191601916119162191631916419165191661916719168191691917019171191721917319174191751917619177191781917919180191811918219183191841918519186191871918819189191901919119192191931919419195191961919719198191991920019201192021920319204192051920619207192081920919210192111921219213192141921519216192171921819219192201922119222192231922419225192261922719228192291923019231192321923319234192351923619237192381923919240192411924219243192441924519246192471924819249192501925119252192531925419255192561925719258192591926019261192621926319264192651926619267192681926919270192711927219273192741927519276192771927819279192801928119282192831928419285192861928719288192891929019291192921929319294192951929619297192981929919300193011930219303193041930519306193071930819309193101931119312193131931419315193161931719318193191932019321193221932319324193251932619327193281932919330193311933219333193341933519336193371933819339193401934119342193431934419345193461934719348193491935019351193521935319354193551935619357193581935919360193611936219363193641936519366193671936819369193701937119372193731937419375193761937719378193791938019381193821938319384193851938619387193881938919390193911939219393193941939519396193971939819399194001940119402194031940419405194061940719408194091941019411194121941319414194151941619417194181941919420194211
94221942319424194251942619427194281942919430194311943219433194341943519436194371943819439194401944119442194431944419445194461944719448194491945019451194521945319454194551945619457194581945919460194611946219463194641946519466194671946819469194701947119472194731947419475194761947719478194791948019481194821948319484194851948619487194881948919490194911949219493194941949519496194971949819499195001950119502195031950419505195061950719508195091951019511195121951319514195151951619517195181951919520195211952219523195241952519526195271952819529195301953119532195331953419535195361953719538195391954019541195421954319544195451954619547195481954919550195511955219553195541955519556195571955819559195601956119562195631956419565195661956719568195691957019571195721957319574195751957619577195781957919580195811958219583195841958519586195871958819589195901959119592195931959419595195961959719598195991960019601196021960319604196051960619607196081960919610196111961219613196141961519616196171961819619196201962119622196231962419625196261962719628196291963019631196321963319634196351963619637196381963919640196411964219643196441964519646196471964819649196501965119652196531965419655196561965719658196591966019661196621966319664196651966619667196681966919670196711967219673196741967519676196771967819679196801968119682196831968419685196861968719688196891969019691196921969319694196951969619697196981969919700197011970219703197041970519706197071970819709197101971119712197131971419715197161971719718197191972019721197221972319724197251972619727197281972919730197311973219733197341973519736197371973819739197401974119742197431974419745197461974719748197491975019751197521975319754197551975619757197581975919760197611976219763197641976519766197671976819769197701977119772197731977419775197761977719778197791978019781197821978319784197851978619787197881978919790197911979219793197941979519796197971979819799198001980119802198031980419805198061980719808198091981019811198121981319814198151981619817198181981919820198211
98221982319824198251982619827198281982919830198311983219833198341983519836198371983819839198401984119842198431984419845198461984719848198491985019851198521985319854198551985619857198581985919860198611986219863198641986519866198671986819869198701987119872198731987419875198761987719878198791988019881198821988319884198851988619887198881988919890198911989219893198941989519896198971989819899199001990119902199031990419905199061990719908199091991019911199121991319914199151991619917199181991919920199211992219923199241992519926199271992819929199301993119932199331993419935199361993719938199391994019941199421994319944199451994619947199481994919950199511995219953199541995519956199571995819959199601996119962199631996419965199661996719968199691997019971199721997319974199751997619977199781997919980199811998219983199841998519986199871998819989199901999119992199931999419995199961999719998199992000020001200022000320004200052000620007200082000920010200112001220013200142001520016200172001820019200202002120022200232002420025200262002720028200292003020031200322003320034200352003620037200382003920040200412004220043200442004520046200472004820049200502005120052200532005420055200562005720058200592006020061200622006320064200652006620067200682006920070200712007220073200742007520076200772007820079200802008120082200832008420085200862008720088200892009020091200922009320094200952009620097200982009920100201012010220103201042010520106201072010820109201102011120112201132011420115201162011720118201192012020121201222012320124201252012620127201282012920130201312013220133201342013520136201372013820139201402014120142201432014420145201462014720148201492015020151201522015320154201552015620157201582015920160201612016220163201642016520166201672016820169201702017120172201732017420175201762017720178201792018020181201822018320184201852018620187201882018920190201912019220193201942019520196201972019820199202002020120202202032020420205202062020720208202092021020211202122021320214202152021620217202182021920220202212
02222022320224202252022620227202282022920230202312023220233202342023520236202372023820239202402024120242202432024420245202462024720248202492025020251202522025320254202552025620257202582025920260202612026220263202642026520266202672026820269202702027120272202732027420275202762027720278202792028020281202822028320284202852028620287202882028920290202912029220293202942029520296202972029820299203002030120302203032030420305203062030720308203092031020311203122031320314203152031620317203182031920320203212032220323203242032520326203272032820329203302033120332203332033420335203362033720338203392034020341203422034320344203452034620347203482034920350203512035220353203542035520356203572035820359203602036120362203632036420365203662036720368203692037020371203722037320374203752037620377203782037920380203812038220383203842038520386203872038820389203902039120392203932039420395203962039720398203992040020401204022040320404204052040620407204082040920410204112041220413204142041520416204172041820419204202042120422204232042420425204262042720428204292043020431204322043320434204352043620437204382043920440204412044220443204442044520446204472044820449204502045120452204532045420455204562045720458204592046020461204622046320464204652046620467204682046920470204712047220473204742047520476204772047820479204802048120482204832048420485204862048720488204892049020491204922049320494204952049620497204982049920500205012050220503205042050520506205072050820509205102051120512205132051420515205162051720518205192052020521205222052320524205252052620527205282052920530205312053220533205342053520536205372053820539205402054120542205432054420545205462054720548205492055020551205522055320554205552055620557205582055920560205612056220563205642056520566205672056820569205702057120572205732057420575205762057720578205792058020581205822058320584205852058620587205882058920590205912059220593205942059520596205972059820599206002060120602206032060420605206062060720608206092061020611206122061320614206152061620617206182061920620206212
06222062320624206252062620627206282062920630206312063220633206342063520636206372063820639206402064120642206432064420645206462064720648206492065020651206522065320654206552065620657206582065920660206612066220663206642066520666206672066820669206702067120672206732067420675206762067720678206792068020681206822068320684206852068620687206882068920690206912069220693206942069520696206972069820699207002070120702207032070420705207062070720708207092071020711207122071320714207152071620717207182071920720207212072220723207242072520726207272072820729207302073120732207332073420735207362073720738207392074020741207422074320744207452074620747207482074920750207512075220753207542075520756207572075820759207602076120762207632076420765207662076720768207692077020771207722077320774207752077620777207782077920780207812078220783207842078520786207872078820789207902079120792207932079420795207962079720798207992080020801208022080320804208052080620807208082080920810208112081220813208142081520816208172081820819208202082120822208232082420825208262082720828208292083020831208322083320834208352083620837208382083920840208412084220843208442084520846208472084820849208502085120852208532085420855208562085720858208592086020861208622086320864208652086620867208682086920870208712087220873208742087520876208772087820879208802088120882208832088420885208862088720888208892089020891208922089320894208952089620897208982089920900209012090220903209042090520906209072090820909209102091120912209132091420915209162091720918209192092020921209222092320924209252092620927209282092920930209312093220933209342093520936209372093820939209402094120942209432094420945209462094720948209492095020951209522095320954209552095620957209582095920960209612096220963209642096520966209672096820969209702097120972209732097420975209762097720978209792098020981209822098320984209852098620987209882098920990209912099220993209942099520996209972099820999210002100121002210032100421005210062100721008210092101021011210122101321014210152101621017210182101921020210212
10222102321024210252102621027210282102921030210312103221033210342103521036210372103821039210402104121042210432104421045210462104721048210492105021051210522105321054210552105621057210582105921060210612106221063210642106521066210672106821069210702107121072210732107421075210762107721078210792108021081210822108321084210852108621087210882108921090210912109221093210942109521096210972109821099211002110121102211032110421105211062110721108211092111021111211122111321114211152111621117211182111921120211212112221123211242112521126211272112821129211302113121132211332113421135211362113721138211392114021141211422114321144211452114621147211482114921150211512115221153211542115521156211572115821159211602116121162211632116421165211662116721168211692117021171211722117321174211752117621177211782117921180211812118221183211842118521186211872118821189211902119121192211932119421195211962119721198211992120021201212022120321204212052120621207212082120921210212112121221213212142121521216212172121821219212202122121222212232122421225212262122721228212292123021231212322123321234212352123621237212382123921240212412124221243212442124521246212472124821249212502125121252212532125421255212562125721258212592126021261212622126321264212652126621267212682126921270212712127221273212742127521276212772127821279212802128121282212832128421285212862128721288212892129021291212922129321294212952129621297212982129921300213012130221303213042130521306213072130821309213102131121312213132131421315213162131721318213192132021321213222132321324213252132621327213282132921330213312133221333213342133521336213372133821339213402134121342213432134421345213462134721348213492135021351213522135321354213552135621357213582135921360213612136221363213642136521366213672136821369213702137121372213732137421375213762137721378213792138021381213822138321384213852138621387213882138921390213912139221393213942139521396213972139821399214002140121402214032140421405214062140721408214092141021411214122141321414214152141621417214182141921420214212
14222142321424214252142621427214282142921430214312143221433214342143521436214372143821439214402144121442214432144421445214462144721448214492145021451214522145321454214552145621457214582145921460214612146221463214642146521466214672146821469214702147121472214732147421475214762147721478214792148021481214822148321484214852148621487214882148921490214912149221493214942149521496214972149821499215002150121502215032150421505215062150721508215092151021511215122151321514215152151621517215182151921520215212152221523215242152521526215272152821529215302153121532215332153421535215362153721538215392154021541215422154321544215452154621547215482154921550215512155221553215542155521556215572155821559215602156121562215632156421565215662156721568215692157021571215722157321574215752157621577215782157921580215812158221583215842158521586215872158821589215902159121592215932159421595215962159721598215992160021601216022160321604216052160621607216082160921610216112161221613216142161521616216172161821619216202162121622216232162421625216262162721628216292163021631216322163321634216352163621637216382163921640216412164221643216442164521646216472164821649216502165121652216532165421655216562165721658216592166021661216622166321664216652166621667216682166921670216712167221673216742167521676216772167821679216802168121682216832168421685216862168721688216892169021691216922169321694216952169621697216982169921700217012170221703217042170521706217072170821709217102171121712217132171421715217162171721718217192172021721217222172321724217252172621727217282172921730217312173221733217342173521736217372173821739217402174121742217432174421745217462174721748217492175021751217522175321754217552175621757217582175921760217612176221763217642176521766217672176821769217702177121772217732177421775217762177721778217792178021781217822178321784217852178621787217882178921790217912179221793217942179521796217972179821799218002180121802218032180421805218062180721808218092181021811218122181321814218152181621817218182181921820218212
18222182321824218252182621827218282182921830218312183221833218342183521836218372183821839218402184121842218432184421845218462184721848218492185021851218522185321854218552185621857218582185921860218612186221863218642186521866218672186821869218702187121872218732187421875218762187721878218792188021881218822188321884218852188621887218882188921890218912189221893218942189521896218972189821899219002190121902219032190421905219062190721908219092191021911219122191321914219152191621917219182191921920219212192221923219242192521926219272192821929219302193121932219332193421935219362193721938219392194021941219422194321944219452194621947219482194921950219512195221953219542195521956219572195821959219602196121962219632196421965219662196721968219692197021971219722197321974219752197621977219782197921980219812198221983219842198521986219872198821989219902199121992219932199421995219962199721998219992200022001220022200322004220052200622007220082200922010220112201222013220142201522016220172201822019220202202122022220232202422025220262202722028220292203022031220322203322034220352203622037220382203922040220412204222043220442204522046220472204822049220502205122052220532205422055220562205722058220592206022061220622206322064220652206622067220682206922070220712207222073220742207522076220772207822079220802208122082220832208422085220862208722088220892209022091220922209322094220952209622097220982209922100221012210222103221042210522106221072210822109221102211122112221132211422115221162211722118221192212022121221222212322124221252212622127221282212922130221312213222133221342213522136221372213822139221402214122142221432214422145221462214722148221492215022151221522215322154221552215622157221582215922160221612216222163221642216522166221672216822169221702217122172221732217422175221762217722178221792218022181221822218322184221852218622187221882218922190221912219222193221942219522196221972219822199222002220122202222032220422205222062220722208222092221022211222122221322214222152221622217222182221922220222212
22222222322224222252222622227222282222922230222312223222233222342223522236222372223822239222402224122242222432224422245222462224722248222492225022251222522225322254222552225622257222582225922260222612226222263222642226522266222672226822269222702227122272222732227422275222762227722278222792228022281222822228322284222852228622287222882228922290222912229222293222942229522296222972229822299223002230122302223032230422305223062230722308223092231022311223122231322314223152231622317223182231922320223212232222323223242232522326223272232822329223302233122332223332233422335223362233722338223392234022341223422234322344223452234622347223482234922350223512235222353223542235522356223572235822359223602236122362223632236422365223662236722368223692237022371223722237322374223752237622377223782237922380223812238222383223842238522386223872238822389223902239122392223932239422395223962239722398223992240022401224022240322404224052240622407224082240922410224112241222413224142241522416224172241822419224202242122422224232242422425224262242722428224292243022431224322243322434224352243622437224382243922440224412244222443224442244522446224472244822449224502245122452224532245422455224562245722458224592246022461224622246322464224652246622467224682246922470224712247222473224742247522476224772247822479224802248122482224832248422485224862248722488224892249022491224922249322494224952249622497224982249922500225012250222503225042250522506225072250822509225102251122512225132251422515225162251722518225192252022521225222252322524225252252622527225282252922530225312253222533225342253522536225372253822539225402254122542225432254422545225462254722548225492255022551225522255322554225552255622557225582255922560225612256222563225642256522566225672256822569225702257122572225732257422575225762257722578225792258022581225822258322584225852258622587225882258922590225912259222593225942259522596225972259822599226002260122602226032260422605226062260722608226092261022611226122261322614226152261622617226182261922620226212
26222262322624226252262622627226282262922630226312263222633226342263522636226372263822639226402264122642226432264422645226462264722648226492265022651226522265322654226552265622657226582265922660226612266222663226642266522666226672266822669226702267122672226732267422675226762267722678226792268022681226822268322684226852268622687226882268922690226912269222693226942269522696226972269822699227002270122702227032270422705227062270722708227092271022711227122271322714227152271622717227182271922720227212272222723227242272522726227272272822729227302273122732227332273422735227362273722738227392274022741227422274322744227452274622747227482274922750227512275222753227542275522756227572275822759227602276122762227632276422765227662276722768227692277022771227722277322774227752277622777227782277922780227812278222783227842278522786227872278822789227902279122792227932279422795227962279722798227992280022801228022280322804228052280622807228082280922810228112281222813228142281522816228172281822819228202282122822228232282422825228262282722828228292283022831228322283322834228352283622837228382283922840228412284222843228442284522846228472284822849228502285122852228532285422855228562285722858228592286022861228622286322864228652286622867228682286922870228712287222873228742287522876228772287822879228802288122882228832288422885228862288722888228892289022891228922289322894228952289622897228982289922900229012290222903229042290522906229072290822909229102291122912229132291422915229162291722918229192292022921229222292322924229252292622927229282292922930229312293222933229342293522936229372293822939229402294122942229432294422945229462294722948229492295022951229522295322954229552295622957229582295922960229612296222963229642296522966229672296822969229702297122972229732297422975229762297722978229792298022981229822298322984229852298622987229882298922990229912299222993229942299522996229972299822999230002300123002230032300423005230062300723008230092301023011230122301323014230152301623017230182301923020230212
30222302323024230252302623027230282302923030230312303223033230342303523036230372303823039230402304123042230432304423045230462304723048230492305023051230522305323054230552305623057230582305923060230612306223063230642306523066230672306823069230702307123072230732307423075230762307723078230792308023081230822308323084230852308623087230882308923090230912309223093230942309523096230972309823099231002310123102231032310423105231062310723108231092311023111231122311323114231152311623117231182311923120231212312223123231242312523126231272312823129231302313123132231332313423135231362313723138231392314023141231422314323144231452314623147231482314923150231512315223153231542315523156231572315823159231602316123162231632316423165231662316723168231692317023171231722317323174231752317623177231782317923180231812318223183231842318523186231872318823189231902319123192231932319423195231962319723198231992320023201232022320323204232052320623207232082320923210232112321223213232142321523216232172321823219232202322123222232232322423225232262322723228232292323023231232322323323234232352323623237232382323923240232412324223243232442324523246232472324823249232502325123252232532325423255232562325723258232592326023261232622326323264232652326623267232682326923270232712327223273232742327523276232772327823279232802328123282232832328423285232862328723288232892329023291232922329323294232952329623297232982329923300233012330223303233042330523306233072330823309233102331123312233132331423315233162331723318233192332023321233222332323324233252332623327233282332923330233312333223333233342333523336233372333823339233402334123342233432334423345233462334723348233492335023351233522335323354233552335623357233582335923360233612336223363233642336523366233672336823369233702337123372233732337423375233762337723378233792338023381233822338323384233852338623387233882338923390233912339223393233942339523396233972339823399234002340123402234032340423405234062340723408234092341023411234122341323414234152341623417234182341923420234212
34222342323424234252342623427234282342923430234312343223433234342343523436234372343823439234402344123442234432344423445234462344723448234492345023451234522345323454234552345623457234582345923460234612346223463234642346523466234672346823469234702347123472234732347423475234762347723478234792348023481234822348323484234852348623487234882348923490234912349223493234942349523496234972349823499235002350123502235032350423505235062350723508235092351023511235122351323514235152351623517235182351923520235212352223523235242352523526235272352823529235302353123532235332353423535235362353723538235392354023541235422354323544235452354623547235482354923550235512355223553235542355523556235572355823559235602356123562235632356423565235662356723568235692357023571235722357323574235752357623577235782357923580235812358223583235842358523586235872358823589235902359123592235932359423595235962359723598235992360023601236022360323604236052360623607236082360923610236112361223613236142361523616236172361823619236202362123622236232362423625236262362723628236292363023631236322363323634236352363623637236382363923640236412364223643236442364523646236472364823649236502365123652236532365423655236562365723658236592366023661236622366323664236652366623667236682366923670236712367223673236742367523676236772367823679236802368123682236832368423685236862368723688236892369023691236922369323694236952369623697236982369923700237012370223703237042370523706237072370823709237102371123712237132371423715237162371723718237192372023721237222372323724237252372623727237282372923730237312373223733237342373523736237372373823739237402374123742237432374423745237462374723748237492375023751237522375323754237552375623757237582375923760237612376223763237642376523766237672376823769237702377123772237732377423775237762377723778237792378023781237822378323784237852378623787237882378923790237912379223793237942379523796237972379823799238002380123802238032380423805238062380723808238092381023811238122381323814238152381623817238182381923820238212
38222382323824238252382623827238282382923830238312383223833238342383523836238372383823839238402384123842238432384423845238462384723848238492385023851238522385323854238552385623857238582385923860238612386223863238642386523866238672386823869238702387123872238732387423875238762387723878238792388023881238822388323884238852388623887238882388923890238912389223893238942389523896238972389823899239002390123902239032390423905239062390723908239092391023911239122391323914239152391623917239182391923920239212392223923239242392523926239272392823929239302393123932239332393423935239362393723938239392394023941239422394323944239452394623947239482394923950239512395223953239542395523956239572395823959239602396123962239632396423965239662396723968239692397023971239722397323974239752397623977239782397923980239812398223983239842398523986239872398823989239902399123992239932399423995239962399723998239992400024001240022400324004240052400624007240082400924010240112401224013240142401524016240172401824019240202402124022240232402424025240262402724028240292403024031240322403324034240352403624037240382403924040240412404224043240442404524046240472404824049240502405124052240532405424055240562405724058240592406024061240622406324064240652406624067240682406924070240712407224073240742407524076240772407824079240802408124082240832408424085240862408724088240892409024091240922409324094240952409624097240982409924100241012410224103241042410524106241072410824109241102411124112241132411424115241162411724118241192412024121241222412324124241252412624127241282412924130241312413224133241342413524136241372413824139241402414124142241432414424145241462414724148241492415024151241522415324154241552415624157241582415924160241612416224163241642416524166241672416824169241702417124172241732417424175241762417724178241792418024181241822418324184241852418624187241882418924190241912419224193241942419524196241972419824199242002420124202242032420424205242062420724208242092421024211242122421324214242152421624217242182421924220242212
42222422324224242252422624227242282422924230242312423224233242342423524236242372423824239242402424124242242432424424245242462424724248242492425024251242522425324254242552425624257242582425924260242612426224263242642426524266242672426824269242702427124272242732427424275242762427724278242792428024281242822428324284242852428624287242882428924290242912429224293242942429524296242972429824299243002430124302243032430424305243062430724308243092431024311243122431324314243152431624317243182431924320243212432224323243242432524326243272432824329243302433124332243332433424335243362433724338243392434024341243422434324344243452434624347243482434924350243512435224353243542435524356243572435824359243602436124362243632436424365243662436724368243692437024371243722437324374243752437624377243782437924380243812438224383243842438524386243872438824389243902439124392243932439424395243962439724398243992440024401244022440324404244052440624407244082440924410244112441224413244142441524416244172441824419244202442124422244232442424425244262442724428244292443024431244322443324434244352443624437244382443924440244412444224443244442444524446244472444824449244502445124452244532445424455244562445724458244592446024461244622446324464244652446624467244682446924470244712447224473244742447524476244772447824479244802448124482244832448424485244862448724488244892449024491244922449324494244952449624497244982449924500245012450224503245042450524506245072450824509245102451124512245132451424515245162451724518245192452024521245222452324524245252452624527245282452924530245312453224533245342453524536245372453824539245402454124542245432454424545245462454724548245492455024551245522455324554245552455624557245582455924560245612456224563245642456524566245672456824569245702457124572245732457424575245762457724578245792458024581245822458324584245852458624587245882458924590245912459224593245942459524596245972459824599246002460124602246032460424605246062460724608246092461024611246122461324614246152461624617246182461924620246212
46222462324624246252462624627246282462924630246312463224633246342463524636246372463824639246402464124642246432464424645246462464724648246492465024651246522465324654246552465624657246582465924660246612466224663246642466524666246672466824669246702467124672246732467424675246762467724678246792468024681246822468324684246852468624687246882468924690246912469224693246942469524696246972469824699247002470124702247032470424705247062470724708247092471024711247122471324714247152471624717247182471924720247212472224723247242472524726247272472824729247302473124732247332473424735247362473724738247392474024741247422474324744247452474624747247482474924750247512475224753247542475524756247572475824759247602476124762247632476424765247662476724768247692477024771247722477324774247752477624777247782477924780247812478224783247842478524786247872478824789247902479124792247932479424795247962479724798247992480024801248022480324804248052480624807248082480924810248112481224813248142481524816248172481824819248202482124822248232482424825248262482724828248292483024831248322483324834248352483624837248382483924840248412484224843248442484524846248472484824849248502485124852248532485424855248562485724858248592486024861248622486324864248652486624867248682486924870248712487224873248742487524876248772487824879248802488124882248832488424885248862488724888248892489024891248922489324894248952489624897248982489924900249012490224903249042490524906249072490824909249102491124912249132491424915249162491724918249192492024921249222492324924249252492624927249282492924930249312493224933249342493524936249372493824939249402494124942249432494424945249462494724948249492495024951249522495324954249552495624957249582495924960249612496224963249642496524966249672496824969249702497124972249732497424975249762497724978249792498024981249822498324984249852498624987249882498924990249912499224993249942499524996249972499824999250002500125002250032500425005250062500725008250092501025011250122501325014250152501625017250182501925020250212
50222502325024250252502625027250282502925030250312503225033250342503525036250372503825039250402504125042250432504425045250462504725048250492505025051250522505325054250552505625057250582505925060250612506225063250642506525066250672506825069250702507125072250732507425075250762507725078250792508025081250822508325084250852508625087250882508925090250912509225093250942509525096250972509825099251002510125102251032510425105251062510725108251092511025111251122511325114251152511625117251182511925120251212512225123251242512525126251272512825129251302513125132251332513425135251362513725138251392514025141251422514325144251452514625147251482514925150251512515225153251542515525156251572515825159251602516125162251632516425165251662516725168251692517025171251722517325174251752517625177251782517925180251812518225183251842518525186251872518825189251902519125192251932519425195251962519725198251992520025201252022520325204252052520625207252082520925210252112521225213252142521525216252172521825219252202522125222252232522425225252262522725228252292523025231252322523325234252352523625237252382523925240252412524225243252442524525246252472524825249252502525125252252532525425255252562525725258252592526025261252622526325264252652526625267252682526925270252712527225273252742527525276252772527825279252802528125282252832528425285252862528725288252892529025291252922529325294252952529625297252982529925300253012530225303253042530525306253072530825309253102531125312253132531425315253162531725318253192532025321253222532325324253252532625327253282532925330253312533225333253342533525336253372533825339253402534125342253432534425345253462534725348253492535025351253522535325354253552535625357253582535925360253612536225363253642536525366253672536825369253702537125372253732537425375253762537725378253792538025381253822538325384253852538625387253882538925390253912539225393253942539525396253972539825399254002540125402254032540425405254062540725408254092541025411254122541325414254152541625417254182541925420254212
54222542325424254252542625427254282542925430254312543225433254342543525436254372543825439254402544125442254432544425445254462544725448254492545025451254522545325454254552545625457254582545925460254612546225463254642546525466254672546825469254702547125472254732547425475254762547725478254792548025481254822548325484254852548625487254882548925490254912549225493254942549525496254972549825499255002550125502255032550425505255062550725508255092551025511255122551325514255152551625517255182551925520255212552225523255242552525526255272552825529255302553125532255332553425535255362553725538255392554025541255422554325544255452554625547255482554925550255512555225553255542555525556255572555825559255602556125562255632556425565255662556725568255692557025571255722557325574255752557625577255782557925580255812558225583255842558525586255872558825589255902559125592255932559425595255962559725598255992560025601256022560325604256052560625607256082560925610256112561225613256142561525616256172561825619256202562125622256232562425625256262562725628256292563025631256322563325634256352563625637256382563925640256412564225643256442564525646256472564825649256502565125652256532565425655256562565725658256592566025661256622566325664256652566625667256682566925670256712567225673256742567525676256772567825679256802568125682256832568425685256862568725688256892569025691256922569325694256952569625697256982569925700257012570225703257042570525706257072570825709257102571125712257132571425715257162571725718257192572025721257222572325724257252572625727257282572925730257312573225733257342573525736257372573825739257402574125742257432574425745257462574725748257492575025751257522575325754257552575625757257582575925760257612576225763257642576525766257672576825769257702577125772257732577425775257762577725778257792578025781257822578325784257852578625787257882578925790257912579225793257942579525796257972579825799258002580125802258032580425805258062580725808258092581025811258122581325814258152581625817258182581925820258212
58222582325824258252582625827258282582925830258312583225833258342583525836258372583825839258402584125842258432584425845258462584725848258492585025851258522585325854258552585625857258582585925860258612586225863258642586525866258672586825869258702587125872258732587425875258762587725878258792588025881258822588325884258852588625887258882588925890258912589225893258942589525896258972589825899
  1. '''
  2. PyMuPDF implemented on top of MuPDF Python bindings.
  3. License:
  4. SPDX-License-Identifier: GPL-3.0-only
  5. '''
  6. # To reduce startup times, we don't import everything we require here.
  7. #
  8. import atexit
  9. import binascii
  10. import collections
  11. import inspect
  12. import io
  13. import math
  14. import os
  15. import pathlib
  16. import glob
  17. import re
  18. import string
  19. import sys
  20. import tarfile
  21. import time
  22. import typing
  23. import warnings
  24. import weakref
  25. import zipfile
  26. from . import extra
  27. import importlib.util
  28. # Set up g_out_log and g_out_message from environment variables.
  29. #
  30. # PYMUPDF_MESSAGE controls the destination of user messages (from function
  31. # `pymupdf.message()`).
  32. #
  33. # PYMUPDF_LOG controls the destination of internal development logging (from
  34. # function `pymupdf.log()`).
  35. #
  36. # For syntax, see _make_output()'s `text` arg.
  37. #
  38. def _make_output(
  39. *,
  40. text=None,
  41. fd=None,
  42. stream=None,
  43. path=None,
  44. path_append=None,
  45. pylogging=None,
  46. pylogging_logger=None,
  47. pylogging_level=None,
  48. pylogging_name=None,
  49. default=None,
  50. ):
  51. '''
  52. Returns a stream that writes to a specified destination, which can be a
  53. file descriptor, a file, an existing stream or Python's `logging' system.
  54. Args:
  55. text: text specification of destination.
  56. fd:<int> - write to file descriptor.
  57. path:<str> - write to file.
  58. path+:<str> - append to file.
  59. logging:<items> - write to Python `logging` module.
  60. items: comma-separated <name=value> pairs.
  61. level=<int>
  62. name=<str>.
  63. Other names are ignored.
  64. fd: an int file descriptor.
  65. stream: something with methods .write(text) and .flush().
  66. If specified we simply return <stream>.
  67. path: a file path.
  68. If specified we return a stream that writes to this file.
  69. path_append: a file path.
  70. If specified we return a stream that appends to this file.
  71. pylogging*:
  72. if any of these args is not None, we return a stream that writes to
  73. Python's `logging` module.
  74. pylogging:
  75. Unused other than to activate use of logging module.
  76. pylogging_logger:
  77. A logging.Logger; If None, set from <pylogging_name>.
  78. pylogging_level:
  79. An int log level, if None we use
  80. pylogging_logger.getEffectiveLevel().
  81. pylogging_name:
  82. Only used if <pylogging_logger> is None:
  83. If <pylogging_name> is None, we set it to 'pymupdf'.
  84. Then we do: pylogging_logger = logging.getLogger(pylogging_name)
  85. '''
  86. if text is not None:
  87. # Textual specification, for example from from environment variable.
  88. if text.startswith('fd:'):
  89. fd = int(text[3:])
  90. elif text.startswith('path:'):
  91. path = text[5:]
  92. elif text.startswith('path+'):
  93. path_append = text[5:]
  94. elif text.startswith('logging:'):
  95. pylogging = True
  96. items_d = dict()
  97. items = text[8:].split(',')
  98. #items_d = {n: v for (n, v) in [item.split('=', 1) for item in items]}
  99. for item in items:
  100. if not item:
  101. continue
  102. nv = item.split('=', 1)
  103. assert len(nv) == 2, f'Need `=` in {item=}.'
  104. n, v = nv
  105. items_d[n] = v
  106. pylogging_level = items_d.get('level')
  107. if pylogging_level is not None:
  108. pylogging_level = int(pylogging_level)
  109. pylogging_name = items_d.get('name', 'pymupdf')
  110. else:
  111. assert 0, f'Expected prefix `fd:`, `path:`. `path+:` or `logging:` in {text=}.'
  112. if fd is not None:
  113. ret = io.open(fd, mode='w', closefd=False)
  114. elif stream is not None:
  115. assert hasattr(stream, 'write')
  116. assert hasattr(stream, 'flush')
  117. ret = stream
  118. elif path is not None:
  119. ret = io.open(path, 'w')
  120. elif path_append is not None:
  121. ret = io.open(path_append, 'a')
  122. elif (0
  123. or pylogging is not None
  124. or pylogging_logger is not None
  125. or pylogging_level is not None
  126. or pylogging_name is not None
  127. ):
  128. import logging
  129. if pylogging_logger is None:
  130. if pylogging_name is None:
  131. pylogging_name = 'pymupdf'
  132. pylogging_logger = logging.getLogger(pylogging_name)
  133. assert isinstance(pylogging_logger, logging.Logger)
  134. if pylogging_level is None:
  135. pylogging_level = pylogging_logger.getEffectiveLevel()
  136. class Out:
  137. def write(self, text):
  138. # `logging` module appends newlines, but so does the `print()`
  139. # functions in our caller message() and log() fns, so we need to
  140. # remove them here.
  141. text = text.rstrip('\n')
  142. if text:
  143. pylogging_logger.log(pylogging_level, text)
  144. def flush(self):
  145. pass
  146. ret = Out()
  147. else:
  148. ret = default
  149. return ret
# Set stream used by PyMuPDF messaging; $PYMUPDF_MESSAGE overrides the default
# of sys.stdout.
_g_out_message = _make_output(text=os.environ.get('PYMUPDF_MESSAGE'), default=sys.stdout)

# Set stream used by PyMuPDF development/debugging logging; $PYMUPDF_LOG
# overrides the default of sys.stdout.
_g_out_log = _make_output(text=os.environ.get('PYMUPDF_LOG'), default=sys.stdout)
  154. # Things for testing logging.
  155. _g_log_items = list()
  156. _g_log_items_active = False
  157. def _log_items():
  158. return _g_log_items
  159. def _log_items_active(active):
  160. global _g_log_items_active
  161. _g_log_items_active = active
  162. def _log_items_clear():
  163. del _g_log_items[:]
  164. def set_messages(
  165. *,
  166. text=None,
  167. fd=None,
  168. stream=None,
  169. path=None,
  170. path_append=None,
  171. pylogging=None,
  172. pylogging_logger=None,
  173. pylogging_level=None,
  174. pylogging_name=None,
  175. ):
  176. '''
  177. Sets destination of PyMuPDF messages. See _make_output() for details.
  178. '''
  179. global _g_out_message
  180. _g_out_message = _make_output(
  181. text=text,
  182. fd=fd,
  183. stream=stream,
  184. path=path,
  185. path_append=path_append,
  186. pylogging=pylogging,
  187. pylogging_logger=pylogging_logger,
  188. pylogging_level=pylogging_level,
  189. pylogging_name=pylogging_name,
  190. default=_g_out_message,
  191. )
  192. def set_log(
  193. *,
  194. text=None,
  195. fd=None,
  196. stream=None,
  197. path=None,
  198. path_append=None,
  199. pylogging=None,
  200. pylogging_logger=None,
  201. pylogging_level=None,
  202. pylogging_name=None,
  203. ):
  204. '''
  205. Sets destination of PyMuPDF development/debugging logging. See
  206. _make_output() for details.
  207. '''
  208. global _g_out_log
  209. _g_out_log = _make_output(
  210. text=text,
  211. fd=fd,
  212. stream=stream,
  213. path=path,
  214. path_append=path_append,
  215. pylogging=pylogging,
  216. pylogging_logger=pylogging_logger,
  217. pylogging_level=pylogging_level,
  218. pylogging_name=pylogging_name,
  219. default=_g_out_log,
  220. )
  221. def log( text='', caller=1):
  222. '''
  223. For development/debugging diagnostics.
  224. '''
  225. try:
  226. stack = inspect.stack(context=0)
  227. except StopIteration:
  228. pass
  229. else:
  230. frame_record = stack[caller]
  231. try:
  232. filename = os.path.relpath(frame_record.filename)
  233. except Exception: # Can fail on windows.
  234. filename = frame_record.filename
  235. line = frame_record.lineno
  236. function = frame_record.function
  237. text = f'{filename}:{line}:{function}(): {text}'
  238. if _g_log_items_active:
  239. _g_log_items.append(text)
  240. if _g_out_log:
  241. print(text, file=_g_out_log, flush=1)
  242. def message(text=''):
  243. '''
  244. For user messages.
  245. '''
  246. # It looks like `print()` does nothing if sys.stdout is None (without
  247. # raising an exception), but we don't rely on this.
  248. if _g_out_message:
  249. print(text, file=_g_out_message, flush=1)
  250. def exception_info():
  251. import traceback
  252. log(f'exception_info:')
  253. log(traceback.format_exc())
  254. # PDF names must not contain these characters:
  255. INVALID_NAME_CHARS = set(string.whitespace + "()<>[]{}/%" + chr(0))
  256. def get_env_bool( name, default):
  257. '''
  258. Returns `True`, `False` or `default` depending on whether $<name> is '1',
  259. '0' or unset. Otherwise assert-fails.
  260. '''
  261. v = os.environ.get( name)
  262. if v is None:
  263. ret = default
  264. elif v == '1':
  265. ret = True
  266. elif v == '0':
  267. ret = False
  268. else:
  269. assert 0, f'Unrecognised value for {name}: {v!r}'
  270. if ret != default:
  271. log(f'Using non-default setting from {name}: {v!r}')
  272. return ret
  273. def get_env_int( name, default):
  274. '''
  275. Returns `True`, `False` or `default` depending on whether $<name> is '1',
  276. '0' or unset. Otherwise assert-fails.
  277. '''
  278. v = os.environ.get( name)
  279. if v is None:
  280. ret = default
  281. else:
  282. ret = int(v)
  283. if ret != default:
  284. log(f'Using non-default setting from {name}: {v}')
  285. return ret
# All our `except ...` blocks output diagnostics if `g_exceptions_verbose` is
# true. $PYMUPDF_EXCEPTIONS_VERBOSE overrides the default of 1.
g_exceptions_verbose = get_env_int( 'PYMUPDF_EXCEPTIONS_VERBOSE', 1)

# $PYMUPDF_USE_EXTRA overrides whether to use optimised C fns in `extra`;
# must be '1' or '0' if set, default is True.
#
g_use_extra = get_env_bool( 'PYMUPDF_USE_EXTRA', True)
  292. # Global switches
  293. #
  294. class _Globals:
  295. def __init__(self):
  296. self.no_device_caching = 0
  297. self.small_glyph_heights = 0
  298. self.subset_fontnames = 0
  299. self.skip_quad_corrections = 0
  300. _globals = _Globals()
# Layout function, if one has been stored here; None means there is none.
_get_layout: typing.Optional[typing.Callable] = None

# global switch ensuring that the recommendation message is shown at most once
_recommend_layout = True  # must be referred to as "global" everywhere
  304. def no_recommend_layout():
  305. """For users who never want to see the layout recommendation."""
  306. global _recommend_layout
  307. _recommend_layout = False
  308. def _warn_layout_once():
  309. """Check if we should recommend installing the layout package."""
  310. msg="""Consider using the pymupdf_layout package for a greatly improved page layout analysis."""
  311. global _recommend_layout
  312. if (
  313. 1
  314. and _recommend_layout # still True?
  315. and _get_layout is None # no layout function stored here
  316. # client did not globally disable the recommendation
  317. and os.getenv("PYMUPDF_SUGGEST_LAYOUT_ANALYZER") != "0"
  318. # layout is not available in this Python
  319. and not importlib.util.find_spec("pymupdf.layout")
  320. ):
  321. print(msg)
  322. _recommend_layout = False # never show the message again
  323. # Optionally use MuPDF via cppyy bindings; experimental and not tested recently
  324. # as of 2023-01-20 11:51:40
  325. #
  326. mupdf_cppyy = os.environ.get( 'MUPDF_CPPYY')
  327. if mupdf_cppyy is not None:
  328. # pylint: disable=all
  329. log( f'{__file__}: $MUPDF_CPPYY={mupdf_cppyy!r} so attempting to import mupdf_cppyy.')
  330. log( f'{__file__}: $PYTHONPATH={os.environ["PYTHONPATH"]}')
  331. if mupdf_cppyy == '':
  332. import mupdf_cppyy
  333. else:
  334. import importlib
  335. mupdf_cppyy = importlib.machinery.SourceFileLoader(
  336. 'mupdf_cppyy',
  337. mupdf_cppyy
  338. ).load_module()
  339. mupdf = mupdf_cppyy.cppyy.gbl.mupdf
  340. else:
  341. # Use MuPDF Python SWIG bindings. We allow import from either our own
  342. # directory for conventional wheel installs, or from separate place in case
  343. # we are using a separately-installed system installation of mupdf.
  344. #
  345. try:
  346. from . import mupdf
  347. except Exception:
  348. import mupdf
  349. if hasattr(mupdf, 'internal_check_ndebug'):
  350. mupdf.internal_check_ndebug()
  351. mupdf.reinit_singlethreaded()
  352. def _int_rc(text):
  353. '''
  354. Converts string to int, ignoring trailing 'rc...'.
  355. '''
  356. rc = text.find('rc')
  357. if rc >= 0:
  358. text = text[:rc]
  359. return int(text)
  360. # Basic version information.
  361. #
# (We use `noqa F401` to avoid flake8 errors such as `F401
# '._build.mupdf_location' imported but unused`.)
  364. #
  365. from ._build import mupdf_location # noqa F401
  366. from ._build import pymupdf_git_branch # noqa F401
  367. from ._build import pymupdf_git_diff # noqa F401
  368. from ._build import pymupdf_git_sha # noqa F401
  369. from ._build import pymupdf_version # noqa F401
  370. from ._build import pymupdf_version_tuple # noqa F401
  371. from ._build import swig_version # noqa F401
  372. from ._build import swig_version_tuple # noqa F401
# MuPDF version string, as reported by the MuPDF bindings.
mupdf_version = mupdf.FZ_VERSION

# Removed in PyMuPDF-1.26.1.
pymupdf_date = None

# Versions as tuples; useful when comparing versions.
#
# Each dot-separated component goes through _int_rc(), so a trailing 'rc...'
# on a component is ignored.
mupdf_version_tuple = tuple( [_int_rc(i) for i in mupdf_version.split('.')])
# Sanity check: the parsed string must agree with the numeric constants.
assert mupdf_version_tuple == (mupdf.FZ_VERSION_MAJOR, mupdf.FZ_VERSION_MINOR, mupdf.FZ_VERSION_PATCH), \
        f'Inconsistent MuPDF version numbers: {mupdf_version_tuple=} != {(mupdf.FZ_VERSION_MAJOR, mupdf.FZ_VERSION_MINOR, mupdf.FZ_VERSION_PATCH)=}'

# Legacy version information.
#
# The date entries (third item of `version`, and `VersionDate`) are None.
version = (pymupdf_version, mupdf_version, None)
VersionFitz = mupdf_version
VersionBind = pymupdf_version
VersionDate = None
  387. # String formatting.
  388. def _format_g(value, *, fmt='%g'):
  389. '''
  390. Returns `value` formatted with mupdf.fz_format_double() if available,
  391. otherwise with Python's `%`.
  392. If `value` is a list or tuple, we return a space-separated string of
  393. formatted values.
  394. '''
  395. if isinstance(value, (list, tuple)):
  396. ret = ''
  397. for v in value:
  398. if ret:
  399. ret += ' '
  400. ret += _format_g(v, fmt=fmt)
  401. return ret
  402. else:
  403. return mupdf.fz_format_double(fmt, value)
  404. format_g = _format_g
# ByteString is gone from typing in 3.14.
# collections.abc.Buffer available from 3.12 only
try:
    ByteString = typing.ByteString
except AttributeError:
    # Fallback: a union of the concrete bytes-like builtin types.
    ByteString = bytes | bytearray | memoryview

# Names required by class method typing annotations.
OptBytes = typing.Optional[ByteString]
OptDict = typing.Optional[dict]
OptFloat = typing.Optional[float]
OptInt = typing.Union[int, None]
OptSeq = typing.Optional[typing.Sequence]
OptStr = typing.Optional[str]

# String placeholders, so these names exist for use in annotations.
# NOTE(review): `Page` is used with isinstance() further down, so it is
# presumably rebound to the real class later in the file - confirm.
Page = 'Page_forward_decl'
Point = 'Point_forward_decl'

matrix_like = 'matrix_like'
point_like = 'point_like'
quad_like = 'quad_like'
rect_like = 'rect_like'
  424. def _as_fz_document(document):
  425. '''
  426. Returns document as a mupdf.FzDocument, upcasting as required. Raises
  427. 'document closed' exception if closed.
  428. '''
  429. if isinstance(document, Document):
  430. if document.is_closed:
  431. raise ValueError('document closed')
  432. document = document.this
  433. if isinstance(document, mupdf.FzDocument):
  434. return document
  435. elif isinstance(document, mupdf.PdfDocument):
  436. return document.super()
  437. elif document is None:
  438. assert 0, f'document is None'
  439. else:
  440. assert 0, f'Unrecognised {type(document)=}'
  441. def _as_pdf_document(document, required=True):
  442. '''
  443. Returns `document` downcast to a mupdf.PdfDocument. If downcast fails (i.e.
  444. `document` is not actually a `PdfDocument`) then we assert-fail if `required`
  445. is true (the default) else return a `mupdf.PdfDocument` with `.m_internal`
  446. false.
  447. '''
  448. if isinstance(document, Document):
  449. if document.is_closed:
  450. raise ValueError('document closed')
  451. document = document.this
  452. if isinstance(document, mupdf.PdfDocument):
  453. return document
  454. elif isinstance(document, mupdf.FzDocument):
  455. ret = mupdf.PdfDocument(document)
  456. if required:
  457. assert ret.m_internal
  458. return ret
  459. elif document is None:
  460. assert 0, f'document is None'
  461. else:
  462. assert 0, f'Unrecognised {type(document)=}'
  463. def _as_fz_page(page):
  464. '''
  465. Returns page as a mupdf.FzPage, upcasting as required.
  466. '''
  467. if isinstance(page, Page):
  468. page = page.this
  469. if isinstance(page, mupdf.PdfPage):
  470. return page.super()
  471. elif isinstance(page, mupdf.FzPage):
  472. return page
  473. elif page is None:
  474. assert 0, f'page is None'
  475. else:
  476. assert 0, f'Unrecognised {type(page)=}'
  477. def _as_pdf_page(page, required=True):
  478. '''
  479. Returns `page` downcast to a mupdf.PdfPage. If downcast fails (i.e. `page`
  480. is not actually a `PdfPage`) then we assert-fail if `required` is true (the
  481. default) else return a `mupdf.PdfPage` with `.m_internal` false.
  482. '''
  483. if isinstance(page, Page):
  484. page = page.this
  485. if isinstance(page, mupdf.PdfPage):
  486. return page
  487. elif isinstance(page, mupdf.FzPage):
  488. ret = mupdf.pdf_page_from_fz_page(page)
  489. if required:
  490. assert ret.m_internal
  491. return ret
  492. elif page is None:
  493. assert 0, f'page is None'
  494. else:
  495. assert 0, f'Unrecognised {type(page)=}'
  496. def _pdf_annot_page(annot):
  497. '''
  498. Wrapper for mupdf.pdf_annot_page() which raises an exception if <annot>
  499. is not bound to a page instead of returning a mupdf.PdfPage with
  500. `.m_internal=None`.
  501. [Some other MuPDF functions such as pdf_update_annot()` already raise a
  502. similar exception if a pdf_annot's .page field is null.]
  503. '''
  504. page = mupdf.pdf_annot_page(annot)
  505. if not page.m_internal:
  506. raise RuntimeError('Annot is not bound to a page')
  507. return page
  508. # Fixme: we don't support JM_MEMORY=1.
  509. JM_MEMORY = 0
  510. # Classes
  511. #
  512. class Annot:
  513. def __init__(self, annot):
  514. assert isinstance( annot, mupdf.PdfAnnot)
  515. self.this = annot
  516. def __repr__(self):
  517. parent = getattr(self, 'parent', '<>')
  518. return "'%s' annotation on %s" % (self.type[1], str(parent))
  519. def __str__(self):
  520. return self.__repr__()
  521. def _erase(self):
  522. if getattr(self, "thisown", False):
  523. self.thisown = False
  524. def _get_redact_values(self):
  525. annot = self.this
  526. if mupdf.pdf_annot_type(annot) != mupdf.PDF_ANNOT_REDACT:
  527. return
  528. values = dict()
  529. try:
  530. obj = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "RO")
  531. if obj.m_internal:
  532. message_warning("Ignoring redaction key '/RO'.")
  533. xref = mupdf.pdf_to_num(obj)
  534. values[dictkey_xref] = xref
  535. obj = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "OverlayText")
  536. if obj.m_internal:
  537. text = mupdf.pdf_to_text_string(obj)
  538. values[dictkey_text] = JM_UnicodeFromStr(text)
  539. else:
  540. values[dictkey_text] = ''
  541. obj = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(annot), PDF_NAME('Q'))
  542. align = 0
  543. if obj.m_internal:
  544. align = mupdf.pdf_to_int(obj)
  545. values[dictkey_align] = align
  546. except Exception:
  547. if g_exceptions_verbose: exception_info()
  548. return
  549. val = values
  550. if not val:
  551. return val
  552. val["rect"] = self.rect
  553. text_color, fontname, fontsize = TOOLS._parse_da(self)
  554. val["text_color"] = text_color
  555. val["fontname"] = fontname
  556. val["fontsize"] = fontsize
  557. fill = self.colors["fill"]
  558. val["fill"] = fill
  559. return val
  560. def _getAP(self):
  561. if g_use_extra:
  562. assert isinstance( self.this, mupdf.PdfAnnot)
  563. ret = extra.Annot_getAP(self.this)
  564. assert isinstance( ret, bytes)
  565. return ret
  566. else:
  567. r = None
  568. res = None
  569. annot = self.this
  570. assert isinstance( annot, mupdf.PdfAnnot)
  571. annot_obj = mupdf.pdf_annot_obj( annot)
  572. ap = mupdf.pdf_dict_getl( annot_obj, PDF_NAME('AP'), PDF_NAME('N'))
  573. if mupdf.pdf_is_stream( ap):
  574. res = mupdf.pdf_load_stream( ap)
  575. if res and res.m_internal:
  576. r = JM_BinFromBuffer(res)
  577. return r
  578. def _setAP(self, buffer_, rect=0):
  579. try:
  580. annot = self.this
  581. annot_obj = mupdf.pdf_annot_obj( annot)
  582. page = _pdf_annot_page(annot)
  583. apobj = mupdf.pdf_dict_getl( annot_obj, PDF_NAME('AP'), PDF_NAME('N'))
  584. if not apobj.m_internal:
  585. raise RuntimeError( MSG_BAD_APN)
  586. if not mupdf.pdf_is_stream( apobj):
  587. raise RuntimeError( MSG_BAD_APN)
  588. res = JM_BufferFromBytes( buffer_)
  589. if not res.m_internal:
  590. raise ValueError( MSG_BAD_BUFFER)
  591. JM_update_stream( page.doc(), apobj, res, 1)
  592. if rect:
  593. bbox = mupdf.pdf_dict_get_rect( annot_obj, PDF_NAME('Rect'))
  594. mupdf.pdf_dict_put_rect( apobj, PDF_NAME('BBox'), bbox)
  595. except Exception:
  596. if g_exceptions_verbose: exception_info()
    def _update_appearance(self, opacity=-1, blend_mode=None, fill_color=None, rotate=-1):
        '''
        Lets MuPDF regenerate the appearance of this annotation, then
        optionally applies opacity and blend mode through an /ExtGState
        entry of the appearance stream.

        opacity: only applied if 0 <= opacity < 1.
        blend_mode: blend mode name; ignored if falsy.
        fill_color: handed to JM_color_FromSequence(), which yields the
            number of colour components and the component values.
        rotate: written to /Rotate if >= 0 and the annotation type is one of
            those listed below.

        Returns True. Exceptions are reported with message() (plus
        exception_info() if g_exceptions_verbose) and then re-raised.
        '''
        annot = self.this
        assert annot.m_internal
        annot_obj = mupdf.pdf_annot_obj( annot)
        page = _pdf_annot_page(annot)
        pdf = page.doc()
        type_ = mupdf.pdf_annot_type( annot)
        nfcol, fcol = JM_color_FromSequence(fill_color)

        try:
            # remove fill color from unsupported annots
            # or if so requested
            if nfcol == 0 or type_ not in (
                    mupdf.PDF_ANNOT_SQUARE,
                    mupdf.PDF_ANNOT_CIRCLE,
                    mupdf.PDF_ANNOT_LINE,
                    mupdf.PDF_ANNOT_POLY_LINE,
                    mupdf.PDF_ANNOT_POLYGON
                    ):
                mupdf.pdf_dict_del( annot_obj, PDF_NAME('IC'))
            elif nfcol > 0:
                mupdf.pdf_set_annot_interior_color( annot, fcol[:nfcol])

            # /Rotate is only written for a non-negative `rotate` and for
            # the annotation types listed here.
            insert_rot = 1 if rotate >= 0 else 0
            if type_ not in (
                    mupdf.PDF_ANNOT_CARET,
                    mupdf.PDF_ANNOT_CIRCLE,
                    mupdf.PDF_ANNOT_FREE_TEXT,
                    mupdf.PDF_ANNOT_FILE_ATTACHMENT,
                    mupdf.PDF_ANNOT_INK,
                    mupdf.PDF_ANNOT_LINE,
                    mupdf.PDF_ANNOT_POLY_LINE,
                    mupdf.PDF_ANNOT_POLYGON,
                    mupdf.PDF_ANNOT_SQUARE,
                    mupdf.PDF_ANNOT_STAMP,
                    mupdf.PDF_ANNOT_TEXT,
                    ):
                insert_rot = 0

            if insert_rot:
                mupdf.pdf_dict_put_int(annot_obj, PDF_NAME('Rotate'), rotate)

            # insert fill color
            if type_ == mupdf.PDF_ANNOT_FREE_TEXT:
                if nfcol > 0:
                    mupdf.pdf_set_annot_color(annot, fcol[:nfcol])
            elif nfcol > 0:
                # Write the components as an array under /IC.
                col = mupdf.pdf_new_array(page.doc(), nfcol)
                for i in range( nfcol):
                    mupdf.pdf_array_push_real(col, fcol[i])
                mupdf.pdf_dict_put(annot_obj, PDF_NAME('IC'), col)
            mupdf.pdf_dirty_annot(annot)
            mupdf.pdf_update_annot(annot)  # let MuPDF update
            pdf.resynth_required = 0
        except Exception as e:
            if g_exceptions_verbose:
                exception_info()
            message( f'cannot update annot: {e}')
            raise

        if (opacity < 0 or opacity >= 1) and not blend_mode:  # no opacity, no blend_mode
            return True

        try:  # create or update /ExtGState
            ap = mupdf.pdf_dict_getl(
                    mupdf.pdf_annot_obj(annot),
                    PDF_NAME('AP'),
                    PDF_NAME('N')
                    )
            if not ap.m_internal:  # should never happen
                raise RuntimeError( MSG_BAD_APN)

            resources = mupdf.pdf_dict_get( ap, PDF_NAME('Resources'))
            if not resources.m_internal:  # no Resources yet: make one
                resources = mupdf.pdf_dict_put_dict( ap, PDF_NAME('Resources'), 2)

            # Graphics state dictionary carrying opacity and/or blend mode;
            # the same values are also written to the annotation object.
            alp0 = mupdf.pdf_new_dict( page.doc(), 3)
            if opacity >= 0 and opacity < 1:
                mupdf.pdf_dict_put_real( alp0, PDF_NAME('CA'), opacity)
                mupdf.pdf_dict_put_real( alp0, PDF_NAME('ca'), opacity)
                mupdf.pdf_dict_put_real( annot_obj, PDF_NAME('CA'), opacity)

            if blend_mode:
                mupdf.pdf_dict_put_name( alp0, PDF_NAME('BM'), blend_mode)
                mupdf.pdf_dict_put_name( annot_obj, PDF_NAME('BM'), blend_mode)

            extg = mupdf.pdf_dict_get( resources, PDF_NAME('ExtGState'))
            if not extg.m_internal:  # no ExtGState yet: make one
                extg = mupdf.pdf_dict_put_dict( resources, PDF_NAME('ExtGState'), 2)

            # Store the graphics state under the key /H.
            mupdf.pdf_dict_put( extg, PDF_NAME('H'), alp0)
        except Exception as e:
            if g_exceptions_verbose: exception_info()
            message( f'cannot set opacity or blend mode\n: {e}')
            raise

        return True
  682. @property
  683. def apn_bbox(self):
  684. """annotation appearance bbox"""
  685. CheckParent(self)
  686. annot = self.this
  687. annot_obj = mupdf.pdf_annot_obj(annot)
  688. ap = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AP'), PDF_NAME('N'))
  689. if not ap.m_internal:
  690. val = JM_py_from_rect(mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE))
  691. else:
  692. rect = mupdf.pdf_dict_get_rect(ap, PDF_NAME('BBox'))
  693. val = JM_py_from_rect(rect)
  694. val = Rect(val) * self.get_parent().transformation_matrix
  695. val *= self.get_parent().derotation_matrix
  696. return val
  697. @property
  698. def apn_matrix(self):
  699. """annotation appearance matrix"""
  700. try:
  701. CheckParent(self)
  702. annot = self.this
  703. assert isinstance(annot, mupdf.PdfAnnot)
  704. ap = mupdf.pdf_dict_getl(
  705. mupdf.pdf_annot_obj(annot),
  706. mupdf.PDF_ENUM_NAME_AP,
  707. mupdf.PDF_ENUM_NAME_N
  708. )
  709. if not ap.m_internal:
  710. return JM_py_from_matrix(mupdf.FzMatrix())
  711. mat = mupdf.pdf_dict_get_matrix(ap, mupdf.PDF_ENUM_NAME_Matrix)
  712. val = JM_py_from_matrix(mat)
  713. val = Matrix(val)
  714. return val
  715. except Exception:
  716. if g_exceptions_verbose: exception_info()
  717. raise
  718. @property
  719. def blendmode(self):
  720. """annotation BlendMode"""
  721. CheckParent(self)
  722. annot = self.this
  723. annot_obj = mupdf.pdf_annot_obj(annot)
  724. obj = mupdf.pdf_dict_get(annot_obj, PDF_NAME('BM'))
  725. blend_mode = None
  726. if obj.m_internal:
  727. blend_mode = JM_UnicodeFromStr(mupdf.pdf_to_name(obj))
  728. return blend_mode
  729. # loop through the /AP/N/Resources/ExtGState objects
  730. obj = mupdf.pdf_dict_getl(
  731. annot_obj,
  732. PDF_NAME('AP'),
  733. PDF_NAME('N'),
  734. PDF_NAME('Resources'),
  735. PDF_NAME('ExtGState'),
  736. )
  737. if mupdf.pdf_is_dict(obj):
  738. n = mupdf.pdf_dict_len(obj)
  739. for i in range(n):
  740. obj1 = mupdf.pdf_dict_get_val(obj, i)
  741. if mupdf.pdf_is_dict(obj1):
  742. m = mupdf.pdf_dict_len(obj1)
  743. for j in range(m):
  744. obj2 = mupdf.pdf_dict_get_key(obj1, j)
  745. if mupdf.pdf_objcmp(obj2, PDF_NAME('BM')) == 0:
  746. blend_mode = JM_UnicodeFromStr(mupdf.pdf_to_name(mupdf.pdf_dict_get_val(obj1, j)))
  747. return blend_mode
  748. return blend_mode
  749. @property
  750. def border(self):
  751. """Border information."""
  752. CheckParent(self)
  753. atype = self.type[0]
  754. if atype not in (
  755. mupdf.PDF_ANNOT_CIRCLE,
  756. mupdf.PDF_ANNOT_FREE_TEXT,
  757. mupdf.PDF_ANNOT_INK,
  758. mupdf.PDF_ANNOT_LINE,
  759. mupdf.PDF_ANNOT_POLY_LINE,
  760. mupdf.PDF_ANNOT_POLYGON,
  761. mupdf.PDF_ANNOT_SQUARE,
  762. ):
  763. return dict()
  764. ao = mupdf.pdf_annot_obj(self.this)
  765. ret = JM_annot_border(ao)
  766. return ret
  767. def clean_contents(self, sanitize=1):
  768. """Clean appearance contents stream."""
  769. CheckParent(self)
  770. annot = self.this
  771. pdf = mupdf.pdf_get_bound_document(mupdf.pdf_annot_obj(annot))
  772. filter_ = _make_PdfFilterOptions(recurse=1, instance_forms=0, ascii=0, sanitize=sanitize)
  773. mupdf.pdf_filter_annot_contents(pdf, annot, filter_)
  774. @property
  775. def colors(self):
  776. """Color definitions."""
  777. try:
  778. CheckParent(self)
  779. annot = self.this
  780. assert isinstance(annot, mupdf.PdfAnnot)
  781. return JM_annot_colors(mupdf.pdf_annot_obj(annot))
  782. except Exception:
  783. if g_exceptions_verbose: exception_info()
  784. raise
  785. def delete_responses(self):
  786. """Delete 'Popup' and responding annotations."""
  787. CheckParent(self)
  788. annot = self.this
  789. annot_obj = mupdf.pdf_annot_obj(annot)
  790. page = _pdf_annot_page(annot)
  791. while 1:
  792. irt_annot = JM_find_annot_irt(annot)
  793. if not irt_annot:
  794. break
  795. mupdf.pdf_delete_annot(page, irt_annot)
  796. mupdf.pdf_dict_del(annot_obj, PDF_NAME('Popup'))
  797. annots = mupdf.pdf_dict_get(page.obj(), PDF_NAME('Annots'))
  798. n = mupdf.pdf_array_len(annots)
  799. found = 0
  800. for i in range(n-1, -1, -1):
  801. o = mupdf.pdf_array_get(annots, i)
  802. p = mupdf.pdf_dict_get(o, PDF_NAME('Parent'))
  803. if not o.m_internal:
  804. continue
  805. if not mupdf.pdf_objcmp(p, annot_obj):
  806. mupdf.pdf_array_delete(annots, i)
  807. found = 1
  808. if found:
  809. mupdf.pdf_dict_put(page.obj(), PDF_NAME('Annots'), annots)
  810. @property
  811. def file_info(self):
  812. """Attached file information."""
  813. CheckParent(self)
  814. res = dict()
  815. length = -1
  816. size = -1
  817. desc = None
  818. annot = self.this
  819. annot_obj = mupdf.pdf_annot_obj(annot)
  820. type_ = mupdf.pdf_annot_type(annot)
  821. if type_ != mupdf.PDF_ANNOT_FILE_ATTACHMENT:
  822. raise TypeError( MSG_BAD_ANNOT_TYPE)
  823. stream = mupdf.pdf_dict_getl(
  824. annot_obj,
  825. PDF_NAME('FS'),
  826. PDF_NAME('EF'),
  827. PDF_NAME('F'),
  828. )
  829. if not stream.m_internal:
  830. RAISEPY( "bad PDF: file entry not found", JM_Exc_FileDataError)
  831. fs = mupdf.pdf_dict_get(annot_obj, PDF_NAME('FS'))
  832. o = mupdf.pdf_dict_get(fs, PDF_NAME('UF'))
  833. if o.m_internal:
  834. filename = mupdf.pdf_to_text_string(o)
  835. else:
  836. o = mupdf.pdf_dict_get(fs, PDF_NAME('F'))
  837. if o.m_internal:
  838. filename = mupdf.pdf_to_text_string(o)
  839. o = mupdf.pdf_dict_get(fs, PDF_NAME('Desc'))
  840. if o.m_internal:
  841. desc = mupdf.pdf_to_text_string(o)
  842. o = mupdf.pdf_dict_get(stream, PDF_NAME('Length'))
  843. if o.m_internal:
  844. length = mupdf.pdf_to_int(o)
  845. o = mupdf.pdf_dict_getl(stream, PDF_NAME('Params'), PDF_NAME('Size'))
  846. if o.m_internal:
  847. size = mupdf.pdf_to_int(o)
  848. res[ dictkey_filename] = JM_EscapeStrFromStr(filename)
  849. res[ dictkey_descr] = JM_UnicodeFromStr(desc)
  850. res[ dictkey_length] = length
  851. res[ dictkey_size] = size
  852. return res
  853. @property
  854. def flags(self):
  855. """Flags field."""
  856. CheckParent(self)
  857. annot = self.this
  858. return mupdf.pdf_annot_flags(annot)
  859. def get_file(self):
  860. """Retrieve attached file content."""
  861. CheckParent(self)
  862. annot = self.this
  863. annot_obj = mupdf.pdf_annot_obj(annot)
  864. type = mupdf.pdf_annot_type(annot)
  865. if type != mupdf.PDF_ANNOT_FILE_ATTACHMENT:
  866. raise TypeError( MSG_BAD_ANNOT_TYPE)
  867. stream = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('FS'), PDF_NAME('EF'), PDF_NAME('F'))
  868. if not stream.m_internal:
  869. RAISEPY( "bad PDF: file entry not found", JM_Exc_FileDataError)
  870. buf = mupdf.pdf_load_stream(stream)
  871. res = JM_BinFromBuffer(buf)
  872. return res
  873. def get_oc(self):
  874. """Get annotation optional content reference."""
  875. CheckParent(self)
  876. oc = 0
  877. annot = self.this
  878. annot_obj = mupdf.pdf_annot_obj(annot)
  879. obj = mupdf.pdf_dict_get(annot_obj, PDF_NAME('OC'))
  880. if obj.m_internal:
  881. oc = mupdf.pdf_to_num(obj)
  882. return oc
  883. # PyMuPDF doesn't seem to have this .parent member, but removing it breaks
  884. # 11 tests...?
  885. #@property
  886. def get_parent(self):
  887. try:
  888. ret = getattr( self, 'parent')
  889. except AttributeError:
  890. page = _pdf_annot_page(self.this)
  891. assert isinstance( page, mupdf.PdfPage)
  892. document = Document( page.doc()) if page.m_internal else None
  893. ret = Page(page, document)
  894. #self.parent = weakref.proxy( ret)
  895. self.parent = ret
  896. #log(f'No attribute .parent: {type(self)=} {id(self)=}: have set {id(self.parent)=}.')
  897. #log( f'Have set self.parent')
  898. return ret
  899. def get_pixmap(self, matrix=None, dpi=None, colorspace=None, alpha=0):
  900. """annotation Pixmap"""
  901. CheckParent(self)
  902. cspaces = {"gray": csGRAY, "rgb": csRGB, "cmyk": csCMYK}
  903. if type(colorspace) is str:
  904. colorspace = cspaces.get(colorspace.lower(), None)
  905. if dpi:
  906. matrix = Matrix(dpi / 72, dpi / 72)
  907. ctm = JM_matrix_from_py(matrix)
  908. cs = colorspace
  909. if not cs:
  910. cs = mupdf.fz_device_rgb()
  911. pix = mupdf.pdf_new_pixmap_from_annot(self.this, ctm, cs, mupdf.FzSeparations(0), alpha)
  912. ret = Pixmap(pix)
  913. if dpi:
  914. ret.set_dpi(dpi, dpi)
  915. return ret
  916. def get_sound(self):
  917. """Retrieve sound stream."""
  918. CheckParent(self)
  919. annot = self.this
  920. annot_obj = mupdf.pdf_annot_obj(annot)
  921. type = mupdf.pdf_annot_type(annot)
  922. sound = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Sound'))
  923. if type != mupdf.PDF_ANNOT_SOUND or not sound.m_internal:
  924. raise TypeError( MSG_BAD_ANNOT_TYPE)
  925. if mupdf.pdf_dict_get(sound, PDF_NAME('F')).m_internal:
  926. RAISEPY( "unsupported sound stream", JM_Exc_FileDataError)
  927. res = dict()
  928. obj = mupdf.pdf_dict_get(sound, PDF_NAME('R'))
  929. if obj.m_internal:
  930. res['rate'] = mupdf.pdf_to_real(obj)
  931. obj = mupdf.pdf_dict_get(sound, PDF_NAME('C'))
  932. if obj.m_internal:
  933. res['channels'] = mupdf.pdf_to_int(obj)
  934. obj = mupdf.pdf_dict_get(sound, PDF_NAME('B'))
  935. if obj.m_internal:
  936. res['bps'] = mupdf.pdf_to_int(obj)
  937. obj = mupdf.pdf_dict_get(sound, PDF_NAME('E'))
  938. if obj.m_internal:
  939. res['encoding'] = mupdf.pdf_to_name(obj)
  940. obj = mupdf.pdf_dict_gets(sound, "CO")
  941. if obj.m_internal:
  942. res['compression'] = mupdf.pdf_to_name(obj)
  943. buf = mupdf.pdf_load_stream(sound)
  944. stream = JM_BinFromBuffer(buf)
  945. res['stream'] = stream
  946. return res
  947. def get_text(self, *args, **kwargs):
  948. return utils.get_text(self, *args, **kwargs)
  949. def get_textbox(self, *args, **kwargs):
  950. return utils.get_textbox(self, *args, **kwargs)
  951. def get_textpage(self, clip=None, flags=0):
  952. """Make annotation TextPage."""
  953. CheckParent(self)
  954. options = mupdf.FzStextOptions(flags)
  955. if clip:
  956. assert hasattr(mupdf, 'FZ_STEXT_CLIP_RECT'), f'MuPDF-{mupdf_version} does not support FZ_STEXT_CLIP_RECT.'
  957. clip2 = JM_rect_from_py(clip)
  958. options.clip = clip2.internal()
  959. options.flags |= mupdf.FZ_STEXT_CLIP_RECT
  960. annot = self.this
  961. stextpage = mupdf.FzStextPage(annot, options)
  962. ret = TextPage(stextpage)
  963. p = self.get_parent()
  964. if isinstance(p, weakref.ProxyType):
  965. ret.parent = p
  966. else:
  967. ret.parent = weakref.proxy(p)
  968. return ret
  969. @property
  970. def has_popup(self):
  971. """Check if annotation has a Popup."""
  972. CheckParent(self)
  973. annot = self.this
  974. obj = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(annot), PDF_NAME('Popup'))
  975. return True if obj.m_internal else False
  976. @property
  977. def info(self):
  978. """Various information details."""
  979. CheckParent(self)
  980. annot = self.this
  981. res = dict()
  982. res[dictkey_content] = JM_UnicodeFromStr(mupdf.pdf_annot_contents(annot))
  983. o = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(annot), PDF_NAME('Name'))
  984. res[dictkey_name] = JM_UnicodeFromStr(mupdf.pdf_to_name(o))
  985. # Title (= author)
  986. o = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(annot), PDF_NAME('T'))
  987. res[dictkey_title] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(o))
  988. # CreationDate
  989. o = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "CreationDate")
  990. res[dictkey_creationDate] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(o))
  991. # ModDate
  992. o = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(annot), PDF_NAME('M'))
  993. res[dictkey_modDate] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(o))
  994. # Subj
  995. o = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "Subj")
  996. res[dictkey_subject] = mupdf.pdf_to_text_string(o)
  997. # Identification (PDF key /NM)
  998. o = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "NM")
  999. res[dictkey_id] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(o))
  1000. return res
  1001. @property
  1002. def irt_xref(self):
  1003. '''
  1004. annotation IRT xref
  1005. '''
  1006. annot = self.this
  1007. annot_obj = mupdf.pdf_annot_obj( annot)
  1008. irt = mupdf.pdf_dict_get( annot_obj, PDF_NAME('IRT'))
  1009. if not irt.m_internal:
  1010. return 0
  1011. return mupdf.pdf_to_num( irt)
  1012. @property
  1013. def is_open(self):
  1014. """Get 'open' status of annotation or its Popup."""
  1015. CheckParent(self)
  1016. return mupdf.pdf_annot_is_open(self.this)
  1017. @property
  1018. def language(self):
  1019. """annotation language"""
  1020. this_annot = self.this
  1021. lang = mupdf.pdf_annot_language(this_annot)
  1022. if lang == mupdf.FZ_LANG_UNSET:
  1023. return
  1024. assert hasattr(mupdf, 'fz_string_from_text_language2')
  1025. return mupdf.fz_string_from_text_language2(lang)
  1026. @property
  1027. def line_ends(self):
  1028. """Line end codes."""
  1029. CheckParent(self)
  1030. annot = self.this
  1031. # return nothing for invalid annot types
  1032. if not mupdf.pdf_annot_has_line_ending_styles(annot):
  1033. return
  1034. lstart = mupdf.pdf_annot_line_start_style(annot)
  1035. lend = mupdf.pdf_annot_line_end_style(annot)
  1036. return lstart, lend
    @property
    def next(self):
        """Next annotation.

        Returns the annotation after this one (the next widget if this one
        is a widget), or None if there is none. A widget is returned as a
        `Widget` filled in by TOOLS._fill_widget(); anything else as an
        `Annot`.
        """
        CheckParent(self)
        this_annot = self.this
        assert isinstance(this_annot, mupdf.PdfAnnot)
        assert this_annot.m_internal
        type_ = mupdf.pdf_annot_type(this_annot)
        # Widgets are iterated with their own MuPDF function.
        if type_ != mupdf.PDF_ANNOT_WIDGET:
            annot = mupdf.pdf_next_annot(this_annot)
        else:
            annot = mupdf.pdf_next_widget(this_annot)

        val = Annot(annot) if annot.m_internal else None
        if not val:
            return None
        val.thisown = True
        # The new annotation must belong to the same page as this one.
        assert val.get_parent().this.m_internal_value() == self.get_parent().this.m_internal_value()
        # Register the new object with its parent.
        val.parent._annot_refs[id(val)] = val

        if val.type[0] == mupdf.PDF_ANNOT_WIDGET:
            widget = Widget()
            TOOLS._fill_widget(val, widget)
            val = widget
        return val
  1060. @property
  1061. def opacity(self):
  1062. """Opacity."""
  1063. CheckParent(self)
  1064. annot = self.this
  1065. opy = -1
  1066. ca = mupdf.pdf_dict_get( mupdf.pdf_annot_obj(annot), mupdf.PDF_ENUM_NAME_CA)
  1067. if mupdf.pdf_is_number(ca):
  1068. opy = mupdf.pdf_to_real(ca)
  1069. return opy
  1070. @property
  1071. def popup_rect(self):
  1072. """annotation 'Popup' rectangle"""
  1073. CheckParent(self)
  1074. rect = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
  1075. annot = self.this
  1076. annot_obj = mupdf.pdf_annot_obj( annot)
  1077. obj = mupdf.pdf_dict_get( annot_obj, PDF_NAME('Popup'))
  1078. if obj.m_internal:
  1079. rect = mupdf.pdf_dict_get_rect(obj, PDF_NAME('Rect'))
  1080. #log( '{rect=}')
  1081. val = JM_py_from_rect(rect)
  1082. #log( '{val=}')
  1083. val = Rect(val) * self.get_parent().transformation_matrix
  1084. val *= self.get_parent().derotation_matrix
  1085. return val
  1086. @property
  1087. def popup_xref(self):
  1088. """annotation 'Popup' xref"""
  1089. CheckParent(self)
  1090. xref = 0
  1091. annot = self.this
  1092. annot_obj = mupdf.pdf_annot_obj(annot)
  1093. obj = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Popup'))
  1094. if obj.m_internal:
  1095. xref = mupdf.pdf_to_num(obj)
  1096. return xref
  1097. @property
  1098. def rect(self):
  1099. """annotation rectangle"""
  1100. if g_use_extra:
  1101. val = extra.Annot_rect3( self.this)
  1102. else:
  1103. val = mupdf.pdf_bound_annot(self.this)
  1104. val = Rect(val)
  1105. # Caching self.parent_() reduces 1000x from 0.07 to 0.04.
  1106. #
  1107. p = self.get_parent()
  1108. #p = getattr( self, 'parent', None)
  1109. #if p is None:
  1110. # p = self.parent
  1111. # self.parent = p
  1112. #p = self.parent_()
  1113. val *= p.derotation_matrix
  1114. return val
  1115. @property
  1116. def rect_delta(self):
  1117. '''
  1118. annotation delta values to rectangle
  1119. '''
  1120. annot_obj = mupdf.pdf_annot_obj(self.this)
  1121. arr = mupdf.pdf_dict_get( annot_obj, PDF_NAME('RD'))
  1122. if mupdf.pdf_array_len( arr) == 4:
  1123. return (
  1124. mupdf.pdf_to_real( mupdf.pdf_array_get( arr, 0)),
  1125. mupdf.pdf_to_real( mupdf.pdf_array_get( arr, 1)),
  1126. -mupdf.pdf_to_real( mupdf.pdf_array_get( arr, 2)),
  1127. -mupdf.pdf_to_real( mupdf.pdf_array_get( arr, 3)),
  1128. )
  1129. @property
  1130. def rotation(self):
  1131. """annotation rotation"""
  1132. CheckParent(self)
  1133. annot = self.this
  1134. rotation = mupdf.pdf_dict_get( mupdf.pdf_annot_obj(annot), mupdf.PDF_ENUM_NAME_Rotate)
  1135. if not rotation.m_internal:
  1136. return -1
  1137. return mupdf.pdf_to_int( rotation)
  def set_apn_bbox(self, bbox):
      """Set annotation appearance bbox.

      Args:
          bbox: rectangle-like value. It is multiplied by the parent page's
              ``rotation_matrix * ~transformation_matrix`` before being
              written to /BBox of the normal appearance stream (/AP /N).

      Raises:
          RuntimeError: with ``MSG_BAD_APN`` if the annotation has no /AP /N.
      """
      CheckParent(self)
      parent_page = self.get_parent()
      rotation = parent_page.rotation_matrix
      page_ctm = parent_page.transformation_matrix
      bbox *= rotation * ~page_ctm
      appearance = mupdf.pdf_dict_getl(
          mupdf.pdf_annot_obj(self.this),
          PDF_NAME('AP'),
          PDF_NAME('N'),
      )
      if not appearance.m_internal:
          raise RuntimeError( MSG_BAD_APN)
      mupdf.pdf_dict_put_rect(appearance, PDF_NAME('BBox'), JM_rect_from_py(bbox))
  def set_apn_matrix(self, matrix):
      """Set annotation appearance matrix.

      Args:
          matrix: matrix-like value, converted with ``JM_matrix_from_py`` and
              stored as /Matrix of the normal appearance stream (/AP /N).

      Raises:
          RuntimeError: with ``MSG_BAD_APN`` if the annotation has no /AP /N.
      """
      CheckParent(self)
      appearance = mupdf.pdf_dict_getl(
          mupdf.pdf_annot_obj(self.this),
          PDF_NAME('AP'),
          PDF_NAME('N'),
      )
      if not appearance.m_internal:
          raise RuntimeError( MSG_BAD_APN)
      mupdf.pdf_dict_put_matrix(appearance, PDF_NAME('Matrix'), JM_matrix_from_py(matrix))
  def set_blendmode(self, blend_mode):
      """Set annotation BlendMode.

      Args:
          blend_mode: name written to the /BM key of the annotation object.
      """
      CheckParent(self)
      target = mupdf.pdf_annot_obj(self.this)
      mupdf.pdf_dict_put_name(target, PDF_NAME('BM'), blend_mode)
  def set_border(self, border=None, width=-1, style=None, dashes=None, clouds=-1):
      """Set border properties.

      Either a dict, or direct arguments width, style, dashes or clouds.

      Args:
          border: dict with any of the keys "width", "style", "dashes",
              "clouds". If it is not a dict, one is built from the direct
              arguments. A dict passed in is completed and normalized in place.
          width: border width, ``-1`` meaning "not given".
          style: border style.
          dashes: sequence of integers; set to ``None`` if any item is not
              an ``int``.
          clouds: cloudy border effect, ``-1`` meaning "not given".

      Returns:
          ``None`` (after printing a message) for annotation types that take
          no border, otherwise the result of ``JM_annot_set_border``.
      """
      CheckParent(self)
      atype, atname = self.type[:2]  # annotation type
      border_capable = (
          mupdf.PDF_ANNOT_CIRCLE,
          mupdf.PDF_ANNOT_FREE_TEXT,
          mupdf.PDF_ANNOT_INK,
          mupdf.PDF_ANNOT_LINE,
          mupdf.PDF_ANNOT_POLY_LINE,
          mupdf.PDF_ANNOT_POLYGON,
          mupdf.PDF_ANNOT_SQUARE,
      )
      cloud_capable = (
          mupdf.PDF_ANNOT_CIRCLE,
          mupdf.PDF_ANNOT_FREE_TEXT,
          mupdf.PDF_ANNOT_POLYGON,
          mupdf.PDF_ANNOT_SQUARE,
      )
      if atype not in border_capable:
          message(f"Cannot set border for '{atname}'.")
          return None
      if atype not in cloud_capable and clouds > 0:
          message(f"Cannot set cloudy border for '{atname}'.")
          clouds = -1  # do not set border effect

      if type(border) is not dict:
          border = {"width": width, "style": style, "dashes": dashes, "clouds": clouds}

      # make sure every key exists, then map None to the "not given" marker
      for key, fallback in (("width", -1), ("style", None), ("dashes", None), ("clouds", -1)):
          border.setdefault(key, fallback)
      for key in ("width", "clouds"):
          if border[key] is None:
              border[key] = -1

      if hasattr(border["dashes"], "__getitem__"):  # ensure sequence items are integers
          dash_items = tuple(border["dashes"])
          all_ints = all(isinstance(item, int) for item in dash_items)
          border["dashes"] = dash_items if all_ints else None

      annot_obj = mupdf.pdf_annot_obj(self.this)
      pdf = mupdf.pdf_get_bound_document(annot_obj)
      return JM_annot_set_border(border, pdf, annot_obj)
  def set_colors(self, colors=None, stroke=None, fill=None):
      """Set 'stroke' and 'fill' colors.

      Use either a dict or the direct arguments.

      Args:
          colors: dict with optional keys "stroke" and "fill". If it is not
              a dict, the direct arguments are used instead.
          stroke: color written to /C. An empty list / tuple writes ``[]``;
              ``None`` leaves the key untouched.
          fill: color written to /IC under the same rules. Ignored (with a
              printed warning) for annotation types that take no fill.

      Raises:
          ValueError: for 'FreeText' annotations.
      """
      if self.type[0] == mupdf.PDF_ANNOT_FREE_TEXT:
          raise ValueError("cannot be used for FreeText annotations")

      CheckParent(self)
      doc = self.get_parent().parent

      def write_color(key, color):
          """Store one color sequence under the given annotation key."""
          if color in ([], ()):
              doc.xref_set_key(self.xref, key, "[]")
              return
          if color is None:
              return
          if hasattr(color, "__float__"):
              color = [float(color)]
          CheckColor(color)
          assert len(color) in (1, 3, 4)
          doc.xref_set_key(self.xref, key, f"[{_format_g(color)}]")

      if type(colors) is not dict:
          colors = {"fill": fill, "stroke": stroke}
      fill = colors.get("fill")
      stroke = colors.get("stroke")

      fill_annots = (
          mupdf.PDF_ANNOT_CIRCLE,
          mupdf.PDF_ANNOT_SQUARE,
          mupdf.PDF_ANNOT_LINE,
          mupdf.PDF_ANNOT_POLY_LINE,
          mupdf.PDF_ANNOT_POLYGON,
          mupdf.PDF_ANNOT_REDACT,
      )

      write_color("C", stroke)

      if fill and self.type[0] not in fill_annots:
          message("Warning: fill color ignored for annot type '%s'." % self.type[1])
          return

      write_color("IC", fill)
  def set_flags(self, flags):
      """Set annotation flags.

      Args:
          flags: value handed unchanged to ``mupdf.pdf_set_annot_flags``.
      """
      CheckParent(self)
      mupdf.pdf_set_annot_flags(self.this, flags)
  def set_info(self, info=None, content=None, title=None, creationDate=None, modDate=None, subject=None):
      """Set various properties.

      Args:
          info: optional dict with keys "content", "title", "creationDate",
              "modDate", "subject". When it is a dict, it replaces all the
              direct arguments.
          content: text for the annotation contents.
          title: author; only applied when MuPDF reports that the annotation
              has an author property.
          creationDate: string for /CreationDate (same restriction as title).
          modDate: string for /M (same restriction as title).
          subject: string for /Subj (same restriction as title).

      Falsy values are skipped, i.e. existing entries are left unchanged.
      """
      CheckParent(self)
      if type(info) is dict:  # build the args from the dictionary
          content = info.get("content")
          title = info.get("title")
          creationDate = info.get("creationDate")
          modDate = info.get("modDate")
          subject = info.get("subject")
          info = None
      annot = self.this
      # use this to indicate a 'markup' annot type
      is_markup = mupdf.pdf_annot_has_author(annot)

      if content:
          mupdf.pdf_set_annot_contents(annot, content)

      if not is_markup:
          return

      if title:  # title (= author)
          mupdf.pdf_set_annot_author(annot, title)
      if creationDate:
          mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), PDF_NAME('CreationDate'), creationDate)
      if modDate:
          mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), PDF_NAME('M'), modDate)
      if subject:
          mupdf.pdf_dict_puts(mupdf.pdf_annot_obj(annot), "Subj", mupdf.pdf_new_text_string(subject))
  def set_irt_xref(self, xref):
      """Set annotation IRT xref.

      Args:
          xref: cross reference number of the object to store under /IRT.

      Raises:
          ValueError: with ``MSG_BAD_XREF`` if xref is outside the document's
              xref table, or with ``MSG_IS_NO_ANNOT`` if the /Subtype of the
              referenced object is not a known annotation type.
      """
      annot = self.this
      annot_obj = mupdf.pdf_annot_obj(annot)
      page = _pdf_annot_page(annot)
      in_range = 1 <= xref < mupdf.pdf_xref_len(page.doc())
      if not in_range:
          raise ValueError( MSG_BAD_XREF)
      target = mupdf.pdf_new_indirect(page.doc(), xref, 0)
      subtype_name = mupdf.pdf_to_name(mupdf.pdf_dict_get(target, PDF_NAME('Subtype')))
      if mupdf.pdf_annot_type_from_string(subtype_name) < 0:
          raise ValueError( MSG_IS_NO_ANNOT)
      mupdf.pdf_dict_put(annot_obj, PDF_NAME('IRT'), target)
  def set_language(self, language=None):
      """Set annotation language.

      Args:
          language: language string converted with
              ``mupdf.fz_text_language_from_string``. A falsy value selects
              ``mupdf.FZ_LANG_UNSET``.
      """
      CheckParent(self)
      if language:
          lang_code = mupdf.fz_text_language_from_string(language)
      else:
          lang_code = mupdf.FZ_LANG_UNSET
      mupdf.pdf_set_annot_language(self.this, lang_code)
  def set_line_ends(self, start, end):
      """Set line end codes.

      Args:
          start: line ending style for the start of the line.
          end: line ending style for the end of the line.

      If MuPDF reports that the annotation has no line ending styles, only
      a warning is issued and nothing is changed.
      """
      CheckParent(self)
      annot = self.this
      if not mupdf.pdf_annot_has_line_ending_styles(annot):
          message_warning("bad annot type for line ends")
          return
      mupdf.pdf_set_annot_line_ending_styles(annot, start, end)
  def set_name(self, name):
      """Set /Name (icon) of annotation.

      Args:
          name: value written as a PDF name under the /Name key.
      """
      CheckParent(self)
      target = mupdf.pdf_annot_obj(self.this)
      mupdf.pdf_dict_put_name(target, PDF_NAME('Name'), name)
  def set_oc(self, oc=0):
      """Set / remove annotation OC xref.

      Args:
          oc: xref handed to ``JM_add_oc_object``. A falsy value deletes the
              /OC key from the annotation object instead.
      """
      CheckParent(self)
      annot_obj = mupdf.pdf_annot_obj(self.this)
      if oc:
          JM_add_oc_object(mupdf.pdf_get_bound_document(annot_obj), annot_obj, oc)
      else:
          mupdf.pdf_dict_del(annot_obj, PDF_NAME('OC'))
  def set_opacity(self, opacity):
      """Set opacity.

      Args:
          opacity: value checked with ``_INRANGE(opacity, 0.0, 1.0)``. A
              value failing that check sets the opacity to 1. A value below
              1 additionally sets the ``transparency`` attribute of the
              annotation's page to 1.
      """
      CheckParent(self)
      annot = self.this
      if _INRANGE(opacity, 0.0, 1.0):
          mupdf.pdf_set_annot_opacity(annot, opacity)
          if opacity < 1.0:
              owning_page = _pdf_annot_page(annot)
              owning_page.transparency = 1
      else:
          mupdf.pdf_set_annot_opacity(annot, 1)
  def set_open(self, is_open):
      """Set 'open' status of annotation or its Popup.

      Args:
          is_open: value handed unchanged to ``mupdf.pdf_set_annot_is_open``.
      """
      CheckParent(self)
      mupdf.pdf_set_annot_is_open(self.this, is_open)
  def set_popup(self, rect):
      """Create annotation 'Popup' or update rectangle.

      Args:
          rect: rectangle-like value. It is transformed with the matrix
              returned by ``JM_rotate_page_matrix`` for the annotation's
              page before being passed to MuPDF.
      """
      CheckParent(self)
      annot = self.this
      rotation = JM_rotate_page_matrix(_pdf_annot_page(annot))
      popup_rect = mupdf.fz_transform_rect(JM_rect_from_py(rect), rotation)
      mupdf.pdf_set_annot_popup(annot, popup_rect)
  def set_rect(self, rect):
      """Set annotation rectangle.

      Args:
          rect: rectangle-like value. It is transformed with the matrix
              returned by ``JM_rotate_page_matrix`` for the annotation's
              page before being passed to MuPDF.

      Returns:
          ``False`` (after printing a message) if MuPDF raises while setting
          the rectangle, otherwise ``None``.

      Raises:
          ValueError: with ``MSG_BAD_RECT`` if the transformed rectangle is
              empty or infinite.
      """
      CheckParent(self)
      annot = self.this
      rotation = JM_rotate_page_matrix(_pdf_annot_page(annot))
      new_rect = mupdf.fz_transform_rect(JM_rect_from_py(rect), rotation)
      unusable = mupdf.fz_is_empty_rect(new_rect) or mupdf.fz_is_infinite_rect(new_rect)
      if unusable:
          raise ValueError( MSG_BAD_RECT)
      try:
          mupdf.pdf_set_annot_rect(annot, new_rect)
      except Exception as e:
          message(f'cannot set rect: {e}')
          return False
  def set_rotation(self, rotate=0):
      """Set annotation rotation.

      Args:
          rotate: angle in degrees. It is normalized into ``[0, 360)``; for
              'FreeText' annotations anything that is not a multiple of 90
              becomes 0. The result is stored under /Rotate.

      Annotation types outside the supported list are left untouched.
      """
      CheckParent(self)
      annot = self.this
      annot_type = mupdf.pdf_annot_type(annot)
      rotatable_types = (
          mupdf.PDF_ANNOT_CARET,
          mupdf.PDF_ANNOT_CIRCLE,
          mupdf.PDF_ANNOT_FREE_TEXT,
          mupdf.PDF_ANNOT_FILE_ATTACHMENT,
          mupdf.PDF_ANNOT_INK,
          mupdf.PDF_ANNOT_LINE,
          mupdf.PDF_ANNOT_POLY_LINE,
          mupdf.PDF_ANNOT_POLYGON,
          mupdf.PDF_ANNOT_SQUARE,
          mupdf.PDF_ANNOT_STAMP,
          mupdf.PDF_ANNOT_TEXT,
      )
      if annot_type not in rotatable_types:
          return

      # bring the angle into the range 0 <= angle < 360
      angle = rotate
      while angle < 0:
          angle += 360
      while angle >= 360:
          angle -= 360

      if annot_type == mupdf.PDF_ANNOT_FREE_TEXT and angle % 90 != 0:
          angle = 0

      mupdf.pdf_dict_put_int(mupdf.pdf_annot_obj(annot), PDF_NAME('Rotate'), angle)
  @property
  def type(self):
      """Annotation type.

      Returns:
          The string ``'null'`` if the underlying MuPDF annotation is empty.
          Otherwise a tuple ``(type_number, type_name)``, extended by a third
          item holding the value of the annotation's /IT entry when that
          entry exists and is a PDF name.
      """
      CheckParent(self)
      if not self.this.m_internal:
          return 'null'
      type_ = mupdf.pdf_annot_type(self.this)
      c = mupdf.pdf_string_from_annot_type(type_)
      o = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(self.this), 'IT')
      # Only a /IT entry that really is a name yields a usable string.
      # (The name test used to be inverted, so a valid /IT name was never
      # returned and the 3-tuple branch was reached only for non-names.)
      if not o.m_internal or not mupdf.pdf_is_name(o):
          return (type_, c)
      it = mupdf.pdf_to_name(o)
      return (type_, c, it)
  def update(self,
          blend_mode: OptStr =None,
          opacity: OptFloat =None,
          fontsize: float =0,
          fontname: OptStr =None,
          text_color: OptSeq =None,
          border_color: OptSeq =None,
          fill_color: OptSeq =None,
          cross_out: bool =True,
          rotate: int =-1,
          ):
      """Update annot appearance.

      Notes:
          Depending on the annot type, some parameters make no sense,
          while others are only available in this method to achieve the
          desired result. This is especially true for 'FreeText' annots.

      Args:
          blend_mode: set the blend mode, all annotations.
          opacity: set the opacity, all annotations.
          fontsize: set fontsize, 'FreeText' only.
          fontname: set the font, 'FreeText' only.
          border_color: set border color, 'FreeText' only.
          text_color: set text color, 'FreeText' only.
          fill_color: set fill color, all annotations.
          cross_out: draw diagonal lines, 'Redact' only.
          rotate: set rotation, 'FreeText' and some others.

      Raises:
          ValueError: if border_color is given but the annotation has no
              /RC (rich text) entry.
          RuntimeError: if ``_update_appearance`` returns ``False``.
      """
      annot_obj = mupdf.pdf_annot_obj(self.this)

      if border_color:
          is_rich_text = mupdf.pdf_dict_get(annot_obj, PDF_NAME("RC"))
          if not is_rich_text:
              raise ValueError("cannot set border_color if rich_text is False")
      Annot.update_timing_test()
      CheckParent(self)

      def color_string(cs, code):
          """Return valid PDF color operator for a given color sequence.
          """
          cc = ColorCode(cs, code)
          if not cc:
              return b""
          return (cc + "\n").encode()

      annot_type = self.type[0]  # get the annot type

      dt = self.border.get("dashes", None)  # get the dashes spec
      bwidth = self.border.get("width", -1)  # get border line width
      stroke = self.colors["stroke"]  # get the stroke color
      if fill_color is not None:
          fill = fill_color
      else:
          fill = self.colors["fill"]
      rect = None  # self.rect  # prevent MuPDF fiddling with it
      apnmat = self.apn_matrix  # prevent MuPDF fiddling with it
      if rotate != -1:  # sanitize rotation value
          while rotate < 0:
              rotate += 360
          while rotate >= 360:
              rotate -= 360
          if annot_type == mupdf.PDF_ANNOT_FREE_TEXT and rotate % 90 != 0:
              rotate = 0

      #------------------------------------------------------------------
      # handle opacity and blend mode
      #------------------------------------------------------------------
      if blend_mode is None:
          blend_mode = self.blendmode
      if not hasattr(opacity, "__float__"):
          opacity = self.opacity

      if 0 <= opacity < 1 or blend_mode:
          opa_code = "/H gs\n"  # then we must reference this 'gs'
      else:
          opa_code = ""

      if annot_type == mupdf.PDF_ANNOT_FREE_TEXT:
          CheckColor(text_color)
          CheckColor(fill_color)
          tcol, fname, fsize = TOOLS._parse_da(self)

          # read and update default appearance as necessary
          if fsize <= 0:
              fsize = 12
          if text_color:
              tcol = text_color
          if fontname:
              fname = fontname
          if fontsize > 0:
              fsize = fontsize
          JM_make_annot_DA(self, len(tcol), tcol, fname, fsize)
          blend_mode = None  # not supported for free text annotations!

      #------------------------------------------------------------------
      # now invoke MuPDF to update the annot appearance
      #------------------------------------------------------------------
      val = self._update_appearance(
          opacity=opacity,
          blend_mode=blend_mode,
          fill_color=fill,
          rotate=rotate,
      )
      if val is False:
          raise RuntimeError("Error updating annotation.")

      if annot_type == mupdf.PDF_ANNOT_FREE_TEXT:
          # in absence of previous opacity, we may need to modify the AP
          ap = self._getAP()
          if 0 <= opacity < 1 and not ap.startswith(b"/H gs"):
              self._setAP(b"/H gs\n" + ap)
          return

      bfill = color_string(fill, "f")
      bstroke = color_string(stroke, "c")

      p_ctm = self.get_parent().transformation_matrix
      imat = ~p_ctm  # inverse page transf. matrix

      if dt:
          dashes = "[" + " ".join(map(str, dt)) + "] 0 d\n"
          dashes = dashes.encode("utf-8")
      else:
          dashes = None

      if self.line_ends:
          line_end_le, line_end_ri = self.line_ends
      else:
          line_end_le, line_end_ri = 0, 0  # init line end codes

      # read contents as created by MuPDF
      ap = self._getAP()
      ap_tab = ap.splitlines()  # split in single lines
      ap_updated = False  # assume we did nothing

      if annot_type == mupdf.PDF_ANNOT_REDACT:
          if cross_out:  # create crossed-out rect
              ap_updated = True
              ap_tab = ap_tab[:-1]
              _, LL, LR, UR, UL = ap_tab
              ap_tab.append(LR)
              ap_tab.append(LL)
              ap_tab.append(UR)
              ap_tab.append(LL)
              ap_tab.append(UL)
              ap_tab.append(b"S")

          if bwidth > 0 or bstroke != b"":
              ap_updated = True
              ntab = [_format_g(bwidth).encode() + b" w"] if bwidth > 0 else []
              for line in ap_tab:
                  if line.endswith(b"w"):
                      continue
                  if line.endswith(b"RG") and bstroke != b"":
                      line = bstroke[:-1]
                  ntab.append(line)
              ap_tab = ntab

          ap = b"\n".join(ap_tab)

      if annot_type in (mupdf.PDF_ANNOT_POLYGON, mupdf.PDF_ANNOT_POLY_LINE):
          ap = b"\n".join(ap_tab[:-1]) + b"\n"
          ap_updated = True
          if bfill != b"":
              if annot_type == mupdf.PDF_ANNOT_POLYGON:
                  ap = ap + bfill + b"b"  # close, fill, and stroke
              elif annot_type == mupdf.PDF_ANNOT_POLY_LINE:
                  ap = ap + b"S"  # stroke
          else:
              if annot_type == mupdf.PDF_ANNOT_POLYGON:
                  ap = ap + b"s"  # close and stroke
              elif annot_type == mupdf.PDF_ANNOT_POLY_LINE:
                  ap = ap + b"S"  # stroke

      if dashes is not None:  # handle dashes
          ap = dashes + ap
          # reset dashing - only applies for LINE annots with line ends given
          ap = ap.replace(b"\nS\n", b"\nS\n[] 0 d\n", 1)
          ap_updated = True

      if opa_code:
          ap = opa_code.encode("utf-8") + ap
          ap_updated = True

      ap = b"q\n" + ap + b"\nQ\n"
      #----------------------------------------------------------------------
      # the following handles line end symbols for 'Polygon' and 'Polyline'
      #----------------------------------------------------------------------
      if line_end_le + line_end_ri > 0 and annot_type in (mupdf.PDF_ANNOT_POLYGON, mupdf.PDF_ANNOT_POLY_LINE):
          le_funcs = (None, TOOLS._le_square, TOOLS._le_circle,
                      TOOLS._le_diamond, TOOLS._le_openarrow,
                      TOOLS._le_closedarrow, TOOLS._le_butt,
                      TOOLS._le_ropenarrow, TOOLS._le_rclosedarrow,
                      TOOLS._le_slash)
          le_funcs_range = range(1, len(le_funcs))
          d = 2 * max(1, self.border["width"])
          rect = self.rect + (-d, -d, d, d)
          ap_updated = True
          points = self.vertices
          if line_end_le in le_funcs_range:
              p1 = Point(points[0]) * imat
              p2 = Point(points[1]) * imat
              left = le_funcs[line_end_le](self, p1, p2, False, fill_color)
              ap += left.encode()
          if line_end_ri in le_funcs_range:
              p1 = Point(points[-2]) * imat
              p2 = Point(points[-1]) * imat
              left = le_funcs[line_end_ri](self, p1, p2, True, fill_color)
              ap += left.encode()

      if ap_updated:
          if rect:  # rect modified here?
              self.set_rect(rect)
              self._setAP(ap, rect=1)
          else:
              self._setAP(ap, rect=0)

      #-------------------------------
      # handle annotation rotations
      #-------------------------------
      if annot_type not in (  # only these types are supported
              mupdf.PDF_ANNOT_CARET,
              mupdf.PDF_ANNOT_CIRCLE,
              mupdf.PDF_ANNOT_FILE_ATTACHMENT,
              mupdf.PDF_ANNOT_INK,
              mupdf.PDF_ANNOT_LINE,
              mupdf.PDF_ANNOT_POLY_LINE,
              mupdf.PDF_ANNOT_POLYGON,
              mupdf.PDF_ANNOT_SQUARE,
              mupdf.PDF_ANNOT_STAMP,
              mupdf.PDF_ANNOT_TEXT,
              ):
          return

      rot = self.rotation  # get value from annot object
      if rot == -1:  # nothing to change
          return

      M = (self.rect.tl + self.rect.br) / 2  # center of annot rect

      if rot == 0:  # undo rotations
          if abs(apnmat - Matrix(1, 1)) < 1e-5:
              return  # matrix already is a no-op
          quad = self.rect.morph(M, ~apnmat)  # derotate rect
          # Use the snake_case setter that this class defines; the former
          # camelCase spelling is not a method of this class.
          self.set_rect(quad.rect)
          self.set_apn_matrix(Matrix(1, 1))  # appearance matrix = no-op
          return

      mat = Matrix(rot)
      quad = self.rect.morph(M, mat)
      self.set_rect(quad.rect)
      self.set_apn_matrix(apnmat * mat)
  def update_file(self, buffer_=None, filename=None, ufilename=None, desc=None):
      """Update attached file.

      Args:
          buffer_: new file content, converted with ``JM_BufferFromBytes``.
          filename: new value for /F and /UF (in the stream and in /FS) and
              for the annotation's /Contents.
          ufilename: new value for /UF only.
          desc: new value for /Desc.

      Raises:
          TypeError: with ``MSG_BAD_ANNOT_TYPE`` if this is no file
              attachment annotation.
          ValueError: with ``MSG_BAD_BUFFER`` if buffer_ is truthy but could
              not be converted.
      """
      CheckParent(self)
      annot = self.this
      annot_obj = mupdf.pdf_annot_obj(annot)
      pdf = mupdf.pdf_get_bound_document(annot_obj)  # the owning PDF
      annot_type = mupdf.pdf_annot_type(annot)
      if annot_type != mupdf.PDF_ANNOT_FILE_ATTACHMENT:
          raise TypeError( MSG_BAD_ANNOT_TYPE)

      # the object for file content
      stream = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('FS'), PDF_NAME('EF'), PDF_NAME('F'))
      if not stream.m_internal:
          RAISEPY( "bad PDF: no /EF object", JM_Exc_FileDataError)

      fs = mupdf.pdf_dict_get(annot_obj, PDF_NAME('FS'))

      # file content given
      res = JM_BufferFromBytes(buffer_)
      if buffer_ and not res.m_internal:
          raise ValueError( MSG_BAD_BUFFER)
      if res:
          JM_update_stream(pdf, stream, res, 1)
          # adjust /DL and /Size parameters
          size, _ = mupdf.fz_buffer_storage(res)
          size_obj = mupdf.pdf_new_int(size)
          mupdf.pdf_dict_put(stream, PDF_NAME('DL'), size_obj)
          mupdf.pdf_dict_putl(stream, size_obj, PDF_NAME('Params'), PDF_NAME('Size'))

      if filename:
          for key in ('F', 'UF'):
              mupdf.pdf_dict_put_text_string(stream, PDF_NAME(key), filename)
              mupdf.pdf_dict_put_text_string(fs, PDF_NAME(key), filename)
          mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('Contents'), filename)

      if ufilename:
          mupdf.pdf_dict_put_text_string(stream, PDF_NAME('UF'), ufilename)
          mupdf.pdf_dict_put_text_string(fs, PDF_NAME('UF'), ufilename)

      if desc:
          mupdf.pdf_dict_put_text_string(stream, PDF_NAME('Desc'), desc)
          mupdf.pdf_dict_put_text_string(fs, PDF_NAME('Desc'), desc)
  1672. @staticmethod
  1673. def update_timing_test():
  1674. total = 0
  1675. for i in range( 30*1000):
  1676. total += i
  1677. return total
  @property
  def vertices(self):
      """Annotation vertex points.

      Returns:
          For /Vertices, /L, /QuadPoints or /CL (first one present wins): a
          list of ``(x, y)`` tuples. For /InkList: a list of such lists, one
          per sub-array. ``None`` if none of these keys exists. Every point
          is transformed with the page transformation matrix concatenated
          with the page derotation matrix.
      """
      CheckParent(self)
      annot = self.this
      assert isinstance(annot, mupdf.PdfAnnot)
      annot_obj = mupdf.pdf_annot_obj(annot)
      page = _pdf_annot_page(annot)
      page_ctm = mupdf.FzMatrix()  # page transformation matrix
      dummy = mupdf.FzRect()  # Out-param for mupdf.pdf_page_transform().
      mupdf.pdf_page_transform(page, dummy, page_ctm)
      derot = JM_derotate_page_matrix(page)
      page_ctm = mupdf.fz_concat(page_ctm, derot)

      def transformed_points(array_obj):
          """Read consecutive float pairs and transform each as a point."""
          collected = []
          for idx in range(0, mupdf.pdf_array_len(array_obj), 2):
              x = mupdf.pdf_to_real(mupdf.pdf_array_get(array_obj, idx))
              y = mupdf.pdf_to_real(mupdf.pdf_array_get(array_obj, idx + 1))
              pt = mupdf.fz_transform_point(mupdf.FzPoint(x, y), page_ctm)
              collected.append((pt.x, pt.y))
          return collected

      #----------------------------------------------------------------
      # The following objects occur in different annotation types, so at
      # most one of them is expected to be present. They are looked up
      # lazily, in this order, until one is found.
      #----------------------------------------------------------------
      fallbacks = (
          lambda: mupdf.pdf_dict_get(annot_obj, PDF_NAME('L')),
          lambda: mupdf.pdf_dict_get(annot_obj, PDF_NAME('QuadPoints')),
          lambda: mupdf.pdf_dict_gets(annot_obj, 'CL'),
      )
      flat = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Vertices'))
      for lookup in fallbacks:
          if flat.m_internal:
              break
          flat = lookup()

      if flat.m_internal:
          # handle lists with 1-level depth
          return transformed_points(flat)

      ink = mupdf.pdf_dict_gets(annot_obj, 'InkList')
      if ink.m_internal:
          # InkList has 2-level lists
          return [
              transformed_points(mupdf.pdf_array_get(ink, i))
              for i in range(mupdf.pdf_array_len(ink))
          ]
  @property
  def xref(self):
      """Annotation xref number, as reported by ``mupdf.pdf_to_num``."""
      CheckParent(self)
      annot_obj = mupdf.pdf_annot_obj(self.this)
      return mupdf.pdf_to_num(annot_obj)
  class Archive:
      """Multi-archive: a collection of mounted sub-archives.

      ``self.this`` is the MuPDF multi archive; ``self._subarchives`` is a
      Python-side list of dicts (keys "fmt", "entries", "path") describing
      what has been added.
      """

      def __init__( self, *args):
          '''
          Archive(dirname [, path]) - from folder
          Archive(file [, path]) - from file name or object
          Archive(data, name) - from memory item
          Archive() - empty archive
          Archive(archive [, path]) - from archive
          '''
          self._subarchives = list()
          self.this = mupdf.fz_new_multi_archive()
          if args:
              self.add( *args)

      def __repr__( self):
          return f'Archive, sub-archives: {len(self._subarchives)}'

      def _add_arch( self, subarch, path=None):
          """Mount another archive under the virtual path `path`."""
          # MuPDF needs the underlying archive handle, not the Python
          # wrapper; a raw handle is still accepted unchanged.
          if isinstance(subarch, Archive):
              subarch = subarch.this
          mupdf.fz_mount_multi_archive( self.this, subarch, path)

      def _add_dir( self, folder, path=None):
          """Mount a directory of the file system."""
          sub = mupdf.fz_open_directory( folder)
          mupdf.fz_mount_multi_archive( self.this, sub, path)

      def _add_treeitem( self, memory, name, path=None):
          """Mount a single in-memory item under the entry name `name`."""
          buff = JM_BufferFromBytes( memory)
          sub = mupdf.fz_new_tree_archive( mupdf.FzTree())
          mupdf.fz_tree_archive_add_buffer( sub, name, buff)
          mupdf.fz_mount_multi_archive( self.this, sub, path)

      def _add_ziptarfile( self, filepath, type_, path=None):
          """Mount a zip (type_ == 1) or tar (otherwise) file given by name."""
          if type_ == 1:
              sub = mupdf.fz_open_zip_archive( filepath)
          else:
              sub = mupdf.fz_open_tar_archive( filepath)
          mupdf.fz_mount_multi_archive( self.this, sub, path)

      def _add_ziptarmemory( self, memory, type_, path=None):
          """Mount a zip (type_ == 1) or tar (otherwise) archive held in memory."""
          buff = JM_BufferFromBytes( memory)
          stream = mupdf.fz_open_buffer( buff)
          if type_==1:
              sub = mupdf.fz_open_zip_archive_with_stream( stream)
          else:
              sub = mupdf.fz_open_tar_archive_with_stream( stream)
          mupdf.fz_mount_multi_archive( self.this, sub, path)

      def add( self, content, path=None):
          '''
          Add a sub-archive.

          Args:
              content:
                  The content to be added. May be one of:
                      `str` - must be path of directory or file.
                      `bytes`, `bytearray`, `io.BytesIO` - raw data.
                      `zipfile.Zipfile`.
                      `tarfile.TarFile`.
                      `pymupdf.Archive`.
                      A two-item tuple `(data, name)`.
                      List or tuple (but not tuple with length 2) of the above.
              path: (str) a "virtual" path name, under which the elements
                  of content can be retrieved. Use it to e.g. cope with
                  duplicate element names. For a single file or raw binary
                  data it is required and serves as the entry name.

          Raises:
              AssertionError: if a file or raw data is given without a
                  non-empty string `path`.
              ValueError: if a string is neither a file nor a directory.
              TypeError: if the type of content is not recognised.
          '''
          def is_binary_data(x):
              return isinstance(x, (bytes, bytearray, io.BytesIO))

          def make_subarch(entries, mount, fmt):
              """Record the addition; consecutive tree items with the same
              path are merged into one list entry."""
              subarch = dict(fmt=fmt, entries=entries, path=mount)
              if fmt != "tree" or self._subarchives == []:
                  self._subarchives.append(subarch)
              else:
                  ltree = self._subarchives[-1]
                  if ltree["fmt"] != "tree" or ltree["path"] != subarch["path"]:
                      self._subarchives.append(subarch)
                  else:
                      ltree["entries"].extend(subarch["entries"])
                      self._subarchives[-1] = ltree

          if isinstance(content, pathlib.Path):
              content = str(content)

          if isinstance(content, str):
              if os.path.isdir(content):
                  self._add_dir(content, path)
                  return make_subarch(os.listdir(content), path, 'dir')
              elif os.path.isfile(content):
                  assert isinstance(path, str) and path != '', \
                          f'Need name for binary content, but {path=}.'
                  with io.open(content, 'rb') as f:
                      ff = f.read()
                  self._add_treeitem(ff, path)
                  return make_subarch([path], None, 'tree')
              else:
                  raise ValueError(f'Not a file or directory: {content!r}')

          elif is_binary_data(content):
              # The comma before the message matters: without it the f-string
              # is concatenated to '' and becomes part of the comparison, so
              # an empty path would never be rejected.
              assert isinstance(path, str) and path != '', \
                      f'Need name for binary content, but {path=}.'
              self._add_treeitem(content, path)
              return make_subarch([path], None, 'tree')

          elif isinstance(content, zipfile.ZipFile):
              filename = getattr(content, "filename", None)
              if filename is None:
                  fp = content.fp.getvalue()
                  self._add_ziptarmemory(fp, 1, path)
              else:
                  self._add_ziptarfile(filename, 1, path)
              return make_subarch(content.namelist(), path, 'zip')

          elif isinstance(content, tarfile.TarFile):
              filename = getattr(content.fileobj, "name", None)
              if filename is None:
                  fp = content.fileobj
                  if not isinstance(fp, io.BytesIO):
                      fp = fp.fileobj
                  self._add_ziptarmemory(fp.getvalue(), 0, path)
              else:
                  self._add_ziptarfile(filename, 0, path)
              return make_subarch(content.getnames(), path, 'tar')

          elif isinstance(content, Archive):
              self._add_arch(content, path)
              return make_subarch([], path, 'multi')

          if isinstance(content, tuple) and len(content) == 2:
              # covers the tree item plus path
              data, name = content
              assert isinstance(name, str), f'Unexpected {type(name)=}'
              if is_binary_data(data):
                  self._add_treeitem(data, name, path=path)
              elif isinstance(data, str):
                  if os.path.isfile(data):
                      with io.open(data, 'rb') as f:
                          ff = f.read()
                      self._add_treeitem(ff, name, path=path)
              else:
                  assert 0, f'Unexpected {type(data)=}.'
              return make_subarch([name], path, 'tree')

          elif hasattr(content, '__getitem__'):
              # Deal with sequence of disparate items.
              for item in content:
                  self.add(item, path)
              return

          else:
              raise TypeError(f'Unrecognised type {type(content)}.')

      @property
      def entry_list( self):
          '''
          List of sub archives.
          '''
          return self._subarchives

      def has_entry( self, name):
          """Return MuPDF's answer to whether entry `name` exists."""
          return mupdf.fz_has_archive_entry( self.this, name)

      def read_entry( self, name):
          """Return the content of entry `name` via ``JM_BinFromBuffer``."""
          buff = mupdf.fz_read_archive_entry( self.this, name)
          return JM_BinFromBuffer( buff)
  1877. class Xml:
  1878. def __enter__(self):
  1879. return self
  1880. def __exit__(self, *args):
  1881. pass
  def __init__(self, rhs):
      """Wrap an existing ``mupdf.FzXml`` or parse a string as HTML5.

      Args:
          rhs: either a ``mupdf.FzXml`` (stored as is) or a ``str``, which is
              copied into a MuPDF buffer and parsed with
              ``mupdf.fz_parse_xml_from_html5``. Anything else fails an
              assertion.
      """
      if isinstance(rhs, mupdf.FzXml):
          self.this = rhs
          return
      if isinstance(rhs, str):
          html_buffer = mupdf.fz_new_buffer_from_copied_data(rhs)
          self.this = mupdf.fz_parse_xml_from_html5(html_buffer)
          return
      assert 0, f'Unsupported type for rhs: {type(rhs)}'
  1890. def _get_node_tree( self):
  1891. def show_node(node, items, shift):
  1892. while node is not None:
  1893. if node.is_text:
  1894. items.append((shift, f'"{node.text}"'))
  1895. node = node.next
  1896. continue
  1897. items.append((shift, f"({node.tagname}"))
  1898. for k, v in node.get_attributes().items():
  1899. items.append((shift, f"={k} '{v}'"))
  1900. child = node.first_child
  1901. if child:
  1902. items = show_node(child, items, shift + 1)
  1903. items.append((shift, f"){node.tagname}"))
  1904. node = node.next
  1905. return items
  1906. shift = 0
  1907. items = []
  1908. items = show_node(self, items, shift)
  1909. return items
  1910. def add_bullet_list(self):
  1911. """Add bulleted list ("ul" tag)"""
  1912. child = self.create_element("ul")
  1913. self.append_child(child)
  1914. return child
  1915. def add_class(self, text):
  1916. """Set some class via CSS. Replaces complete class spec."""
  1917. cls = self.get_attribute_value("class")
  1918. if cls is not None and text in cls:
  1919. return self
  1920. self.remove_attribute("class")
  1921. if cls is None:
  1922. cls = text
  1923. else:
  1924. cls += " " + text
  1925. self.set_attribute("class", cls)
  1926. return self
  1927. def add_code(self, text=None):
  1928. """Add a "code" tag"""
  1929. child = self.create_element("code")
  1930. if type(text) is str:
  1931. child.append_child(self.create_text_node(text))
  1932. prev = self.span_bottom()
  1933. if prev is None:
  1934. prev = self
  1935. prev.append_child(child)
  1936. return self
  1937. def add_codeblock(self):
  1938. """Add monospaced lines ("pre" node)"""
  1939. child = self.create_element("pre")
  1940. self.append_child(child)
  1941. return child
  1942. def add_description_list(self):
  1943. """Add description list ("dl" tag)"""
  1944. child = self.create_element("dl")
  1945. self.append_child(child)
  1946. return child
  def add_division(self):
      """Append a "div" element to this node and return it."""
      division = self.create_element("div")
      self.append_child(division)
      return division
  def add_header(self, level=1):
      """Append a header element "h1" .. "h6" and return it.

      When this node is itself a header or a paragraph, the new header is
      appended to this node's parent instead of to this node.

      Raises:
          ValueError: if `level` is not in 1..6.
      """
      if level not in range(1, 7):
          raise ValueError("Header level must be in [1, 6]")
      own_tag = self.tagname
      header = self.create_element(f"h{level}")
      if own_tag in ("h1", "h2", "h3", "h4", "h5", "h6", "p"):
          container = self.parent
      else:
          container = self
      container.append_child(header)
      return header
  def add_horizontal_line(self):
      """Append a horizontal line ("hr" element) and return it."""
      rule = self.create_element("hr")
      self.append_child(rule)
      return rule
  def add_image(self, name, width=None, height=None, imgfloat=None, align=None):
      """Append an image ("img" element) and return it.

      `name` becomes the "src" attribute. `width`, `height` and `align` are
      written as attributes of the same name when given; `imgfloat` is
      written as a "float: ..." style.
      """
      image = self.create_element("img")
      for key, value in (("width", width), ("height", height)):
          if value is not None:
              image.set_attribute(key, f"{value}")
      if imgfloat is not None:
          image.set_attribute("style", f"float: {imgfloat}")
      if align is not None:
          image.set_attribute("align", f"{align}")
      image.set_attribute("src", f"{name}")
      self.append_child(image)
      return image
  def add_link(self, href, text=None):
      """Append a hyperlink ("a" element) pointing to `href`.

      The link text is `text` if it is a string, otherwise `href` itself.
      The element is attached to the node returned by span_bottom(), or to
      this node when span_bottom() returns None. Returns this node.
      """
      anchor = self.create_element("a")
      label = text if isinstance(text, str) else href
      anchor.set_attribute("href", href)
      anchor.append_child(self.create_text_node(label))
      target = self.span_bottom()
      if target is None:
          target = self
      target.append_child(anchor)
      return self
  def add_list_item(self):
      """Append a list item ("li" element) to a numbered or bulleted list.

      Raises:
          ValueError: if this node is neither an "ol" nor a "ul" element.
      """
      if self.tagname not in ("ol", "ul"):
          raise ValueError("cannot add list item to", self.tagname)
      item = self.create_element("li")
      self.append_child(item)
      return item
  def add_number_list(self, start=1, numtype=None):
      """Append a numbered list ("ol" element) and return it.

      A "start" attribute is written only when `start` is greater than 1;
      a "type" attribute only when `numtype` is given.
      """
      numbered = self.create_element("ol")
      if start > 1:
          numbered.set_attribute("start", str(start))
      if numtype is not None:
          numbered.set_attribute("type", numtype)
      self.append_child(numbered)
      return numbered
  def add_paragraph(self):
      """Append a paragraph ("p" element) and return it.

      If this node is itself a paragraph, the new paragraph is appended to
      this node's parent instead.
      """
      paragraph = self.create_element("p")
      container = self.parent if self.tagname == "p" else self
      container.append_child(paragraph)
      return paragraph
  def add_span(self):
      """Append a "span" element to this node and return it."""
      span = self.create_element("span")
      self.append_child(span)
      return span
  def add_style(self, text):
      """Add CSS declaration(s) to this node's "style" attribute.

      `text` is appended (separated by ";") to an existing style
      specification; it does not replace it. Nothing is changed when every
      declaration in `text` is already present.

      Args:
          text: one or more CSS declarations, e.g. "color: red".

      Returns:
          This node, to allow call chaining.
      """
      style = self.get_attribute_value("style")
      if style is not None:
          # Compare whole declarations. A plain substring test would wrongly
          # treat "color: red" as present in "background-color: red".
          existing = {part.strip() for part in style.split(";")}
          wanted = [part.strip() for part in text.split(";") if part.strip()]
          if all(part in existing for part in wanted):
              return self
      self.remove_attribute("style")
      style = text if style is None else f"{style};{text}"
      self.set_attribute("style", style)
      return self
  def add_subscript(self, text=None):
      """Append a subscript ("sub" element), optionally containing `text`.

      The element is attached to the node returned by span_bottom(), or to
      this node when span_bottom() returns None. Returns this node.
      """
      sub = self.create_element("sub")
      if type(text) is str:
          sub.append_child(self.create_text_node(text))
      target = self.span_bottom()
      if target is None:
          target = self
      target.append_child(sub)
      return self
  def add_superscript(self, text=None):
      """Append a superscript ("sup" element), optionally containing `text`.

      The element is attached to the node returned by span_bottom(), or to
      this node when span_bottom() returns None. Returns this node.
      """
      sup = self.create_element("sup")
      if type(text) is str:
          sup.append_child(self.create_text_node(text))
      target = self.span_bottom()
      if target is None:
          target = self
      target.append_child(sup)
      return self
  def add_text(self, text):
      """Append text; each line break in `text` becomes a "br" element.

      The nodes are attached to the node returned by span_bottom(), or to
      this node when span_bottom() returns None. Returns this node.
      """
      lines = text.splitlines()
      last_index = len(lines) - 1
      target = self.span_bottom()
      if target is None:
          target = self
      for index, line in enumerate(lines):
          target.append_child(self.create_text_node(line))
          # no line break after the final line
          if index != last_index:
              target.append_child(self.create_element("br"))
      return self
  def append_child(self, child):
      """Append `child` as the last child of this node."""
      parent_node = self.this
      child_node = child.this
      mupdf.fz_dom_append_child(parent_node, child_node)
  def append_styled_span(self, style):
      """Append a new "span" element carrying the CSS `style`.

      The span is attached to the node returned by span_bottom(), or to
      this node when span_bottom() returns None. The node the span was
      attached to is returned (not the span itself).
      """
      styled = self.create_element("span")
      styled.add_style(style)
      container = self.span_bottom()
      if container is None:
          container = self
      container.append_child(styled)
      return container
  def bodytag(self):
      """Return the node delivered by mupdf.fz_dom_body() for this node, wrapped as Xml."""
      body = mupdf.fz_dom_body(self.this)
      return Xml(body)
  def clone(self):
      """Return a clone of this node (via mupdf.fz_dom_clone), wrapped as Xml."""
      return Xml(mupdf.fz_dom_clone(self.this))
  @staticmethod
  def color_text(color):
      """Convert a color specification to CSS text.

      An int is converted via sRGB_to_rgb() into "rgb(...)"; a tuple or
      list becomes "rgb" followed by the tuple representation. Strings and
      values of any other type are returned unchanged.
      """
      kind = type(color)
      if kind is int:
          return f"rgb({sRGB_to_rgb(color)})"
      if kind in (tuple, list):
          return f"rgb{tuple(color)}"
      # strings and unrecognised types are passed through as they are
      return color
  def create_element(self, tag):
      """Create a new element named `tag` (not yet attached) and return it as Xml."""
      element = mupdf.fz_dom_create_element(self.this, tag)
      return Xml(element)
  def create_text_node(self, text):
      """Create a new text node holding `text` and return it as Xml."""
      node = mupdf.fz_dom_create_text_node(self.this, text)
      return Xml(node)
  def debug(self):
      """Print a list of the node tree below self.

      Each entry is indented by its nesting level; line breaks inside an
      entry are shown as a literal backslash-n.
      """
      for level, description in self._get_node_tree():
          message(" " * level + description.replace("\n", "\\n"))
  def find(self, tag, att, match):
      """Search via mupdf.fz_dom_find() using tag, attribute and value.

      Returns the found node as Xml, or None if nothing was found.
      """
      found = mupdf.fz_dom_find(self.this, tag, att, match)
      if not found.m_internal:
          return None
      return Xml(found)
  def find_next(self, tag, att, match):
      """Continue a search via mupdf.fz_dom_find_next().

      Returns the found node as Xml, or None if nothing was found.
      """
      found = mupdf.fz_dom_find_next(self.this, tag, att, match)
      if not found.m_internal:
          return None
      return Xml(found)
  @property
  def first_child(self):
      """Return the first child node as Xml, or None.

      None is returned for text nodes (which have no children) and for
      elements without children.
      """
      if mupdf.fz_xml_text(self.this):
          # text node, has no child.
          return None
      # Pass the wrapped MuPDF node explicitly, as every other method of
      # this class does, instead of the Python wrapper object itself.
      ret = mupdf.fz_dom_first_child(self.this)
      if ret.m_internal:
          return Xml(ret)
      return None
  def get_attribute_value(self, key):
      """Return the value of attribute `key` as delivered by mupdf.fz_dom_attribute()."""
      assert key
      value = mupdf.fz_dom_attribute(self.this, key)
      return value
  def get_attributes(self):
      """Return all attributes of this node as a dict {name: value}.

      Returns None for text nodes. Attributes are read by ascending index
      until MuPDF delivers an empty name or value.
      """
      if mupdf.fz_xml_text(self.this):
          # text node, has no attributes.
          return
      attributes = {}
      index = 0
      while True:
          val, key = mupdf.fz_dom_get_attribute(self.this, index)
          if not (val and key):
              break
          attributes[key] = val
          index += 1
      return attributes
  def insert_after(self, node):
      """Call mupdf.fz_dom_insert_after() with this node and `node`."""
      own, other = self.this, node.this
      mupdf.fz_dom_insert_after(own, other)
  def insert_before(self, node):
      """Call mupdf.fz_dom_insert_before() with this node and `node`."""
      own, other = self.this, node.this
      mupdf.fz_dom_insert_before(own, other)
  def insert_text(self, text):
      """Append text directly to this node; line breaks become "br" elements.

      Returns this node.
      """
      lines = text.splitlines()
      last_index = len(lines) - 1
      for index, line in enumerate(lines):
          self.append_child(self.create_text_node(line))
          # no line break after the final line
          if index != last_index:
              self.append_child(self.create_element("br"))
      return self
  @property
  def is_text(self):
      """True if this node is a text node, i.e. its `text` is not None."""
      content = self.text
      return content is not None
  @property
  def last_child(self):
      """Return the last child node, or None if there are no children."""
      current = self.first_child
      if current is None:
          return None
      # walk the sibling chain until there is no successor
      while True:
          successor = current.next
          if not successor:
              return current
          current = successor
  @property
  def next(self):
      """Return the node delivered by mupdf.fz_dom_next() as Xml, or None if there is none."""
      sibling = mupdf.fz_dom_next(self.this)
      if not sibling.m_internal:
          return None
      return Xml(sibling)
  @property
  def parent(self):
      """Return the node delivered by mupdf.fz_dom_parent() as Xml, or None if there is none."""
      owner = mupdf.fz_dom_parent(self.this)
      if not owner.m_internal:
          return None
      return Xml(owner)
  @property
  def previous(self):
      """Return the node delivered by mupdf.fz_dom_previous() as Xml, or None if there is none."""
      sibling = mupdf.fz_dom_previous(self.this)
      if not sibling.m_internal:
          return None
      return Xml(sibling)
  def remove(self):
      """Remove this node via mupdf.fz_dom_remove()."""
      node = self.this
      mupdf.fz_dom_remove(node)
  def remove_attribute(self, key):
      """Remove attribute `key` from this node."""
      assert key
      node = self.this
      mupdf.fz_dom_remove_attribute(node, key)
  @property
  def root(self):
      """Return the node delivered by mupdf.fz_xml_root() for this node, wrapped as Xml."""
      top = mupdf.fz_xml_root(self.this)
      return Xml(top)
  def set_align(self, align):
      """Set text alignment via CSS style.

      `align` is either a string used verbatim as the CSS value, or one of
      the TEXT_ALIGN_LEFT / CENTER / RIGHT / JUSTIFY constants.

      Raises:
          ValueError: if `align` is neither a string nor a known constant.
      """
      if isinstance(align, str):
          keyword = align
      else:
          known = (
              (TEXT_ALIGN_LEFT, "left"),
              (TEXT_ALIGN_CENTER, "center"),
              (TEXT_ALIGN_RIGHT, "right"),
              (TEXT_ALIGN_JUSTIFY, "justify"),
          )
          for constant, name in known:
              if align == constant:
                  keyword = name
                  break
          else:
              raise ValueError(f"Unrecognised {align=}")
      self.add_style("text-align: %s" % keyword)
      return self
  def set_attribute(self, key, value):
      """Add attribute `key` with `value` to this node via mupdf.fz_dom_add_attribute()."""
      assert key
      node = self.this
      mupdf.fz_dom_add_attribute(node, key, value)
  def set_bgcolor(self, color):
      """Set background color via CSS style.

      The style is put on this node itself, not on a span (the original
      note says it does not work on span level). Returns this node.
      """
      css_color = self.color_text(color)
      self.add_style("background-color: %s" % css_color)
      return self
  def set_bold(self, val=True):
      """Switch bold on (truthy `val`) or off via a styled span. Returns this node."""
      weight = "bold" if val else "normal"
      self.append_styled_span("font-weight: %s" % weight)
      return self
  def set_color(self, color):
      """Set text color via a styled span. Returns this node."""
      css_color = self.color_text(color)
      self.append_styled_span("color: %s" % css_color)
      return self
  def set_columns(self, cols):
      """Set the number of text columns via a styled span. Returns this node."""
      declaration = f"columns: {cols}"
      self.append_styled_span(declaration)
      return self
  def set_font(self, font):
      """Set the font-family name via a styled span. Returns this node."""
      declaration = "font-family: %s" % font
      self.append_styled_span(declaration)
      return self
  def set_fontsize(self, fontsize):
      """Set the font size via a styled span.

      A string is used verbatim; any other value gets the unit "px"
      appended. Returns this node.
      """
      unit = "" if type(fontsize) is str else "px"
      self.append_styled_span(f"font-size: {fontsize}{unit}")
      return self
  def set_id(self, unique):
      """Set a unique id on this node.

      Raises:
          ValueError: if a search from the root already finds this id.
      """
      # check uniqueness, searching from the root node
      if self.root.find(None, "id", unique):
          raise ValueError(f"id '{unique}' already exists")
      self.set_attribute("id", unique)
      return self
  def set_italic(self, val=True):
      """Switch italic on (truthy `val`) or off via a styled span. Returns this node."""
      slant = "italic" if val else "normal"
      self.append_styled_span("font-style: %s" % slant)
      return self
  def set_leading(self, leading):
      """Set inter-line spacing ("-mupdf-leading") on this node - block-level only."""
      declaration = f"-mupdf-leading: {leading}"
      self.add_style(declaration)
      return self
  def set_letter_spacing(self, spacing):
      """Set inter-letter spacing via a styled span. Returns this node."""
      declaration = f"letter-spacing: {spacing}"
      self.append_styled_span(declaration)
      return self
  def set_lineheight(self, lineheight):
      """Set the line height on this node via CSS style - block-level only."""
      declaration = f"line-height: {lineheight}"
      self.add_style(declaration)
      return self
  def set_margins(self, val):
      """Set margin values via CSS style.

      Args:
          val: value for the CSS "margin" shorthand, e.g. "10px" or
              "10px 5px".

      Returns:
          This node, to allow call chaining.
      """
      # The CSS shorthand property is named "margin"; there is no property
      # called "margins", so the previous spelling produced a declaration
      # that CSS does not define.
      text = "margin: %s" % val
      self.append_styled_span(text)
      return self
  def set_opacity(self, opacity):
      """Set opacity via a styled span. Returns this node."""
      declaration = f"opacity: {opacity}"
      self.append_styled_span(declaration)
      return self
  def set_pagebreak_after(self):
      """Request a page break after this node via CSS style. Returns this node."""
      self.add_style("page-break-after: always")
      return self
  def set_pagebreak_before(self):
      """Request a page break before this node via CSS style. Returns this node."""
      self.add_style("page-break-before: always")
      return self
  def set_properties(
      self,
      align=None,
      bgcolor=None,
      bold=None,
      color=None,
      columns=None,
      font=None,
      fontsize=None,
      indent=None,
      italic=None,
      leading=None,
      letter_spacing=None,
      lineheight=None,
      margins=None,
      pagebreak_after=None,
      pagebreak_before=None,
      word_spacing=None,
      unqid=None,
      cls=None,
  ):
      """Set any or all properties of a node.

      To be used for existing nodes preferably.

      The requested styles are first applied to a temporary "div" appended
      to the root node. The style texts found on that div and on its chain
      of first children are then joined with ";" and written as the "style"
      attribute of this node. `unqid` and `cls` are applied to this node
      directly via set_id() and add_class().

      Each parameter left at None is ignored. `pagebreak_after` and
      `pagebreak_before` are flags: a page break is only requested for a
      truthy value.

      Returns:
          This node, to allow call chaining.

      Raises:
          ValueError: propagated from the individual setters, e.g. for an
              unknown alignment or an id that already exists.
      """
      temp = self.root.add_division()
      # The temporary div must never stay in the DOM, even when one of the
      # setters raises - hence the try / finally.
      try:
          if align is not None:
              temp.set_align(align)
          if bgcolor is not None:
              temp.set_bgcolor(bgcolor)
          if bold is not None:
              temp.set_bold(bold)
          if color is not None:
              temp.set_color(color)
          if columns is not None:
              temp.set_columns(columns)
          if font is not None:
              temp.set_font(font)
          if fontsize is not None:
              temp.set_fontsize(fontsize)
          if indent is not None:
              temp.set_text_indent(indent)
          if italic is not None:
              temp.set_italic(italic)
          if leading is not None:
              temp.set_leading(leading)
          if letter_spacing is not None:
              temp.set_letter_spacing(letter_spacing)
          if lineheight is not None:
              temp.set_lineheight(lineheight)
          if margins is not None:
              temp.set_margins(margins)
          # Flags: False / 0 must not request a page break.
          if pagebreak_after:
              temp.set_pagebreak_after()
          if pagebreak_before:
              temp.set_pagebreak_before()
          if word_spacing is not None:
              temp.set_word_spacing(word_spacing)
          if unqid is not None:
              self.set_id(unqid)
          if cls is not None:
              self.add_class(cls)

          # Collect the style of the div itself plus the styles of the
          # nested nodes below it (following first children).
          styles = []
          top_style = temp.get_attribute_value("style")
          if top_style is not None:
              styles.append(top_style)
          child = temp.first_child
          while child:
              child_style = child.get_attribute_value("style")
              # a node without a style must not break the join below
              if child_style is not None:
                  styles.append(child_style)
              child = child.first_child
          self.set_attribute("style", ";".join(styles))
      finally:
          temp.remove()
      return self
  def set_text_indent(self, indent):
      """Set text indentation on this node via CSS style - block-level only."""
      declaration = f"text-indent: {indent}"
      self.add_style(declaration)
      return self
  def set_underline(self, val="underline"):
      """Set the CSS "text-decoration" to `val` via a styled span. Returns this node."""
      declaration = "text-decoration: %s" % val
      self.append_styled_span(declaration)
      return self
  def set_word_spacing(self, spacing):
      """Set inter-word spacing via a styled span. Returns this node."""
      declaration = f"word-spacing: {spacing}"
      self.append_styled_span(declaration)
      return self
  def span_bottom(self):
      """Find deepest level in stacked spans.

      Starting from the last child that is not a text node: if that child
      is not a "span", None is returned. Otherwise nested spans are
      followed downwards and the innermost span reached is returned.
      """
      node = self.last_child
      # skip trailing text nodes, walking backwards
      while node is not None and node.is_text:
          node = node.previous
      if node is None or node.tagname != "span":
          return None

      deepest = self
      while node is not None:
          if node.tagname in ("a", "sub", "sup", "body") or node.is_text:
              # these never contain the span chain - look at the next sibling
              node = node.next
          elif node.tagname == "span":
              deepest = node
              node = node.first_child
          else:
              break
      return deepest
  @property
  def tagname(self):
      """Return the tag name of this node as delivered by mupdf.fz_xml_tag()."""
      tag = mupdf.fz_xml_tag(self.this)
      return tag
  @property
  def text(self):
      """Return the text of this node as delivered by mupdf.fz_xml_text()."""
      content = mupdf.fz_xml_text(self.this)
      return content
  # Aliases of add_code(): all three names call the same method, which
  # creates a "code" element.
  add_var = add_code
  add_samp = add_code
  add_kbd = add_code
  class Colorspace:
      """Python wrapper around a MuPDF colorspace, stored in `self.this`."""

      def __init__(self, type_):
          """Supported are GRAY, RGB and CMYK.

          `type_` is either an existing mupdf.FzColorspace, which is wrapped
          as is, or one of the constants CS_GRAY, CS_CMYK, CS_RGB. Any other
          value results in the RGB colorspace.
          """
          if isinstance(type_, mupdf.FzColorspace):
              self.this = type_
          elif type_ == CS_GRAY:
              self.this = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_GRAY)
          elif type_ == CS_CMYK:
              self.this = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_CMYK)
          else:
              # CS_RGB, and also the fallback for unrecognised values
              self.this = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB)

      def __repr__(self):
          # A wrapped FzColorspace may have a component count other than
          # 1, 3 or 4. Use a lookup with a default so repr() cannot raise
          # IndexError for such colorspaces.
          x = {1: "GRAY", 3: "RGB", 4: "CMYK"}.get(self.n, "")
          return "Colorspace(CS_%s) - %s" % (x, self.name)

      def _name(self):
          """Return the colorspace name as delivered by MuPDF."""
          return mupdf.fz_colorspace_name(self.this)

      @property
      def n(self):
          """Size of one pixel."""
          return mupdf.fz_colorspace_n(self.this)

      @property
      def name(self):
          """Name of the Colorspace."""
          return self._name()
  class DeviceWrapper:
      """Holds a MuPDF device in `self.this`.

      Accepted constructor arguments (checked with args_match()):
          (mupdf.FzDevice,)          - use the given device
          (Pixmap, clip)             - draw device for a pixmap
          (mupdf.FzDisplayList,)     - list device
          (mupdf.FzStextPage, flags) - structured-text device
      """

      def __init__(self, *args):
          if args_match(args, mupdf.FzDevice):
              (self.this,) = args
          elif args_match(args, Pixmap, None):
              pixmap, clip = args
              irect = JM_irect_from_py(clip)
              matrix = mupdf.FzMatrix()
              # an infinite clip means: no restriction to a bbox
              if mupdf.fz_is_infinite_irect(irect):
                  self.this = mupdf.fz_new_draw_device(matrix, pixmap)
              else:
                  self.this = mupdf.fz_new_draw_device_with_bbox(matrix, pixmap, irect)
          elif args_match(args, mupdf.FzDisplayList):
              (display_list,) = args
              self.this = mupdf.fz_new_list_device(display_list)
          elif args_match(args, mupdf.FzStextPage, None):
              text_page, flags = args
              options = mupdf.FzStextOptions(flags)
              self.this = mupdf.fz_new_stext_device(text_page, options)
          else:
              raise Exception(f'Unrecognised args for DeviceWrapper: {args!r}')
  class DisplayList:
      """Python wrapper around a MuPDF display list, stored in `self.this`."""

      def __del__(self):
          if not type(self) is DisplayList: return
          self.thisown = False

      def __init__(self, *args):
          """Create from a mupdf.FzRect or wrap an existing mupdf.FzDisplayList.

          Raises:
              AssertionError: for any other arguments.
          """
          if len(args) == 1 and isinstance(args[0], mupdf.FzRect):
              self.this = mupdf.FzDisplayList(args[0])
          elif len(args) == 1 and isinstance(args[0], mupdf.FzDisplayList):
              self.this = args[0]
          else:
              # Raise explicitly (same exception type as before): a bare
              # `assert` is stripped under `python -O`, which would leave a
              # half-constructed object without `self.this`.
              raise AssertionError(f'Unrecognised {args=}')

      def get_pixmap(self, matrix=None, colorspace=None, alpha=0, clip=None):
          """Render the display list to a pixmap.

          A `colorspace` that is not a Colorspace object results in RGB.
          """
          if isinstance(colorspace, Colorspace):
              colorspace = colorspace.this
          else:
              colorspace = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB)
          val = JM_pixmap_from_display_list(self.this, matrix, colorspace, alpha, clip, None)
          val.thisown = True
          return val

      def get_textpage(self, flags=3):
          """Make a TextPage from a DisplayList."""
          stext_options = mupdf.FzStextOptions()
          stext_options.flags = flags
          val = mupdf.FzStextPage(self.this, stext_options)
          val.thisown = True
          return val

      @property
      def rect(self):
          """Bounding rectangle of the display list as a Rect."""
          val = JM_py_from_rect(mupdf.fz_bound_display_list(self.this))
          val = Rect(val)
          return val

      def run(self, dw, m, area):
          """Run the display list through the device of `dw`.

          Args:
              dw: device wrapper. Its device is taken from `dw.device` when
                  that attribute exists, else from `dw.this` - DeviceWrapper
                  stores its device in `this` and has no `device` attribute.
              m: matrix, converted with JM_matrix_from_py().
              area: rectangle, converted with JM_rect_from_py().
          """
          device = getattr(dw, "device", None)
          if device is None:
              device = dw.this
          mupdf.fz_run_display_list(
                  self.this,
                  device,
                  JM_matrix_from_py(m),
                  JM_rect_from_py(area),
                  mupdf.FzCookie(),
                  )
  # When the `extra` module is in use (g_use_extra is true), bind its
  # FzDocument_insert_pdf function to a module-level name.
  if g_use_extra:
      extra_FzDocument_insert_pdf = extra.FzDocument_insert_pdf
  2500. class Document:
  2501. def __contains__(self, loc) -> bool:
  2502. if type(loc) is int:
  2503. if loc < self.page_count:
  2504. return True
  2505. return False
  2506. if type(loc) not in (tuple, list) or len(loc) != 2:
  2507. return False
  2508. chapter, pno = loc
  2509. if (0
  2510. or not isinstance(chapter, int)
  2511. or chapter < 0
  2512. or chapter >= self.chapter_count
  2513. ):
  2514. return False
  2515. if (0
  2516. or not isinstance(pno, int)
  2517. or pno < 0
  2518. or pno >= self.chapter_page_count(chapter)
  2519. ):
  2520. return False
  2521. return True
  2522. def __delitem__(self, i)->None:
  2523. if not self.is_pdf:
  2524. raise ValueError("is no PDF")
  2525. if type(i) is int:
  2526. return self.delete_page(i)
  2527. if type(i) in (list, tuple, range):
  2528. return self.delete_pages(i)
  2529. if type(i) is not slice:
  2530. raise ValueError("bad argument type")
  2531. pc = self.page_count
  2532. start = i.start if i.start else 0
  2533. stop = i.stop if i.stop else pc
  2534. step = i.step if i.step else 1
  2535. while start < 0:
  2536. start += pc
  2537. if start >= pc:
  2538. raise ValueError("bad page number(s)")
  2539. while stop < 0:
  2540. stop += pc
  2541. if stop > pc:
  2542. raise ValueError("bad page number(s)")
  2543. return self.delete_pages(range(start, stop, step))
  2544. def __enter__(self):
  2545. return self
  2546. def __exit__(self, *args):
  2547. self.close()
  @typing.overload
  def __getitem__(self, i: int = 0) -> Page:
      ...

  if sys.version_info >= (3, 9):
      # builtin generics in annotations need Python 3.9+
      @typing.overload
      def __getitem__(self, i: slice) -> list[Page]:
          ...

      @typing.overload
      def __getitem__(self, i: tuple[int, int]) -> Page:
          ...

  def __getitem__(self, i=0):
      """Return the page(s) addressed by `i`.

      A slice yields a list of pages; an int or a (chapter, page) tuple
      yields a single page loaded via load_page().

      Raises:
          IndexError: if `i` is not contained in the document.
      """
      if isinstance(i, slice):
          page_numbers = range(*i.indices(len(self)))
          return [self[j] for j in page_numbers]
      is_page_number = isinstance(i, int)
      is_location = isinstance(i, tuple) and len(i) == 2 and all(isinstance(x, int) for x in i)
      assert is_page_number or is_location, \
              f'Invalid item number: {i=}.'
      if i not in self:
          raise IndexError(f"page {i} not in document")
      return self.load_page(i)
  def __init__(self, filename=None, stream=None, filetype=None, rect=None, width=0, height=0, fontsize=11):
      """Creates a document. Use 'open' as a synonym.

      Notes:
          Basic usages:
          open() - new PDF document
          open(filename) - string or pathlib.Path, must have supported
                  file extension.
          open(type, buffer) - type: valid extension, buffer: bytes object.
          open(stream=buffer, filetype=type) - keyword version of previous.
          open(filename, filetype=type) - filename with unrecognized extension.
          rect, width, height, fontsize: layout reflowable document
          on open (e.g. EPUB). Ignored if n/a.

      Raises:
          TypeError: for an unsupported `stream` or `filename` type.
          EmptyFileError: for an empty stream or an empty file.
          FileNotFoundError: if `filename` does not exist.
          FileDataError: if `filename` is no file, or MuPDF fails to open
              the data, or an SVG document cannot be converted.
      """
      # We temporarily set JM_mupdf_show_errors=0 while we are constructing,
      # then restore its original value in a `finally:` block.
      #
      global JM_mupdf_show_errors
      JM_mupdf_show_errors_old = JM_mupdf_show_errors
      JM_mupdf_show_errors = 0
      try:
          self.is_closed = False
          self.is_encrypted = False
          # NOTE(review): the following line repeats the previous assignment.
          self.is_encrypted = False
          self.metadata = None
          self.FontInfos = []
          self.Graftmaps = {}
          self.ShownPages = {}
          self.InsertedImages = {}
          self._page_refs = weakref.WeakValueDictionary()

          # Special case: wrap an existing mupdf.PdfDocument. Nothing else
          # below is executed (except the `finally:` clause).
          if isinstance(filename, mupdf.PdfDocument):
              pdf_document = filename
              self.this = pdf_document
              self.this_is_pdf = True
              return

          # Layout dimensions: a finite `rect` overrides width / height.
          w = width
          h = height
          r = JM_rect_from_py(rect)
          if not mupdf.fz_is_infinite_rect(r):
              w = r.x1 - r.x0
              h = r.y1 - r.y0

          self._name = filename
          self.stream = stream

          if stream is not None:
              # Open from memory.
              if filename is not None and filetype is None:
                  # 2025-05-06: Use <filename> as the filetype. This is
                  # reversing precedence - we used to use <filename> if both
                  # were set.
                  filetype = filename
              # Normalise the stream to bytes / memoryview.
              if isinstance(stream, (bytes, memoryview)):
                  pass
              elif isinstance(stream, bytearray):
                  stream = bytes(stream)
              elif isinstance(stream, io.BytesIO):
                  stream = stream.getvalue()
              else:
                  raise TypeError(f"bad stream: {type(stream)=}.")
              self.stream = stream

              assert isinstance(stream, (bytes, memoryview))
              if len(stream) == 0:
                  # MuPDF raise an exception for this but also generates
                  # warnings, which is not very helpful for us. So instead we
                  # raise a specific exception.
                  raise EmptyFileError('Cannot open empty stream.')

              stream2 = mupdf.fz_open_memory(mupdf.python_buffer_data(stream), len(stream))
              try:
                  doc = mupdf.fz_open_document_with_stream(filetype if filetype else '', stream2)
              except Exception as e:
                  if g_exceptions_verbose > 1: exception_info()
                  raise FileDataError('Failed to open stream') from e

          elif filename:
              # Open from a file.
              assert not stream
              # Accept a str, an object having an "absolute" attribute
              # (converted with str()), or an object having a "name"
              # attribute.
              if isinstance(filename, str):
                  pass
              elif hasattr(filename, "absolute"):
                  filename = str(filename)
              elif hasattr(filename, "name"):
                  filename = filename.name
              else:
                  raise TypeError(f"bad filename: {type(filename)=} {filename=}.")
              self._name = filename

              # Generate our own specific exceptions. This avoids MuPDF
              # generating warnings etc.
              if not os.path.exists(filename):
                  raise FileNotFoundError(f"no such file: '{filename}'")
              elif not os.path.isfile(filename):
                  raise FileDataError(f"'{filename}' is no file")
              elif os.path.getsize(filename) == 0:
                  raise EmptyFileError(f'Cannot open empty file: {filename=}.')

              if filetype:
                  # Override the type implied by <filename>. MuPDF does not
                  # have a way to do this directly so we open via a stream.
                  try:
                      fz_stream = mupdf.fz_open_file(filename)
                      doc = mupdf.fz_open_document_with_stream(filetype, fz_stream)
                  except Exception as e:
                      if g_exceptions_verbose > 1: exception_info()
                      raise FileDataError(f'Failed to open file {filename!r} as type {filetype!r}.') from e
              else:
                  try:
                      doc = mupdf.fz_open_document(filename)
                  except Exception as e:
                      if g_exceptions_verbose > 1: exception_info()
                      raise FileDataError(f'Failed to open file {filename!r}.') from e

          else:
              # Neither stream nor filename: create a new PDF.
              pdf = mupdf.PdfDocument()
              doc = mupdf.FzDocument(pdf)

          # Apply the layout: explicit dimensions if given, else fixed
          # defaults (400 x 600, font size 11) for reflowable documents.
          if w > 0 and h > 0:
              mupdf.fz_layout_document(doc, w, h, fontsize)
          elif mupdf.fz_is_document_reflowable(doc):
              mupdf.fz_layout_document(doc, 400, 600, 11)

          self.this = doc

          # fixme: not sure where self.thisown gets initialised in PyMuPDF.
          #
          self.thisown = True

          if self.thisown:
              self._graft_id = TOOLS.gen_id()
              if self.needs_pass:
                  self.is_encrypted = True
              else: # we won't init until doc is decrypted
                  self.init_doc()
              # the following hack detects invalid/empty SVG files, which else may lead
              # to interpreter crashes
              if filename and filename.lower().endswith("svg") or filetype and "svg" in filetype.lower():
                  try:
                      _ = self.convert_to_pdf() # this seems to always work
                  except Exception as e:
                      if g_exceptions_verbose > 1: exception_info()
                      raise FileDataError("cannot open broken document") from e

          if g_use_extra:
              # Select the page count function matching the document type.
              self.this_is_pdf = isinstance( self.this, mupdf.PdfDocument)
              if self.this_is_pdf:
                  self.page_count2 = extra.page_count_pdf
              else:
                  self.page_count2 = extra.page_count_fz
      finally:
          JM_mupdf_show_errors = JM_mupdf_show_errors_old
  2702. def __len__(self) -> int:
  2703. return self.page_count
  2704. def __repr__(self) -> str:
  2705. m = "closed " if self.is_closed else ""
  2706. if self.stream is None:
  2707. if self.name == "":
  2708. return m + "Document(<new PDF, doc# %i>)" % self._graft_id
  2709. return m + "Document('%s')" % (self.name,)
  2710. return m + "Document('%s', <memory, doc# %i>)" % (self.name, self._graft_id)
  def _addFormFont(self, name, font):
      """Add new form font.

      Puts `font` (converted by JM_pdf_obj_from_str) under key `name` into
      the dictionary Root/AcroForm/DR/Font. Does nothing for non-PDF
      documents.

      Raises:
          ValueError: if the document is closed or encrypted.
          RuntimeError: if the PDF has no form font dictionary.
      """
      if self.is_closed or self.is_encrypted:
          raise ValueError("document closed or encrypted")
      pdf = _as_pdf_document(self, required=0)
      if not pdf.m_internal:
          return
      font_dict = mupdf.pdf_dict_getl(
              mupdf.pdf_trailer(pdf),
              PDF_NAME('Root'),
              PDF_NAME('AcroForm'),
              PDF_NAME('DR'),
              PDF_NAME('Font'),
              )
      if not font_dict.m_internal or not mupdf.pdf_is_dict(font_dict):
          raise RuntimeError( "PDF has no form fonts yet")
      key = mupdf.pdf_new_name(name)
      value = JM_pdf_obj_from_str(pdf, font)
      mupdf.pdf_dict_put(font_dict, key, value)
  def del_toc_item(
          self,
          idx: int,
          ) -> None:
      """Delete TOC / bookmark item by index.

      `idx` indexes the list returned by get_outline_xrefs().
      """
      outline_xrefs = self.get_outline_xrefs()
      self._remove_toc_item(outline_xrefs[idx])
  def _delToC(self):
      """Delete the TOC.

      Returns the list of xrefs of the deleted outline items followed by
      the xref of the outline root; an empty list if the document is no
      PDF or has no outlines.

      Raises:
          ValueError: if the document is closed or encrypted.
      """
      if self.is_closed or self.is_encrypted:
          raise ValueError("document closed or encrypted")
      xrefs = []
      pdf = _as_pdf_document(self, required=0)
      if not pdf.m_internal:
          return xrefs  # not a pdf

      catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
      outlines = mupdf.pdf_dict_get(catalog, PDF_NAME('Outlines'))
      if not outlines.m_internal:
          return xrefs  # no outlines or some problem

      first_item = mupdf.pdf_dict_get(outlines, PDF_NAME('First'))
      xrefs = JM_outline_xrefs(first_item, xrefs)
      item_count = len(xrefs)

      # delete the outline root: the object and its entry in the catalog
      outlines_xref = mupdf.pdf_to_num(outlines)
      mupdf.pdf_delete_object(pdf, outlines_xref)
      mupdf.pdf_dict_del(catalog, PDF_NAME('Outlines'))

      # delete every outline item
      for index in range(item_count):
          _, item_xref = JM_INT_ITEM(xrefs, index)
          mupdf.pdf_delete_object(pdf, item_xref)

      xrefs.append(outlines_xref)
      self.init_doc()
      return xrefs
  def _delete_page(self, pno):
      """Delete page `pno` from the PDF.

      If the PDF has a reverse page map afterwards, the page tree is
      dropped via ll_pdf_drop_page_tree().
      """
      pdf = _as_pdf_document(self)
      mupdf.pdf_delete_page(pdf, pno)
      internal = pdf.m_internal
      if internal.rev_page_map:
          mupdf.ll_pdf_drop_page_tree(pdf.m_internal)
  def _deleteObject(self, xref):
      """Delete object.

      Raises:
          ValueError: if `xref` is not in the range 1 .. xref length - 1.
      """
      pdf = _as_pdf_document(self)
      highest_xref = mupdf.pdf_xref_len(pdf) - 1
      if not _INRANGE(xref, 1, highest_xref):
          raise ValueError( MSG_BAD_XREF)
      mupdf.pdf_delete_object(pdf, xref)
  2775. def _do_links(
  2776. doc1: 'Document',
  2777. doc2: 'Document',
  2778. from_page: int = -1,
  2779. to_page: int = -1,
  2780. start_at: int = -1,
  2781. ) -> None:
  2782. """Insert links contained in copied page range into destination PDF.
  2783. Parameter values **must** equal those of method insert_pdf(), which must
  2784. have been previously executed.
  2785. """
  2786. #pymupdf.log( 'utils.do_links()')
  2787. # --------------------------------------------------------------------------
  2788. # internal function to create the actual "/Annots" object string
  2789. # --------------------------------------------------------------------------
  2790. def cre_annot(lnk, xref_dst, pno_src, ctm):
  2791. """Create annotation object string for a passed-in link."""
  2792. r = lnk["from"] * ctm # rect in PDF coordinates
  2793. rect = _format_g(tuple(r))
  2794. if lnk["kind"] == LINK_GOTO:
  2795. txt = annot_skel["goto1"] # annot_goto
  2796. idx = pno_src.index(lnk["page"])
  2797. p = lnk["to"] * ctm # target point in PDF coordinates
  2798. annot = txt(xref_dst[idx], p.x, p.y, lnk["zoom"], rect)
  2799. elif lnk["kind"] == LINK_GOTOR:
  2800. if lnk["page"] >= 0:
  2801. txt = annot_skel["gotor1"] # annot_gotor
  2802. pnt = lnk.get("to", Point(0, 0)) # destination point
  2803. if type(pnt) is not Point:
  2804. pnt = Point(0, 0)
  2805. annot = txt(
  2806. lnk["page"],
  2807. pnt.x,
  2808. pnt.y,
  2809. lnk["zoom"],
  2810. lnk["file"],
  2811. lnk["file"],
  2812. rect,
  2813. )
  2814. else:
  2815. txt = annot_skel["gotor2"] # annot_gotor_n
  2816. to = get_pdf_str(lnk["to"])
  2817. to = to[1:-1]
  2818. f = lnk["file"]
  2819. annot = txt(to, f, rect)
  2820. elif lnk["kind"] == LINK_LAUNCH:
  2821. txt = annot_skel["launch"] # annot_launch
  2822. annot = txt(lnk["file"], lnk["file"], rect)
  2823. elif lnk["kind"] == LINK_URI:
  2824. txt = annot_skel["uri"] # annot_uri
  2825. annot = txt(lnk["uri"], rect)
  2826. else:
  2827. annot = ""
  2828. return annot
  2829. # --------------------------------------------------------------------------
  2830. # validate & normalize parameters
  2831. if from_page < 0:
  2832. fp = 0
  2833. elif from_page >= doc2.page_count:
  2834. fp = doc2.page_count - 1
  2835. else:
  2836. fp = from_page
  2837. if to_page < 0 or to_page >= doc2.page_count:
  2838. tp = doc2.page_count - 1
  2839. else:
  2840. tp = to_page
  2841. if start_at < 0:
  2842. raise ValueError("'start_at' must be >= 0")
  2843. sa = start_at
  2844. incr = 1 if fp <= tp else -1 # page range could be reversed
  2845. # lists of source / destination page numbers
  2846. pno_src = list(range(fp, tp + incr, incr))
  2847. pno_dst = [sa + i for i in range(len(pno_src))]
  2848. # lists of source / destination page xrefs
  2849. xref_src = []
  2850. xref_dst = []
  2851. for i in range(len(pno_src)):
  2852. p_src = pno_src[i]
  2853. p_dst = pno_dst[i]
  2854. old_xref = doc2.page_xref(p_src)
  2855. new_xref = doc1.page_xref(p_dst)
  2856. xref_src.append(old_xref)
  2857. xref_dst.append(new_xref)
  2858. # create the links for each copied page in destination PDF
  2859. for i in range(len(xref_src)):
  2860. page_src = doc2[pno_src[i]] # load source page
  2861. links = page_src.get_links() # get all its links
  2862. #log( '{pno_src=}')
  2863. #log( '{type(page_src)=}')
  2864. #log( '{page_src=}')
  2865. #log( '{=i len(links)}')
  2866. if len(links) == 0: # no links there
  2867. page_src = None
  2868. continue
  2869. ctm = ~page_src.transformation_matrix # calc page transformation matrix
  2870. page_dst = doc1[pno_dst[i]] # load destination page
  2871. link_tab = [] # store all link definitions here
  2872. for l in links:
  2873. if l["kind"] == LINK_GOTO and (l["page"] not in pno_src):
  2874. continue # GOTO link target not in copied pages
  2875. annot_text = cre_annot(l, xref_dst, pno_src, ctm)
  2876. if annot_text:
  2877. link_tab.append(annot_text)
  2878. if link_tab != []:
  2879. page_dst._addAnnot_FromString( tuple(link_tab))
  2880. #log( 'utils.do_links() returning.')
def _do_widgets(
    tar: 'Document',
    src: 'Document',
    graftmap,
    from_page: int = -1,
    to_page: int = -1,
    start_at: int = -1,
    join_duplicates=0,
) -> None:
    """Insert widgets of copied page range into target PDF.

    Parameter values **must** equal those of method insert_pdf() which
    must have been previously executed.

    Args:
        tar: target document, already containing the copied pages.
        src: source document the pages were copied from.
        graftmap: graft map handed to mupdf.pdf_graft_mapped_object().
        from_page: first source page number. Used as given (no
            normalization happens here); if greater than to_page, the
            source pages are walked in descending order.
        to_page: last source page number (inclusive).
        start_at: target page number of the first copied page.
        join_duplicates: if true, fields with equal names are joined under
            a common parent, otherwise the second field of a pair is
            renamed to make names unique.
    """
    if not src.is_form_pdf:  # nothing to do: source PDF has no fields
        return

    def clean_kid_parents(acro_fields):
        """ Make sure all kids have correct "Parent" pointers."""
        for i in range(acro_fields.pdf_array_len()):
            parent = acro_fields.pdf_array_get(i)
            kids = parent.pdf_dict_get(PDF_NAME("Kids"))
            for j in range(kids.pdf_array_len()):
                kid = kids.pdf_array_get(j)
                kid.pdf_dict_put(PDF_NAME("Parent"), parent)

    def join_widgets(pdf, acro_fields, xref1, xref2, name):
        """Called for each pair of widgets having the same name.

        Args:
            pdf: target MuPDF document
            acro_fields: object Root/AcroForm/Fields
            xref1, xref2: widget xrefs having same names
            name: (str) the name

        Result:
            Defined or updated widget parent that points to both widgets.
        """

        def re_target(pdf, acro_fields, xref1, kids1, xref2, kids2):
            """Merge widget in xref2 into "Kids" list of widget xref1.

            Args:
                xref1, kids1: target widget and its "Kids" array.
                xref2, kids2: source widget and its "Kids" array (may be empty).
            """
            # make indirect objects from widgets
            w1_ind = mupdf.pdf_new_indirect(pdf, xref1, 0)
            w2_ind = mupdf.pdf_new_indirect(pdf, xref2, 0)
            # find source widget in "Fields" array
            idx = acro_fields.pdf_array_find(w2_ind)
            acro_fields.pdf_array_delete(idx)

            if not kids2.pdf_is_array():  # source widget has no kids
                widget = mupdf.pdf_load_object(pdf, xref2)

                # delete name from widget and insert target as parent
                widget.pdf_dict_del(PDF_NAME("T"))
                widget.pdf_dict_put(PDF_NAME("Parent"), w1_ind)

                # put in target Kids
                kids1.pdf_array_push(w2_ind)
            else:  # copy source kids to target kids
                for i in range(kids2.pdf_array_len()):
                    kid = kids2.pdf_array_get(i)
                    kid.pdf_dict_put(PDF_NAME("Parent"), w1_ind)
                    kid_ind = mupdf.pdf_new_indirect(pdf, kid.pdf_to_num(), 0)
                    kids1.pdf_array_push(kid_ind)

        def new_target(pdf, acro_fields, xref1, w1, xref2, w2, name):
            """Make new "Parent" for two widgets with same name.

            Args:
                xref1, w1: first widget
                xref2, w2: second widget
                name: field name

            Result:
                Both widgets have no "Kids". We create a new object with the
                name and a "Kids" array containing the widgets.
                Original widgets must be removed from AcroForm/Fields.
            """
            # make new "Parent" object
            new = mupdf.pdf_new_dict(pdf, 5)
            new.pdf_dict_put_text_string(PDF_NAME("T"), name)
            kids = new.pdf_dict_put_array(PDF_NAME("Kids"), 2)
            new_obj = mupdf.pdf_add_object(pdf, new)
            new_obj_xref = new_obj.pdf_to_num()
            new_ind = mupdf.pdf_new_indirect(pdf, new_obj_xref, 0)

            # copy over some required source widget properties
            # ("FT" and "AA" are moved from the first widget to the new parent)
            ft = w1.pdf_dict_get(PDF_NAME("FT"))
            w1.pdf_dict_del(PDF_NAME("FT"))
            new_obj.pdf_dict_put(PDF_NAME("FT"), ft)
            aa = w1.pdf_dict_get(PDF_NAME("AA"))
            w1.pdf_dict_del(PDF_NAME("AA"))
            new_obj.pdf_dict_put(PDF_NAME("AA"), aa)

            # remove name field, insert "Parent" field in source widgets
            w1.pdf_dict_del(PDF_NAME("T"))
            w1.pdf_dict_put(PDF_NAME("Parent"), new_ind)
            w2.pdf_dict_del(PDF_NAME("T"))
            w2.pdf_dict_put(PDF_NAME("Parent"), new_ind)

            # put source widgets in "kids" array
            ind1 = mupdf.pdf_new_indirect(pdf, xref1, 0)
            ind2 = mupdf.pdf_new_indirect(pdf, xref2, 0)
            kids.pdf_array_push(ind1)
            kids.pdf_array_push(ind2)

            # remove source widgets from "AcroForm/Fields"
            idx = acro_fields.pdf_array_find(ind1)
            acro_fields.pdf_array_delete(idx)
            idx = acro_fields.pdf_array_find(ind2)
            acro_fields.pdf_array_delete(idx)

            # the new parent replaces both widgets in "AcroForm/Fields"
            acro_fields.pdf_array_push(new_ind)

        w1 = mupdf.pdf_load_object(pdf, xref1)
        w2 = mupdf.pdf_load_object(pdf, xref2)
        kids1 = w1.pdf_dict_get(PDF_NAME("Kids"))
        kids2 = w2.pdf_dict_get(PDF_NAME("Kids"))

        # check which widget has a suitable "Kids" array
        if kids1.pdf_is_array():
            re_target(pdf, acro_fields, xref1, kids1, xref2, kids2)  # pylint: disable=arguments-out-of-order
        elif kids2.pdf_is_array():
            re_target(pdf, acro_fields, xref2, kids2, xref1, kids1)  # pylint: disable=arguments-out-of-order
        else:
            new_target(pdf, acro_fields, xref1, w1, xref2, w2, name)  # pylint: disable=arguments-out-of-order

    def get_kids(parent, kids_list):
        """Return xref list of leaf kids for a parent.

        Call with an empty list.
        """
        kids = mupdf.pdf_dict_get(parent, PDF_NAME("Kids"))
        if not kids.pdf_is_array():
            return kids_list
        for i in range(kids.pdf_array_len()):
            kid = kids.pdf_array_get(i)
            # NOTE(review): "Kids" is tested with pdf_is_dict() here, while
            # every other check in this function uses pdf_is_array() --
            # confirm that the recursion is reached as intended.
            if mupdf.pdf_is_dict(mupdf.pdf_dict_get(kid, PDF_NAME("Kids"))):
                kids_list = get_kids(kid, kids_list)
            else:
                kids_list.append(kid.pdf_to_num())
        return kids_list

    def kids_xrefs(widget):
        """Get the xref of the widget's "Parent" and the list of leaf widgets.

        Returns (0, []) if the widget has no "Parent".
        """
        kids_list = []
        parent = mupdf.pdf_dict_get(widget, PDF_NAME("Parent"))
        parent_xref = parent.pdf_to_num()
        if parent_xref == 0:
            return parent_xref, kids_list
        kids_list = get_kids(parent, kids_list)
        return parent_xref, kids_list

    def deduplicate_names(pdf, acro_fields, join_duplicates=False):
        """Handle any widget name duplicates caused by the merge."""
        names = {}  # key is a widget name, value a list of widgets having it.

        # extract all names and widgets in "AcroForm/Fields"
        for i in range(mupdf.pdf_array_len(acro_fields)):
            wobject = mupdf.pdf_array_get(acro_fields, i)
            xref = wobject.pdf_to_num()

            # extract widget name and collect widget(s) using it
            T = mupdf.pdf_dict_get_text_string(wobject, PDF_NAME("T"))
            xrefs = names.get(T, [])
            xrefs.append(xref)
            names[T] = xrefs

        for name, xrefs in names.items():
            if len(xrefs) < 2:
                continue
            xref0, xref1 = xrefs[:2]  # only exactly 2 should occur!
            if join_duplicates:  # combine fields with equal names
                join_widgets(pdf, acro_fields, xref0, xref1, name)
            else:  # make field names unique
                newname = name + f" [{xref1}]"  # append this to the name
                wobject = mupdf.pdf_load_object(pdf, xref1)
                wobject.pdf_dict_put_text_string(PDF_NAME("T"), newname)

        clean_kid_parents(acro_fields)

    def get_acroform(doc):
        """Retrieve the AcroForm dictionary from a PDF."""
        pdf = mupdf.pdf_document_from_fz_document(doc)
        # AcroForm (= central form field info)
        return mupdf.pdf_dict_getp(mupdf.pdf_trailer(pdf), "Root/AcroForm")

    tarpdf = mupdf.pdf_document_from_fz_document(tar)
    srcpdf = mupdf.pdf_document_from_fz_document(src)

    if tar.is_form_pdf:
        # target is a Form PDF, so use it to include source fields
        acro = get_acroform(tar)
        # Important arrays in AcroForm
        acro_fields = acro.pdf_dict_get(PDF_NAME("Fields"))
        tar_co = acro.pdf_dict_get(PDF_NAME("CO"))
        if not tar_co.pdf_is_array():
            tar_co = acro.pdf_dict_put_array(PDF_NAME("CO"), 5)
    else:
        # target is no Form PDF, so copy over source AcroForm
        acro = mupdf.pdf_deep_copy_obj(get_acroform(src))  # make a copy

        # Clear "Fields" and "CO" arrays: will be populated by page fields.
        # This is required to avoid copying unneeded objects.
        acro.pdf_dict_del(PDF_NAME("Fields"))
        acro.pdf_dict_put_array(PDF_NAME("Fields"), 5)
        acro.pdf_dict_del(PDF_NAME("CO"))
        acro.pdf_dict_put_array(PDF_NAME("CO"), 5)

        # Enrich AcroForm for copying to target
        acro_graft = mupdf.pdf_graft_mapped_object(graftmap, acro)

        # Insert AcroForm into target PDF
        acro_tar = mupdf.pdf_add_object(tarpdf, acro_graft)
        acro_fields = acro_tar.pdf_dict_get(PDF_NAME("Fields"))
        tar_co = acro_tar.pdf_dict_get(PDF_NAME("CO"))

        # get its xref and insert it into target catalog
        tar_xref = acro_tar.pdf_to_num()
        acro_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0)
        root = mupdf.pdf_dict_get(mupdf.pdf_trailer(tarpdf), PDF_NAME("Root"))
        root.pdf_dict_put(PDF_NAME("AcroForm"), acro_tar_ind)

    # source page numbers in copy order (ascending or descending)
    if from_page <= to_page:
        src_range = range(from_page, to_page + 1)
    else:
        src_range = range(from_page, to_page - 1, -1)

    parents = {}  # information about widget parents

    # remove "P" owning page reference from all widgets of all source pages
    for i in src_range:
        src_page = src[i]
        for xref in [
            xref
            for xref, wtype, _ in src_page.annot_xrefs()
            if wtype == mupdf.PDF_ANNOT_WIDGET  # pylint: disable=no-member
        ]:
            w_obj = mupdf.pdf_load_object(srcpdf, xref)
            w_obj.pdf_dict_del(PDF_NAME("P"))

            # get the widget's parent structure
            parent_xref, old_kids = kids_xrefs(w_obj)
            if parent_xref:
                # keyed by source parent xref; target values are filled in
                # when the parent is copied over below
                parents[parent_xref] = {
                    "new_xref": 0,
                    "old_kids": old_kids,
                    "new_kids": [],
                }

    # Copy over Parent widgets first - they are not page-dependent
    for xref in parents.keys():  # pylint: disable=consider-using-dict-items
        parent = mupdf.pdf_load_object(srcpdf, xref)
        parent_graft = mupdf.pdf_graft_mapped_object(graftmap, parent)
        parent_tar = mupdf.pdf_add_object(tarpdf, parent_graft)
        kids_xrefs_new = get_kids(parent_tar, [])
        parent_xref_new = parent_tar.pdf_to_num()
        parent_ind = mupdf.pdf_new_indirect(tarpdf, parent_xref_new, 0)
        acro_fields.pdf_array_push(parent_ind)
        parents[xref]["new_xref"] = parent_xref_new
        parents[xref]["new_kids"] = kids_xrefs_new

    for i in range(len(src_range)):
        # read the i-th copied page in the target
        tar_page = tar[start_at + i]

        # read the original page in the source PDF
        src_page = src[src_range[i]]

        # now walk through source page widgets and copy over
        w_xrefs = [  # widget xrefs of the source page
            xref
            for xref, wtype, _ in src_page.annot_xrefs()
            if wtype == mupdf.PDF_ANNOT_WIDGET  # pylint: disable=no-member
        ]
        if not w_xrefs:  # no widgets on this source page
            continue

        # convert to formal PDF page
        tar_page_pdf = mupdf.pdf_page_from_fz_page(tar_page)

        # extract annotations array (create it if the page has none)
        tar_annots = mupdf.pdf_dict_get(tar_page_pdf.obj(), PDF_NAME("Annots"))
        if not mupdf.pdf_is_array(tar_annots):
            tar_annots = mupdf.pdf_dict_put_array(
                tar_page_pdf.obj(), PDF_NAME("Annots"), 5
            )

        for xref in w_xrefs:
            w_obj = mupdf.pdf_load_object(srcpdf, xref)

            # check if field takes part in inter-field validations
            is_aac = mupdf.pdf_is_dict(mupdf.pdf_dict_getp(w_obj, "AA/C"))

            # check if parent of widget already in target
            parent_xref = mupdf.pdf_to_num(
                w_obj.pdf_dict_get(PDF_NAME("Parent"))
            )
            if parent_xref == 0:  # widget has no parent: copy it on its own
                try:
                    w_obj_graft = mupdf.pdf_graft_mapped_object(graftmap, w_obj)
                except Exception as e:
                    # best effort: warn and skip widgets that cannot be copied
                    message_warning(f"cannot copy widget at {xref=}: {e}")
                    continue
                w_obj_tar = mupdf.pdf_add_object(tarpdf, w_obj_graft)
                tar_xref = w_obj_tar.pdf_to_num()
                w_obj_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0)
                mupdf.pdf_array_push(tar_annots, w_obj_tar_ind)
                mupdf.pdf_array_push(acro_fields, w_obj_tar_ind)
            else:
                # widget was copied together with its parent: map the source
                # xref to the target xref at the same position in the kid lists
                parent = parents[parent_xref]
                idx = parent["old_kids"].index(xref)  # search for xref in parent
                tar_xref = parent["new_kids"][idx]
                w_obj_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0)
                mupdf.pdf_array_push(tar_annots, w_obj_tar_ind)

            # Into "AcroForm/CO" if a computation field.
            if is_aac:
                mupdf.pdf_array_push(tar_co, w_obj_tar_ind)

    deduplicate_names(tarpdf, acro_fields, join_duplicates=join_duplicates)
  3156. def _embeddedFileGet(self, idx):
  3157. pdf = _as_pdf_document(self)
  3158. names = mupdf.pdf_dict_getl(
  3159. mupdf.pdf_trailer(pdf),
  3160. PDF_NAME('Root'),
  3161. PDF_NAME('Names'),
  3162. PDF_NAME('EmbeddedFiles'),
  3163. PDF_NAME('Names'),
  3164. )
  3165. entry = mupdf.pdf_array_get(names, 2*idx+1)
  3166. filespec = mupdf.pdf_dict_getl(entry, PDF_NAME('EF'), PDF_NAME('F'))
  3167. buf = mupdf.pdf_load_stream(filespec)
  3168. cont = JM_BinFromBuffer(buf)
  3169. return cont
  3170. def _embeddedFileIndex(self, item: typing.Union[int, str]) -> int:
  3171. filenames = self.embfile_names()
  3172. msg = "'%s' not in EmbeddedFiles array." % str(item)
  3173. if item in filenames:
  3174. idx = filenames.index(item)
  3175. elif item in range(len(filenames)):
  3176. idx = item
  3177. else:
  3178. raise ValueError(msg)
  3179. return idx
  3180. def _embfile_add(self, name, buffer_, filename=None, ufilename=None, desc=None):
  3181. pdf = _as_pdf_document(self)
  3182. data = JM_BufferFromBytes(buffer_)
  3183. if not data.m_internal:
  3184. raise TypeError( MSG_BAD_BUFFER)
  3185. names = mupdf.pdf_dict_getl(
  3186. mupdf.pdf_trailer(pdf),
  3187. PDF_NAME('Root'),
  3188. PDF_NAME('Names'),
  3189. PDF_NAME('EmbeddedFiles'),
  3190. PDF_NAME('Names'),
  3191. )
  3192. if not mupdf.pdf_is_array(names):
  3193. root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
  3194. names = mupdf.pdf_new_array(pdf, 6) # an even number!
  3195. mupdf.pdf_dict_putl(
  3196. root,
  3197. names,
  3198. PDF_NAME('Names'),
  3199. PDF_NAME('EmbeddedFiles'),
  3200. PDF_NAME('Names'),
  3201. )
  3202. fileentry = JM_embed_file(pdf, data, filename, ufilename, desc, 1)
  3203. xref = mupdf.pdf_to_num(
  3204. mupdf.pdf_dict_getl(fileentry, PDF_NAME('EF'), PDF_NAME('F'))
  3205. )
  3206. mupdf.pdf_array_push(names, mupdf.pdf_new_text_string(name))
  3207. mupdf.pdf_array_push(names, fileentry)
  3208. return xref
  3209. def _embfile_del(self, idx):
  3210. pdf = _as_pdf_document(self)
  3211. names = mupdf.pdf_dict_getl(
  3212. mupdf.pdf_trailer(pdf),
  3213. PDF_NAME('Root'),
  3214. PDF_NAME('Names'),
  3215. PDF_NAME('EmbeddedFiles'),
  3216. PDF_NAME('Names'),
  3217. )
  3218. mupdf.pdf_array_delete(names, idx + 1)
  3219. mupdf.pdf_array_delete(names, idx)
  3220. def _embfile_info(self, idx, infodict):
  3221. pdf = _as_pdf_document(self)
  3222. xref = 0
  3223. ci_xref=0
  3224. trailer = mupdf.pdf_trailer(pdf)
  3225. names = mupdf.pdf_dict_getl(
  3226. trailer,
  3227. PDF_NAME('Root'),
  3228. PDF_NAME('Names'),
  3229. PDF_NAME('EmbeddedFiles'),
  3230. PDF_NAME('Names'),
  3231. )
  3232. o = mupdf.pdf_array_get(names, 2*idx+1)
  3233. ci = mupdf.pdf_dict_get(o, PDF_NAME('CI'))
  3234. if ci.m_internal:
  3235. ci_xref = mupdf.pdf_to_num(ci)
  3236. infodict["collection"] = ci_xref
  3237. name = mupdf.pdf_to_text_string(mupdf.pdf_dict_get(o, PDF_NAME('F')))
  3238. infodict[dictkey_filename] = JM_EscapeStrFromStr(name)
  3239. name = mupdf.pdf_to_text_string(mupdf.pdf_dict_get(o, PDF_NAME('UF')))
  3240. infodict[dictkey_ufilename] = JM_EscapeStrFromStr(name)
  3241. name = mupdf.pdf_to_text_string(mupdf.pdf_dict_get(o, PDF_NAME('Desc')))
  3242. infodict[dictkey_descr] = JM_UnicodeFromStr(name)
  3243. len_ = -1
  3244. DL = -1
  3245. fileentry = mupdf.pdf_dict_getl(o, PDF_NAME('EF'), PDF_NAME('F'))
  3246. xref = mupdf.pdf_to_num(fileentry)
  3247. o = mupdf.pdf_dict_get(fileentry, PDF_NAME('Length'))
  3248. if o.m_internal:
  3249. len_ = mupdf.pdf_to_int(o)
  3250. o = mupdf.pdf_dict_get(fileentry, PDF_NAME('DL'))
  3251. if o.m_internal:
  3252. DL = mupdf.pdf_to_int(o)
  3253. else:
  3254. o = mupdf.pdf_dict_getl(fileentry, PDF_NAME('Params'), PDF_NAME('Size'))
  3255. if o.m_internal:
  3256. DL = mupdf.pdf_to_int(o)
  3257. infodict[dictkey_size] = DL
  3258. infodict[dictkey_length] = len_
  3259. return xref
  3260. def _embfile_names(self, namelist):
  3261. """Get list of embedded file names."""
  3262. pdf = _as_pdf_document(self)
  3263. names = mupdf.pdf_dict_getl(
  3264. mupdf.pdf_trailer(pdf),
  3265. PDF_NAME('Root'),
  3266. PDF_NAME('Names'),
  3267. PDF_NAME('EmbeddedFiles'),
  3268. PDF_NAME('Names'),
  3269. )
  3270. if mupdf.pdf_is_array(names):
  3271. n = mupdf.pdf_array_len(names)
  3272. for i in range(0, n, 2):
  3273. val = JM_EscapeStrFromStr(
  3274. mupdf.pdf_to_text_string(
  3275. mupdf.pdf_array_get(names, i)
  3276. )
  3277. )
  3278. namelist.append(val)
  3279. def _embfile_upd(self, idx, buffer_=None, filename=None, ufilename=None, desc=None):
  3280. pdf = _as_pdf_document(self)
  3281. xref = 0
  3282. names = mupdf.pdf_dict_getl(
  3283. mupdf.pdf_trailer(pdf),
  3284. PDF_NAME('Root'),
  3285. PDF_NAME('Names'),
  3286. PDF_NAME('EmbeddedFiles'),
  3287. PDF_NAME('Names'),
  3288. )
  3289. entry = mupdf.pdf_array_get(names, 2*idx+1)
  3290. filespec = mupdf.pdf_dict_getl(entry, PDF_NAME('EF'), PDF_NAME('F'))
  3291. if not filespec.m_internal:
  3292. RAISEPY( "bad PDF: no /EF object", JM_Exc_FileDataError)
  3293. res = JM_BufferFromBytes(buffer_)
  3294. if buffer_ and buffer_.m_internal and not res.m_internal:
  3295. raise TypeError( MSG_BAD_BUFFER)
  3296. if res.m_internal and buffer_ and buffer_.m_internal:
  3297. JM_update_stream(pdf, filespec, res, 1)
  3298. # adjust /DL and /Size parameters
  3299. len, _ = mupdf.fz_buffer_storage(res)
  3300. l = mupdf.pdf_new_int(len)
  3301. mupdf.pdf_dict_put(filespec, PDF_NAME('DL'), l)
  3302. mupdf.pdf_dict_putl(filespec, l, PDF_NAME('Params'), PDF_NAME('Size'))
  3303. xref = mupdf.pdf_to_num(filespec)
  3304. if filename:
  3305. mupdf.pdf_dict_put_text_string(entry, PDF_NAME('F'), filename)
  3306. if ufilename:
  3307. mupdf.pdf_dict_put_text_string(entry, PDF_NAME('UF'), ufilename)
  3308. if desc:
  3309. mupdf.pdf_dict_put_text_string(entry, PDF_NAME('Desc'), desc)
  3310. return xref
  3311. def _extend_toc_items(self, items):
  3312. """Add color info to all items of an extended TOC list."""
  3313. if self.is_closed:
  3314. raise ValueError("document closed")
  3315. if g_use_extra:
  3316. return extra.Document_extend_toc_items( self.this, items)
  3317. pdf = _as_pdf_document(self)
  3318. zoom = "zoom"
  3319. bold = "bold"
  3320. italic = "italic"
  3321. collapse = "collapse"
  3322. root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
  3323. if not root.m_internal:
  3324. return
  3325. olroot = mupdf.pdf_dict_get(root, PDF_NAME('Outlines'))
  3326. if not olroot.m_internal:
  3327. return
  3328. first = mupdf.pdf_dict_get(olroot, PDF_NAME('First'))
  3329. if not first.m_internal:
  3330. return
  3331. xrefs = []
  3332. xrefs = JM_outline_xrefs(first, xrefs)
  3333. n = len(xrefs)
  3334. m = len(items)
  3335. if not n:
  3336. return
  3337. if n != m:
  3338. raise IndexError( "internal error finding outline xrefs")
  3339. # update all TOC item dictionaries
  3340. for i in range(n):
  3341. xref = int(xrefs[i])
  3342. item = items[i]
  3343. itemdict = item[3]
  3344. if not isinstance(itemdict, dict):
  3345. raise ValueError( "need non-simple TOC format")
  3346. itemdict[dictkey_xref] = xrefs[i]
  3347. bm = mupdf.pdf_load_object(pdf, xref)
  3348. flags = mupdf.pdf_to_int( mupdf.pdf_dict_get(bm, PDF_NAME('F')))
  3349. if flags == 1:
  3350. itemdict[italic] = True
  3351. elif flags == 2:
  3352. itemdict[bold] = True
  3353. elif flags == 3:
  3354. itemdict[italic] = True
  3355. itemdict[bold] = True
  3356. count = mupdf.pdf_to_int( mupdf.pdf_dict_get(bm, PDF_NAME('Count')))
  3357. if count < 0:
  3358. itemdict[collapse] = True
  3359. elif count > 0:
  3360. itemdict[collapse] = False
  3361. col = mupdf.pdf_dict_get(bm, PDF_NAME('C'))
  3362. if mupdf.pdf_is_array(col) and mupdf.pdf_array_len(col) == 3:
  3363. color = (
  3364. mupdf.pdf_to_real(mupdf.pdf_array_get(col, 0)),
  3365. mupdf.pdf_to_real(mupdf.pdf_array_get(col, 1)),
  3366. mupdf.pdf_to_real(mupdf.pdf_array_get(col, 2)),
  3367. )
  3368. itemdict[dictkey_color] = color
  3369. z=0
  3370. obj = mupdf.pdf_dict_get(bm, PDF_NAME('Dest'))
  3371. if not obj.m_internal or not mupdf.pdf_is_array(obj):
  3372. obj = mupdf.pdf_dict_getl(bm, PDF_NAME('A'), PDF_NAME('D'))
  3373. if mupdf.pdf_is_array(obj) and mupdf.pdf_array_len(obj) == 5:
  3374. z = mupdf.pdf_to_real(mupdf.pdf_array_get(obj, 4))
  3375. itemdict[zoom] = float(z)
  3376. item[3] = itemdict
  3377. items[i] = item
  3378. def _forget_page(self, page: Page):
  3379. """Remove a page from document page dict."""
  3380. pid = id(page)
  3381. if pid in self._page_refs:
  3382. #self._page_refs[pid] = None
  3383. del self._page_refs[pid]
  3384. def _get_char_widths(self, xref: int, bfname: str, ext: str, ordering: int, limit: int, idx: int = 0):
  3385. pdf = _as_pdf_document(self)
  3386. mylimit = limit
  3387. if mylimit < 256:
  3388. mylimit = 256
  3389. if ordering >= 0:
  3390. data, size, index = mupdf.fz_lookup_cjk_font(ordering)
  3391. font = mupdf.fz_new_font_from_memory(None, data, size, index, 0)
  3392. else:
  3393. data, size = mupdf.fz_lookup_base14_font(bfname)
  3394. if data:
  3395. font = mupdf.fz_new_font_from_memory(bfname, data, size, 0, 0)
  3396. else:
  3397. buf = JM_get_fontbuffer(pdf, xref)
  3398. if not buf.m_internal:
  3399. raise Exception("font at xref %d is not supported" % xref)
  3400. font = mupdf.fz_new_font_from_buffer(None, buf, idx, 0)
  3401. wlist = []
  3402. for i in range(mylimit):
  3403. glyph = mupdf.fz_encode_character(font, i)
  3404. adv = mupdf.fz_advance_glyph(font, glyph, 0)
  3405. if ordering >= 0:
  3406. glyph = i
  3407. if glyph > 0:
  3408. wlist.append( (glyph, adv))
  3409. else:
  3410. wlist.append( (glyph, 0.0))
  3411. return wlist
  3412. def _get_page_labels(self):
  3413. pdf = _as_pdf_document(self)
  3414. rc = []
  3415. pagelabels = mupdf.pdf_new_name("PageLabels")
  3416. obj = mupdf.pdf_dict_getl( mupdf.pdf_trailer(pdf), PDF_NAME('Root'), pagelabels)
  3417. if not obj.m_internal:
  3418. return rc
  3419. # simple case: direct /Nums object
  3420. nums = mupdf.pdf_resolve_indirect( mupdf.pdf_dict_get( obj, PDF_NAME('Nums')))
  3421. if nums.m_internal:
  3422. JM_get_page_labels(rc, nums)
  3423. return rc
  3424. # case: /Kids/Nums
  3425. nums = mupdf.pdf_resolve_indirect( mupdf.pdf_dict_getl(obj, PDF_NAME('Kids'), PDF_NAME('Nums')))
  3426. if nums.m_internal:
  3427. JM_get_page_labels(rc, nums)
  3428. return rc
  3429. # case: /Kids is an array of multiple /Nums
  3430. kids = mupdf.pdf_resolve_indirect( mupdf.pdf_dict_get( obj, PDF_NAME('Kids')))
  3431. if not kids.m_internal or not mupdf.pdf_is_array(kids):
  3432. return rc
  3433. n = mupdf.pdf_array_len(kids)
  3434. for i in range(n):
  3435. nums = mupdf.pdf_resolve_indirect(
  3436. mupdf.pdf_dict_get(
  3437. mupdf.pdf_array_get(kids, i),
  3438. PDF_NAME('Nums'),
  3439. )
  3440. )
  3441. JM_get_page_labels(rc, nums)
  3442. return rc
  3443. def _getMetadata(self, key):
  3444. """Get metadata."""
  3445. try:
  3446. return mupdf.fz_lookup_metadata2( self.this, key)
  3447. except Exception:
  3448. if g_exceptions_verbose > 2: exception_info()
  3449. return ''
  3450. def _getOLRootNumber(self):
  3451. """Get xref of Outline Root, create it if missing."""
  3452. if self.is_closed or self.is_encrypted:
  3453. raise ValueError("document closed or encrypted")
  3454. pdf = _as_pdf_document(self)
  3455. # get main root
  3456. root = mupdf.pdf_dict_get( mupdf.pdf_trailer( pdf), PDF_NAME('Root'))
  3457. # get outline root
  3458. olroot = mupdf.pdf_dict_get( root, PDF_NAME('Outlines'))
  3459. if not olroot.m_internal:
  3460. olroot = mupdf.pdf_new_dict( pdf, 4)
  3461. mupdf.pdf_dict_put( olroot, PDF_NAME('Type'), PDF_NAME('Outlines'))
  3462. ind_obj = mupdf.pdf_add_object( pdf, olroot)
  3463. mupdf.pdf_dict_put( root, PDF_NAME('Outlines'), ind_obj)
  3464. olroot = mupdf.pdf_dict_get( root, PDF_NAME('Outlines'))
  3465. return mupdf.pdf_to_num( olroot)
  3466. def _getPDFfileid(self):
  3467. """Get PDF file id."""
  3468. pdf = _as_pdf_document(self, required=0)
  3469. if not pdf.m_internal:
  3470. return
  3471. idlist = []
  3472. identity = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('ID'))
  3473. if identity.m_internal:
  3474. n = mupdf.pdf_array_len(identity)
  3475. for i in range(n):
  3476. o = mupdf.pdf_array_get(identity, i)
  3477. text = mupdf.pdf_to_text_string(o)
  3478. hex_ = binascii.hexlify(text)
  3479. idlist.append(hex_)
  3480. return idlist
  3481. def _getPageInfo(self, pno, what):
  3482. """List fonts, images, XObjects used on a page."""
  3483. if self.is_closed or self.is_encrypted:
  3484. raise ValueError("document closed or encrypted")
  3485. doc = self.this
  3486. pageCount = mupdf.pdf_count_pages(doc) if isinstance(doc, mupdf.PdfDocument) else mupdf.fz_count_pages(doc)
  3487. n = pno # pno < 0 is allowed
  3488. while n < 0:
  3489. n += pageCount # make it non-negative
  3490. if n >= pageCount:
  3491. raise ValueError( MSG_BAD_PAGENO)
  3492. pdf = _as_pdf_document(self)
  3493. pageref = mupdf.pdf_lookup_page_obj(pdf, n)
  3494. rsrc = mupdf.pdf_dict_get_inheritable(pageref, mupdf.PDF_ENUM_NAME_Resources)
  3495. liste = []
  3496. tracer = []
  3497. if rsrc.m_internal:
  3498. JM_scan_resources(pdf, rsrc, liste, what, 0, tracer)
  3499. return liste
  3500. def _insert_font(self, fontfile=None, fontbuffer=None):
  3501. '''
  3502. Utility: insert font from file or binary.
  3503. '''
  3504. pdf = _as_pdf_document(self)
  3505. if not fontfile and not fontbuffer:
  3506. raise ValueError( MSG_FILE_OR_BUFFER)
  3507. value = JM_insert_font(pdf, None, fontfile, fontbuffer, 0, 0, 0, 0, 0, -1)
  3508. return value
  3509. def _loadOutline(self):
  3510. """Load first outline."""
  3511. doc = self.this
  3512. assert isinstance( doc, mupdf.FzDocument)
  3513. try:
  3514. ol = mupdf.fz_load_outline( doc)
  3515. except Exception:
  3516. if g_exceptions_verbose > 1: exception_info()
  3517. return
  3518. return Outline( ol)
  3519. def _make_page_map(self):
  3520. """Make an array page number -> page object."""
  3521. if self.is_closed:
  3522. raise ValueError("document closed")
  3523. assert 0, f'_make_page_map() is no-op'
  3524. def _move_copy_page(self, pno, nb, before, copy):
  3525. """Move or copy a PDF page reference."""
  3526. pdf = _as_pdf_document(self)
  3527. same = 0
  3528. # get the two page objects -----------------------------------
  3529. # locate the /Kids arrays and indices in each
  3530. page1, parent1, i1 = pdf_lookup_page_loc( pdf, pno)
  3531. kids1 = mupdf.pdf_dict_get( parent1, PDF_NAME('Kids'))
  3532. page2, parent2, i2 = pdf_lookup_page_loc( pdf, nb)
  3533. kids2 = mupdf.pdf_dict_get( parent2, PDF_NAME('Kids'))
  3534. if before: # calc index of source page in target /Kids
  3535. pos = i2
  3536. else:
  3537. pos = i2 + 1
  3538. # same /Kids array? ------------------------------------------
  3539. same = mupdf.pdf_objcmp( kids1, kids2)
  3540. # put source page in target /Kids array ----------------------
  3541. if not copy and same != 0: # update parent in page object
  3542. mupdf.pdf_dict_put( page1, PDF_NAME('Parent'), parent2)
  3543. mupdf.pdf_array_insert( kids2, page1, pos)
  3544. if same != 0: # different /Kids arrays ----------------------
  3545. parent = parent2
  3546. while parent.m_internal: # increase /Count objects in parents
  3547. count = mupdf.pdf_dict_get_int( parent, PDF_NAME('Count'))
  3548. mupdf.pdf_dict_put_int( parent, PDF_NAME('Count'), count + 1)
  3549. parent = mupdf.pdf_dict_get( parent, PDF_NAME('Parent'))
  3550. if not copy: # delete original item
  3551. mupdf.pdf_array_delete( kids1, i1)
  3552. parent = parent1
  3553. while parent.m_internal: # decrease /Count objects in parents
  3554. count = mupdf.pdf_dict_get_int( parent, PDF_NAME('Count'))
  3555. mupdf.pdf_dict_put_int( parent, PDF_NAME('Count'), count - 1)
  3556. parent = mupdf.pdf_dict_get( parent, PDF_NAME('Parent'))
  3557. else: # same /Kids array
  3558. if copy: # source page is copied
  3559. parent = parent2
  3560. while parent.m_internal: # increase /Count object in parents
  3561. count = mupdf.pdf_dict_get_int( parent, PDF_NAME('Count'))
  3562. mupdf.pdf_dict_put_int( parent, PDF_NAME('Count'), count + 1)
  3563. parent = mupdf.pdf_dict_get( parent, PDF_NAME('Parent'))
  3564. else:
  3565. if i1 < pos:
  3566. mupdf.pdf_array_delete( kids1, i1)
  3567. else:
  3568. mupdf.pdf_array_delete( kids1, i1 + 1)
  3569. if pdf.m_internal.rev_page_map: # page map no longer valid: drop it
  3570. mupdf.ll_pdf_drop_page_tree( pdf.m_internal)
  3571. self._reset_page_refs()
  def _newPage(self, pno=-1, width=595, height=842):
      """Make a new PDF page.

      Args:
          pno: page number handed to the page insertion call; values
              below -1 are rejected on the pure-Python path.
          width: x1 coordinate of the new page's media box.
          height: y1 coordinate of the new page's media box.
      Returns:
          ``self[pno]`` after all page references were reset.
      Raises:
          ValueError: document closed or encrypted, or bad page number.
      """
      if self.is_closed or self.is_encrypted:
          raise ValueError("document closed or encrypted")
      if not g_use_extra:
          pdf_doc = _as_pdf_document(self)
          # start from the unit rectangle and move its top-right corner
          page_box = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
          page_box.x1 = width
          page_box.y1 = height
          empty_stream = mupdf.FzBuffer()
          if pno < -1:
              raise ValueError(MSG_BAD_PAGENO)
          # create /Resources and /Contents objects
          res_dict = mupdf.pdf_add_new_dict(pdf_doc, 1)
          new_page = mupdf.pdf_add_page(pdf_doc, page_box, 0, res_dict, empty_stream)
          mupdf.pdf_insert_page(pdf_doc, pno, new_page)
      else:
          # the optional C++ extension performs the same steps
          extra._newPage(self.this, pno, width, height)
      # fixme: pdf->dirty = 1;
      self._reset_page_refs()
      return self[pno]
  def _remove_links_to(self, numbers):
      """Hand *numbers* to `_remove_dest_range` for this document's PDF.

      Args:
          numbers: page numbers, forwarded unchanged.
      """
      _remove_dest_range(_as_pdf_document(self), numbers)
  def _remove_toc_item(self, xref):
      """ "Remove" the bookmark at *xref* by letting it point to nowhere.

      The outline item itself stays in place: its /Dest and /A entries are
      deleted and its /C (color) entry is set to (0.8, 0.8, 0.8).
      """
      pdf_doc = _as_pdf_document(self)
      bookmark = mupdf.pdf_new_indirect(pdf_doc, xref, 0)
      for key in ('Dest', 'A'):
          mupdf.pdf_dict_del(bookmark, PDF_NAME(key))
      gray = mupdf.pdf_new_array(pdf_doc, 3)
      for _ in range(3):
          mupdf.pdf_array_push_real(gray, 0.8)
      mupdf.pdf_dict_put(bookmark, PDF_NAME('C'), gray)
  def _reset_page_refs(self):
      """Erase every page held in `self._page_refs`, then empty the mapping.

      Returns immediately when `is_closed` is true or the attribute does
      not exist.
      """
      if getattr(self, "is_closed", True):
          return
      # walk a snapshot of the values, not the live mapping
      for tracked in list(self._page_refs.values()):
          if tracked:
              tracked._erase()
      self._page_refs.clear()
  def _set_page_labels(self, labels):
      """Write *labels* into the catalog's /PageLabels/Nums array.

      Any existing /PageLabels entry is deleted and an empty /Nums array is
      created through MuPDF. The catalog source is then patched textually:
      "/Nums[]" is replaced by "/Nums[<labels>]" and the object is updated.

      Args:
          labels: text to put between the brackets of the /Nums array.
      """
      pdf_doc = _as_pdf_document(self)
      label_key = mupdf.pdf_new_name("PageLabels")
      catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf_doc), PDF_NAME('Root'))
      mupdf.pdf_dict_del(catalog, label_key)
      empty_nums = mupdf.pdf_new_array(pdf_doc, 0)
      mupdf.pdf_dict_putl(catalog, empty_nums, label_key, PDF_NAME('Nums'))
      cat_xref = self.pdf_catalog()
      source = self.xref_object(cat_xref, compressed=True)
      patched = source.replace("/Nums[]", "/Nums[%s]" % labels)
      self.update_object(cat_xref, patched)
  def _update_toc_item(self, xref, action=None, title=None, flags=0, collapse=None, color=None):
      '''
      Change selected properties of the outline item at xref.

      Args:
          xref: xref of the outline item.
          action: if truthy, /Dest is deleted and /A is set to the PDF
              object parsed from this string.
          title: if truthy, stored as the item's /Title text.
          flags: integer always written to /F.
          collapse: ignored if None or if the item has no /Count. True
              turns a positive /Count negative, False turns a negative
              /Count positive.
          color: if truthy, its first three components become /C; if
              falsy but not None, /C is deleted; None leaves /C alone.
      '''
      pdf_doc = _as_pdf_document(self)
      entry = mupdf.pdf_new_indirect(pdf_doc, xref, 0)
      if title:
          mupdf.pdf_dict_put_text_string(entry, PDF_NAME('Title'), title)
      if action:
          mupdf.pdf_dict_del(entry, PDF_NAME('Dest'))
          action_obj = JM_pdf_obj_from_str(pdf_doc, action)
          mupdf.pdf_dict_put(entry, PDF_NAME('A'), action_obj)
      mupdf.pdf_dict_put_int(entry, PDF_NAME('F'), flags)
      if color:
          rgb = mupdf.pdf_new_array(pdf_doc, 3)
          for k in range(3):
              mupdf.pdf_array_push_real(rgb, color[k])
          mupdf.pdf_dict_put(entry, PDF_NAME('C'), rgb)
      elif color is not None:
          mupdf.pdf_dict_del(entry, PDF_NAME('C'))
      if collapse is None:
          return
      if not mupdf.pdf_dict_get(entry, PDF_NAME('Count')).m_internal:
          return
      count = mupdf.pdf_dict_get_int(entry, PDF_NAME('Count'))
      # only flip the sign when the current state contradicts the request
      if (count < 0 and collapse is False) or (count > 0 and collapse is True):
          mupdf.pdf_dict_put_int(entry, PDF_NAME('Count'), count * (-1))
  @property
  def FormFonts(self):
      """Get list of field font resource names.

      The names are the keys of the /Root/AcroForm/DR/Font dictionary.

      Returns:
          None when no underlying PDF object is available, otherwise a
          list of names (empty if the font dictionary does not exist).
      """
      pdf_doc = _as_pdf_document(self, required=0)
      if not pdf_doc.m_internal:
          return
      font_dict = mupdf.pdf_dict_getl(
          mupdf.pdf_trailer(pdf_doc),
          PDF_NAME('Root'),
          PDF_NAME('AcroForm'),
          PDF_NAME('DR'),
          PDF_NAME('Font'),
      )
      if not (font_dict.m_internal and mupdf.pdf_is_dict(font_dict)):
          return []
      return [
          JM_UnicodeFromStr(mupdf.pdf_to_name(mupdf.pdf_dict_get_key(font_dict, k)))
          for k in range(mupdf.pdf_dict_len(font_dict))
      ]
  def add_layer(self, name, creator=None, on=None):
      """Add a new OC layer.

      Args:
          name, creator, on: forwarded unchanged to `JM_add_layer_config`.
      """
      pdf_doc = _as_pdf_document(self)
      JM_add_layer_config(pdf_doc, name, creator, on)
      # have MuPDF read the optional content data again
      mupdf.ll_pdf_read_ocg(pdf_doc.m_internal)
  def add_ocg(self, name, config=-1, on=1, intent=None, usage=None):
      """Add new optional content group.

      Args:
          name: text stored as the OCG's /Name.
          config: index into /OCProperties/Configs of the configuration
              the OCG is registered in; any value below 0 selects the
              default configuration /D.
          on: truthy lists the OCG in the configuration's /ON array,
              falsy in its /OFF array.
          intent: a string, or a sequence of strings, stored as names in
              the /Intent array. Empty or None stores the name "View".
              Sequence entries that are not non-empty strings are skipped.
          usage: name stored as /Usage/CreatorInfo/Subtype; "Artwork"
              when empty or None.
      Returns:
          The xref of the new OCG.
      Raises:
          ValueError: *config* does not select an existing configuration.
      """
      pdf = _as_pdf_document(self)

      # make the OCG
      ocg = mupdf.pdf_add_new_dict(pdf, 3)
      mupdf.pdf_dict_put(ocg, PDF_NAME('Type'), PDF_NAME('OCG'))
      mupdf.pdf_dict_put_text_string(ocg, PDF_NAME('Name'), name)
      intents = mupdf.pdf_dict_put_array(ocg, PDF_NAME('Intent'), 2)
      if not intent:
          mupdf.pdf_array_push(intents, PDF_NAME('View'))
      elif isinstance(intent, str):
          mupdf.pdf_array_push(intents, mupdf.pdf_new_name(intent))
      else:
          # a sequence of intent names (previously an 'assert 0' fixme)
          for item in intent:
              if isinstance(item, str) and item:
                  mupdf.pdf_array_push(intents, mupdf.pdf_new_name(item))
      use_for = mupdf.pdf_dict_put_dict(ocg, PDF_NAME('Usage'), 3)
      ci_name = mupdf.pdf_new_name("CreatorInfo")
      cre_info = mupdf.pdf_dict_put_dict(use_for, ci_name, 2)
      mupdf.pdf_dict_put_text_string(cre_info, PDF_NAME('Creator'), "PyMuPDF")
      if usage:
          mupdf.pdf_dict_put_name(cre_info, PDF_NAME('Subtype'), usage)
      else:
          mupdf.pdf_dict_put_name(cre_info, PDF_NAME('Subtype'), "Artwork")
      indocg = mupdf.pdf_add_object(pdf, ocg)

      # Insert OCG in the right config
      ocp = JM_ensure_ocproperties(pdf)
      obj = mupdf.pdf_dict_get(ocp, PDF_NAME('OCGs'))
      mupdf.pdf_array_push(obj, indocg)

      if config > -1:
          obj = mupdf.pdf_dict_get(ocp, PDF_NAME('Configs'))
          if not mupdf.pdf_is_array(obj):
              raise ValueError(MSG_BAD_OC_CONFIG)
          cfg = mupdf.pdf_array_get(obj, config)
          if not cfg.m_internal:
              raise ValueError(MSG_BAD_OC_CONFIG)
      else:
          cfg = mupdf.pdf_dict_get(ocp, PDF_NAME('D'))

      # every OCG is listed in /Order, creating the array on demand
      obj = mupdf.pdf_dict_get(cfg, PDF_NAME('Order'))
      if not obj.m_internal:
          obj = mupdf.pdf_dict_put_array(cfg, PDF_NAME('Order'), 1)
      mupdf.pdf_array_push(obj, indocg)

      # ... and in exactly one of /ON or /OFF
      state_key = 'ON' if on else 'OFF'
      obj = mupdf.pdf_dict_get(cfg, PDF_NAME(state_key))
      if not obj.m_internal:
          obj = mupdf.pdf_dict_put_array(cfg, PDF_NAME(state_key), 1)
      mupdf.pdf_array_push(obj, indocg)

      # let MuPDF take note: re-read OCProperties
      mupdf.ll_pdf_read_ocg(pdf.m_internal)

      return mupdf.pdf_to_num(indocg)
  def authenticate(self, password):
      """Decrypt document.

      Args:
          password: the password handed to MuPDF's password check.
      Returns:
          The result of the password check; a truthy value means the
          document was decrypted and has been (re-)initialized.
      Raises:
          ValueError: the document is closed.
      """
      if self.is_closed:
          raise ValueError("document closed")
      val = mupdf.fz_authenticate_password(self.this, password)
      if val:  # the doc is decrypted successfully and we init the outline
          self.is_encrypted = False
          self.init_doc()
          self.thisown = True
      return val
  def can_save_incrementally(self):
      """Check whether incremental saves are possible.

      Returns False when no underlying PDF object is available, otherwise
      whatever MuPDF reports for the PDF.
      """
      pdf_doc = _as_pdf_document(self, required=0)
      if pdf_doc.m_internal:
          return mupdf.pdf_can_be_saved_incrementally(pdf_doc)
      return False
  def bake(self, *, annots: bool = True, widgets: bool = True) -> None:
      """Turn annotations and / or form fields into permanent content.

      Notes:
          Annotations or widgets become permanent page content, like
          text and vector graphics, as appropriate.
          Pages keep their appearance afterwards, but no longer have
          annotations, respectively fields.
          With widgets selected, the PDF stops being a Form PDF.
      Args:
          annots: convert annotations
          widgets: convert form fields
      """
      pdf_doc = _as_pdf_document(self)
      # MuPDF expects the two switches as integers
      switches = (int(annots), int(widgets))
      mupdf.pdf_bake_document(pdf_doc, *switches)
  @property
  def chapter_count(self):
      """Number of chapters.

      Raises:
          ValueError: the document is closed.
      """
      if not self.is_closed:
          return mupdf.fz_count_chapters(self.this)
      raise ValueError("document closed")
  def chapter_page_count(self, chapter):
      """Page count of chapter.

      Args:
          chapter: chapter number, 0 <= chapter < number of chapters.
      Raises:
          ValueError: the document is closed or the chapter number is bad.
      """
      if self.is_closed:
          raise ValueError("document closed")
      total = mupdf.fz_count_chapters(self.this)
      if chapter >= total or chapter < 0:
          raise ValueError("bad chapter number")
      return mupdf.fz_count_chapter_pages(self.this, chapter)
  def close(self):
      """Close document.

      Drops the outline, resets all page references, empties `Graftmaps`
      and sets `this` to None.

      Raises:
          ValueError: the document is already closed (or has no
              `is_closed` attribute).
      """
      if getattr(self, "is_closed", True):
          raise ValueError("document closed")
      if getattr(self, "_outline", None):
          self._outline = None
      self._reset_page_refs()
      self.is_closed = True
      self.Graftmaps = {}  # Fixes test_3140().
      self.this = None
  def convert_to_pdf(self, from_page=0, to_page=-1, rotate=0):
      """Convert document to a PDF, selecting page range and optional rotation. Output bytes object.

      Args:
          from_page: first page; negative values become 0, values past
              the end become the last page.
          to_page: last page; negative values or values past the end
              become the last page.
          rotate: passed through to the conversion.
      Raises:
          ValueError: document closed or encrypted.
      """
      if self.is_closed or self.is_encrypted:
          raise ValueError("document closed or encrypted")
      src = self.this
      last = mupdf.fz_count_pages(src) - 1
      first_pno = 0 if from_page < 0 else from_page
      if first_pno > last:
          first_pno = last
      last_pno = last if (to_page < 0 or to_page > last) else to_page
      # MuPDF warnings raised during conversion are echoed afterwards
      before = len(JM_mupdf_warnings_store)
      result = JM_convert_to_pdf(src, first_pno, last_pno, rotate)
      after = len(JM_mupdf_warnings_store)
      for k in range(before, after):
          message(f'{JM_mupdf_warnings_store[k]}')
      return result
  def copy_page(self, pno: int, to: int =-1):
      """Copy a page within a PDF document.

      Only another reference to the same page object is created.

      Args:
          pno: source page number
          to: put before this page, '-1' means after last page.
      Raises:
          ValueError: document closed, or a page number is out of range.
      """
      if self.is_closed:
          raise ValueError("document closed")
      page_count = len(self)
      numbers_ok = pno in range(page_count) and to in range(-1, page_count)
      if not numbers_ok:
          raise ValueError("bad page number(s)")
      if to == -1:
          # append: insert *after* the current last page
          return self._move_copy_page(pno, page_count - 1, 0, 1)
      return self._move_copy_page(pno, to, 1, 1)
  def del_xml_metadata(self):
      """Delete XML metadata.

      Removes the /Metadata entry from the PDF catalog, if a catalog exists.

      Raises:
          ValueError: document closed or encrypted.
      """
      if self.is_closed or self.is_encrypted:
          raise ValueError("document closed or encrypted")
      pdf_doc = _as_pdf_document(self)
      catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf_doc), PDF_NAME('Root'))
      if not catalog.m_internal:
          return
      mupdf.pdf_dict_del(catalog, PDF_NAME('Metadata'))
  def delete_page(self, pno: int =-1):
      """Delete one page from a PDF.

      Args:
          pno: page number, passed to `delete_pages` as its only argument.
      Returns:
          Whatever `delete_pages` returns.
      """
      outcome = self.delete_pages(pno)
      return outcome
  def delete_pages(self, *args, **kw):
      """Delete pages from a PDF.

      Args:
          Either keywords 'from_page'/'to_page', or two integers to
          specify the first/last page to delete.
          Or a list/tuple/range object, which can contain arbitrary
          page numbers.
          Or a single integer page number.
      Notes:
          Outline items pointing to a deleted page are neutralised and
          links to deleted pages are removed before the pages are
          deleted, last page first.
      Raises:
          ValueError: no PDF, document closed, wrong number or mix of
              arguments, or page number(s) out of range.
      """
      if not self.is_pdf:
          raise ValueError("is no PDF")
      if self.is_closed:
          raise ValueError("document closed")

      page_count = self.page_count  # page count of document

      def wrap(pno):
          """Map a negative page number onto its non-negative equivalent."""
          if pno >= 0:
              return pno
          if page_count < 1:
              # repeatedly adding 0 would never leave the negative range
              raise ValueError("bad page number(s)")
          return pno % page_count

      if kw:  # check if keywords were used
          if args:  # then no positional args are allowed
              raise ValueError("cannot mix keyword and positional argument")
          f = wrap(kw.get("from_page", -1))  # first page to delete
          t = wrap(kw.get("to_page", -1))  # last page to delete
          if not f <= t < page_count:
              raise ValueError("bad page number(s)")
          numbers = range(f, t + 1)
      elif not args or len(args) > 2:
          # 'args' is a tuple, so it must be tested for emptiness, not '== []'
          raise ValueError("need 1 or 2 positional arguments")
      elif len(args) == 2:
          f, t = args
          if not (type(f) is int and type(t) is int):
              raise ValueError("both arguments must be int")
          if f > t:
              f, t = t, f
          if not f <= t < page_count:
              raise ValueError("bad page number(s)")
          numbers = range(f, t + 1)
      elif isinstance(args[0], int):
          numbers = (wrap(args[0]),)
      else:
          numbers = args[0]

      # convert first, then de-duplicate: ensures unique integers
      numbers = sorted({int(n) for n in numbers})
      if not numbers:
          message("nothing to delete")
          return
      if numbers[0] < 0 or numbers[-1] >= page_count:
          raise ValueError("bad page number(s)")

      frozen_numbers = frozenset(numbers)
      toc = self.get_toc()
      for i, xref in enumerate(self.get_outline_xrefs()):
          if toc[i][2] - 1 in frozen_numbers:
              self._remove_toc_item(xref)  # remove target in PDF object

      self._remove_links_to(frozen_numbers)

      for i in reversed(numbers):  # delete pages, last to first
          self._delete_page(i)

      self._reset_page_refs()
  def embfile_add(self,
          name: str,
          buffer_: ByteString,
          filename: OptStr =None,
          ufilename: OptStr =None,
          desc: OptStr =None,
          ) -> int:
      """Add an item to the EmbeddedFiles array.

      Args:
          name: name of the new item, must not already exist.
          buffer_: (binary data) the file content.
          filename: (str) the file name, default: the name
          ufilename: (unicode) the file name, default: filename
          desc: (str) the description.
      Returns:
          The xref returned by the low-level insertion; its /Type,
          /Params/CreationDate and /Params/ModDate keys are set here.
      Raises:
          ValueError: an item called *name* already exists.
      """
      if name in self.embfile_names():
          raise ValueError("Name '%s' already exists." % str(name))

      if filename is None:
          filename = name
      if ufilename is None:
          ufilename = filename
      if desc is None:
          desc = name
      xref = self._embfile_add(
          name,
          buffer_=buffer_,
          filename=filename,
          ufilename=ufilename,
          desc=desc,
      )
      # creation and modification time are the same for a new item
      date = get_pdf_str(get_pdf_now())
      self.xref_set_key(xref, "Type", "/EmbeddedFile")
      self.xref_set_key(xref, "Params/CreationDate", date)
      self.xref_set_key(xref, "Params/ModDate", date)
      return xref
  def embfile_count(self) -> int:
      """Get number of EmbeddedFiles (the length of `embfile_names()`)."""
      names = self.embfile_names()
      return len(names)
  def embfile_del(self, item: typing.Union[int, str]):
      """Delete an entry from EmbeddedFiles.

      Notes:
          *item* must be the name or the index of an EmbeddedFiles entry.
          The data are physically removed only when saving to a new
          file with an appropriate garbage option.
      Args:
          item: name or number of item.
      Returns:
          None
      """
      return self._embfile_del(self._embeddedFileIndex(item))
  def embfile_get(self, item: typing.Union[int, str]) -> bytes:
      """Get the content of an item in the EmbeddedFiles array.

      Args:
          item: number or name of item.
      Returns:
          (bytes) The file content.
      """
      return self._embeddedFileGet(self._embeddedFileIndex(item))
  def embfile_info(self, item: typing.Union[int, str]) -> dict:
      """Get information of an item in the EmbeddedFiles array.

      Args:
          item: number or name of item.
      Returns:
          Information dictionary. Besides "name" and whatever the
          low-level call stores, it gets "creationDate", "modDate" and
          "checksum" (hex text) for each of these that exists.
      """
      idx = self._embeddedFileIndex(item)
      info = {"name": self.embfile_names()[idx]}
      xref = self._embfile_info(idx, info)
      for pdf_key, info_key in (
          ("Params/CreationDate", "creationDate"),
          ("Params/ModDate", "modDate"),
      ):
          kind, value = self.xref_get_key(xref, pdf_key)
          if kind != "null":
              info[info_key] = value
      kind, md5 = self.xref_get_key(xref, "Params/CheckSum")
      if kind != "null":
          info["checksum"] = binascii.hexlify(md5.encode()).decode()
      return info
  def embfile_names(self) -> list:
      """Get list of names of EmbeddedFiles.

      The list is filled in place by `_embfile_names`.
      """
      collected = []
      self._embfile_names(collected)
      return collected
  def embfile_upd(self,
          item: typing.Union[int, str],
          buffer_: OptBytes =None,
          filename: OptStr =None,
          ufilename: OptStr =None,
          desc: OptStr =None,
          ) -> int:
      """Change an item of the EmbeddedFiles array.

      Notes:
          Only provided parameters are changed. Note that the item's
          /Params/ModDate is refreshed on every call, even if all
          parameters are omitted.
      Args:
          item: number or name of item.
          buffer_: (binary data) the new file content.
          filename: (str) the new file name.
          ufilename: (unicode) the new file name.
          desc: (str) the new description.
      Returns:
          The xref returned by the low-level update.
      """
      idx = self._embeddedFileIndex(item)
      xref = self._embfile_upd(
          idx,
          buffer_=buffer_,
          filename=filename,
          ufilename=ufilename,
          desc=desc,
      )
      date = get_pdf_now()
      self.xref_set_key(xref, "Params/ModDate", get_pdf_str(date))
      return xref
  def extract_font(self, xref=0, info_only=0, named=None):
      '''
      Get a font by xref. Returns a tuple or dictionary.

      Args:
          xref: xref of the object to inspect.
          info_only: if truthy, the font file content is not read and
              returned as b''.
          named: if truthy return a dictionary, otherwise the tuple
              (name, extension, subtype, content).
      Returns:
          All-empty values when the object is no /Font or its subtype
          starts with 'CIDFontType'.
      '''
      pdf_doc = _as_pdf_document(self)
      font_obj = mupdf.pdf_load_object(pdf_doc, xref)
      obj_type = mupdf.pdf_dict_get(font_obj, PDF_NAME('Type'))
      obj_subtype = mupdf.pdf_dict_get(font_obj, PDF_NAME('Subtype'))

      is_font = mupdf.pdf_name_eq(obj_type, PDF_NAME('Font'))
      if not is_font or mupdf.pdf_to_name(obj_subtype).startswith('CIDFontType'):
          fontname, fontext, fonttype, content = '', '', '', b''
      else:
          # prefer /BaseFont, fall back to /Name
          name_obj = mupdf.pdf_dict_get(font_obj, PDF_NAME('BaseFont'))
          if not name_obj.m_internal or mupdf.pdf_is_null(name_obj):
              name_obj = mupdf.pdf_dict_get(font_obj, PDF_NAME('Name'))
          ext = JM_get_fontextension(pdf_doc, xref)
          if ext != 'n/a' and not info_only:
              content = JM_BinFromBuffer(JM_get_fontbuffer(pdf_doc, xref))
          else:
              content = b''
          fontname = JM_EscapeStrFromStr(mupdf.pdf_to_name(name_obj))
          fontext = JM_UnicodeFromStr(ext)
          fonttype = JM_UnicodeFromStr(mupdf.pdf_to_name(obj_subtype))

      if named:
          return {
              dictkey_name: fontname,
              dictkey_ext: fontext,
              dictkey_type: fonttype,
              dictkey_content: content,
          }
      return fontname, fontext, fonttype, content
  def extract_image(self, xref):
      """Get image by xref. Returns a dictionary.

      The dictionary is filled by `_make_image_dict` and extended with
      the xref of the image's /SMask or /Mask (0 if none) and the
      colorspace name.

      Raises:
          ValueError: document closed or encrypted, xref out of range,
              or the object is not an image.
      """
      if self.is_closed or self.is_encrypted:
          raise ValueError("document closed or encrypted")
      pdf_doc = _as_pdf_document(self)
      if not _INRANGE(xref, 1, mupdf.pdf_xref_len(pdf_doc)-1):
          raise ValueError(MSG_BAD_XREF)

      img_obj = mupdf.pdf_new_indirect(pdf_doc, xref, 0)
      subtype = mupdf.pdf_dict_get(img_obj, PDF_NAME('Subtype'))
      if not mupdf.pdf_name_eq(subtype, PDF_NAME('Image')):
          raise ValueError("not an image")

      mask_obj = mupdf.pdf_dict_geta(img_obj, PDF_NAME('SMask'), PDF_NAME('Mask'))
      smask = mupdf.pdf_to_num(mask_obj) if mask_obj.m_internal else 0

      # load the image
      image = mupdf.pdf_load_image(pdf_doc, img_obj)
      info = {}
      _make_image_dict(image, info)
      info[dictkey_smask] = smask
      info[dictkey_cs_name] = mupdf.fz_colorspace_name(image.colorspace())
      return info
  def ez_save(
          self,
          filename,
          garbage=3,
          clean=False,
          deflate=True,
          deflate_images=True,
          deflate_fonts=True,
          incremental=False,
          ascii=False,
          expand=False,
          linear=False,
          pretty=False,
          encryption=1,
          permissions=4095,
          owner_pw=None,
          user_pw=None,
          no_new_id=True,
          preserve_metadata=1,
          use_objstms=1,
          compression_effort=0,
          ):
      '''
      Save PDF using some different defaults.

      Every parameter is forwarded to `save()` under the same name;
      only the default values are specific to this method.
      '''
      options = dict(
          garbage=garbage,
          clean=clean,
          deflate=deflate,
          deflate_images=deflate_images,
          deflate_fonts=deflate_fonts,
          incremental=incremental,
          ascii=ascii,
          expand=expand,
          linear=linear,
          pretty=pretty,
          encryption=encryption,
          permissions=permissions,
          owner_pw=owner_pw,
          user_pw=user_pw,
          no_new_id=no_new_id,
          preserve_metadata=preserve_metadata,
          use_objstms=use_objstms,
          compression_effort=compression_effort,
      )
      return self.save(filename, **options)
  def find_bookmark(self, bm):
      """Find new location after layouting a document.

      Args:
          bm: bookmark value handed to MuPDF's bookmark lookup.
      Returns:
          The tuple (chapter, page) of the location found.
      Raises:
          ValueError: document closed or encrypted.
      """
      if self.is_closed or self.is_encrypted:
          raise ValueError("document closed or encrypted")
      loc = mupdf.fz_lookup_bookmark2(self.this, bm)
      return loc.chapter, loc.page
  def fullcopy_page(self, pno, to=-1):
      """Make a full page duplicate.

      The page object is deep-copied, its annotations (except Popup
      annotations and those having an /IRT entry) are copied into new
      objects, and its contents are stored in a new stream. The copy
      is then inserted at position *to*.

      Args:
          pno: source page number.
          to: target position, between -1 and the last page number.
      Raises:
          ValueError: a page number is out of range.
      """
      pdf_doc = _as_pdf_document(self)
      page_count = mupdf.pdf_count_pages(pdf_doc)
      try:
          pno_ok = _INRANGE(pno, 0, page_count - 1)
          if not pno_ok or not _INRANGE(to, -1, page_count - 1):
              raise ValueError(MSG_BAD_PAGENO)

          src_page = mupdf.pdf_resolve_indirect(mupdf.pdf_lookup_page_obj(pdf_doc, pno))
          new_page = mupdf.pdf_deep_copy_obj(src_page)
          src_annots = mupdf.pdf_dict_get(new_page, PDF_NAME('Annots'))

          # copy annotations, but remove Popup and IRT types
          if src_annots.m_internal:
              annot_count = mupdf.pdf_array_len(src_annots)
              kept_annots = mupdf.pdf_new_array(pdf_doc, annot_count)
              for k in range(annot_count):
                  annot = mupdf.pdf_array_get(src_annots, k)
                  subtype = mupdf.pdf_dict_get(annot, PDF_NAME('Subtype'))
                  if mupdf.pdf_name_eq(subtype, PDF_NAME('Popup')):
                      continue
                  if mupdf.pdf_dict_gets(annot, "IRT").m_internal:
                      continue
                  duplicate = mupdf.pdf_deep_copy_obj(mupdf.pdf_resolve_indirect(annot))
                  annot_xref = mupdf.pdf_create_object(pdf_doc)
                  mupdf.pdf_update_object(pdf_doc, annot_xref, duplicate)
                  # continue with the indirect reference to the stored copy
                  duplicate = mupdf.pdf_new_indirect(pdf_doc, annot_xref, 0)
                  mupdf.pdf_dict_del(duplicate, PDF_NAME('Popup'))
                  mupdf.pdf_dict_del(duplicate, PDF_NAME('P'))
                  mupdf.pdf_array_push(kept_annots, duplicate)
              mupdf.pdf_dict_put(new_page, PDF_NAME('Annots'), kept_annots)

          # copy the old contents stream(s)
          old_contents = JM_read_contents(src_page)

          # create new /Contents object for the copy
          if old_contents and old_contents.m_internal:
              stream = mupdf.pdf_add_stream(pdf_doc, mupdf.fz_new_buffer_from_copied_data(b" "), mupdf.PdfObj(), 0)
              JM_update_stream(pdf_doc, stream, old_contents, 1)
              mupdf.pdf_dict_put(new_page, PDF_NAME('Contents'), stream)

          # now insert target page, making sure it is an indirect object
          page_xref = mupdf.pdf_create_object(pdf_doc)  # get new xref
          mupdf.pdf_update_object(pdf_doc, page_xref, new_page)  # store new page
          new_page = mupdf.pdf_new_indirect(pdf_doc, page_xref, 0)  # reread object
          mupdf.pdf_insert_page(pdf_doc, to, new_page)  # and store the page
      finally:
          # executed on success and on failure alike
          mupdf.ll_pdf_drop_page_tree(pdf_doc.m_internal)

      self._reset_page_refs()
  def get_char_widths(
          doc: 'Document',
          xref: int,
          limit: int = 256,
          idx: int = 0,
          fontdict: OptDict = None,
          ) -> list:
      """Get list of glyph information of a font.

      Notes:
          The font is identified by its XREF number. A font seen before
          is found in doc.FontInfos; otherwise a new entry is appended
          there.
          The result is a list of (glyph, width) items: glyph is an
          integer controlling the char appearance, width a float
          controlling the char's spacing (width * fontsize is the actual
          space).
          For 'simple' fonts, glyph == ord(char) will usually be true.
          Exceptions are 'Symbol' and 'ZapfDingbats', for which the data
          are provided directly here.
          For CJK fonts None is returned.
      Raises:
          ValueError: the font extension is empty ("xref is not a font").
      """
      fontinfo = CheckFontInfo(doc, xref)
      if fontinfo is not None:
          fontdict = fontinfo[1]
          glyphs = fontdict["glyphs"]
          simple = fontdict["simple"]
          ordering = fontdict["ordering"]
      else:  # not recorded yet: create it
          if fontdict is not None:
              name = fontdict["name"]
              ext = fontdict["ext"]
              stype = fontdict["type"]
              ordering = fontdict["ordering"]
              simple = fontdict["simple"]
          else:
              name, ext, stype, asc, dsc = utils._get_font_properties(doc, xref)
              fontdict = {
                  "name": name,
                  "type": stype,
                  "ext": ext,
                  "ascender": asc,
                  "descender": dsc,
              }

          if ext == "":
              raise ValueError("xref is not a font")

          # check for 'simple' fonts
          simple = stype in ("Type1", "MMType1", "TrueType")

          # check for CJK fonts
          cjk_ordering = {
              "Fangti": 0, "Ming": 0,
              "Heiti": 1, "Song": 1,
              "Gothic": 2, "Mincho": 2,
              "Dotum": 3, "Batang": 3,
          }
          ordering = cjk_ordering.get(name, -1)

          fontdict["simple"] = simple

          # the two symbolic Base-14 fonts come with predefined glyph tables
          if name == "ZapfDingbats":
              glyphs = zapf_glyphs
          elif name == "Symbol":
              glyphs = symbol_glyphs
          else:
              glyphs = None

          fontdict["glyphs"] = glyphs
          fontdict["ordering"] = ordering
          fontinfo = [xref, fontdict]
          doc.FontInfos.append(fontinfo)

      oldlimit = 0 if glyphs is None else len(glyphs)
      mylimit = max(256, limit)
      if mylimit <= oldlimit:
          # enough glyphs are known already
          return glyphs

      if ordering < 0:  # not a CJK font
          glyphs = doc._get_char_widths(
              xref, fontdict["name"], fontdict["ext"], fontdict["ordering"], mylimit, idx
          )
      else:  # CJK fonts use char codes and width = 1
          glyphs = None

      fontdict["glyphs"] = glyphs
      fontinfo[1] = fontdict
      UpdateFontInfo(doc, fontinfo)

      return glyphs
  def get_layer(self, config=-1):
      """Content of ON, OFF, RBGroups of an OC layer.

      Args:
          config: index into /OCProperties/Configs, or -1 for the
              default configuration /D.
      Returns:
          None when the PDF has no /OCProperties, otherwise the result
          of `JM_get_ocg_arrays` for the selected configuration.
      Raises:
          ValueError: the selected configuration does not exist.
      """
      pdf_doc = _as_pdf_document(self)
      ocp = mupdf.pdf_dict_getl(
          mupdf.pdf_trailer(pdf_doc),
          PDF_NAME('Root'),
          PDF_NAME('OCProperties'),
      )
      if not ocp.m_internal:
          return
      if config != -1:
          configs = mupdf.pdf_dict_get(ocp, PDF_NAME('Configs'))
          layer = mupdf.pdf_array_get(configs, config)
      else:
          layer = mupdf.pdf_dict_get(ocp, PDF_NAME('D'))
      if not layer.m_internal:
          raise ValueError(MSG_BAD_OC_CONFIG)
      return JM_get_ocg_arrays(layer)
  def get_layers(self):
      """Show optional OC layers.

      Returns:
          A list of dictionaries with keys "number", "name" and
          "creator", one per layer configuration.
      """
      pdf_doc = _as_pdf_document(self)
      count = mupdf.pdf_count_layer_configs(pdf_doc)
      if count == 1:
          configs = mupdf.pdf_dict_getl(
              mupdf.pdf_trailer(pdf_doc),
              PDF_NAME('Root'),
              PDF_NAME('OCProperties'),
              PDF_NAME('Configs'),
          )
          # a count of 1 without a /Configs array is reported as no layer
          if not mupdf.pdf_is_array(configs):
              count = 0
      layers = []
      info = mupdf.PdfLayerConfig()  # refilled for every configuration
      for number in range(count):
          mupdf.pdf_layer_config_info(pdf_doc, number, info)
          layers.append({
              "number": number,
              "name": info.name,
              "creator": info.creator,
          })
      return layers
  def get_new_xref(self):
      """Make new xref.

      Returns:
          The number of a newly created PDF object.
      Raises:
          ValueError: document closed or encrypted.
      """
      if self.is_closed or self.is_encrypted:
          raise ValueError("document closed or encrypted")
      pdf_doc = _as_pdf_document(self)
      ENSURE_OPERATION(pdf_doc)
      return mupdf.pdf_create_object(pdf_doc)
  def get_oc(doc: 'Document', xref: int) -> int:
      """Return optional content object xref for an image or form xobject.

      Args:
          xref: (int) xref number of an image or form xobject.
      Returns:
          The xref found under the object's /OC key, or 0 if that key
          holds no reference.
      Raises:
          ValueError: document closed or encrypted, or the object's
              /Subtype is neither /Image nor /Form.
      """
      if doc.is_closed or doc.is_encrypted:
          raise ValueError("document closed or encrypted")
      t, name = doc.xref_get_key(xref, "Subtype")
      if t != "name" or name not in ("/Image", "/Form"):
          raise ValueError("bad object type at xref %i" % xref)
      t, oc = doc.xref_get_key(xref, "OC")
      if t != "xref":
          return 0
      # the value looks like "<number> 0 R": keep the number only
      return int(oc.replace("0 R", ""))
  def get_ocgs(self):
      """Show existing optional content groups.

      Returns:
          A dictionary keyed by OCG xref. Each value is a dictionary
          with keys "name", "intent" (list of names), "on" and "usage".
          Empty if /OCProperties/OCGs is not an array.
      """
      creator_key = mupdf.pdf_new_name("CreatorInfo")
      pdf_doc = _as_pdf_document(self)
      ocg_array = mupdf.pdf_dict_getl(
          mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf_doc), PDF_NAME('Root')),
          PDF_NAME('OCProperties'),
          PDF_NAME('OCGs'),
      )
      result = {}
      if not mupdf.pdf_is_array(ocg_array):
          return result
      for pos in range(mupdf.pdf_array_len(ocg_array)):
          ocg = mupdf.pdf_array_get(ocg_array, pos)
          xref = mupdf.pdf_to_num(ocg)
          name = mupdf.pdf_to_text_string(mupdf.pdf_dict_get(ocg, PDF_NAME('Name')))
          subtype = mupdf.pdf_dict_getl(ocg, PDF_NAME('Usage'), creator_key, PDF_NAME('Subtype'))
          usage = mupdf.pdf_to_name(subtype) if subtype.m_internal else None

          # /Intent is either one name or an array of names
          intents = []
          intent = mupdf.pdf_dict_get(ocg, PDF_NAME('Intent'))
          if intent.m_internal:
              if mupdf.pdf_is_name(intent):
                  intents.append(mupdf.pdf_to_name(intent))
              elif mupdf.pdf_is_array(intent):
                  for k in range(mupdf.pdf_array_len(intent)):
                      entry = mupdf.pdf_array_get(intent, k)
                      if mupdf.pdf_is_name(entry):
                          intents.append(mupdf.pdf_to_name(entry))

          # the second argument's type depends on the MuPDF version
          if mupdf_version_tuple >= (1, 26, 11):
              resources = mupdf.PdfResourceStack()
              hidden = mupdf.pdf_is_ocg_hidden(pdf_doc, resources, usage, ocg)
          else:
              hidden = mupdf.pdf_is_ocg_hidden(pdf_doc, mupdf.PdfObj(), usage, ocg)

          result[xref] = {
              "name": name,
              "intent": intents,
              "on": not hidden,
              "usage": usage,
          }
      return result
  def get_ocmd(doc: 'Document', xref: int) -> dict:
      """Return the definition of an OCMD (optional content membership dictionary).

      Recognizes PDF dict keys /OCGs (PDF array of OCGs), /P (policy string) and
      /VE (visibility expression, PDF array). Via string manipulation, this
      info is converted to a Python dictionary with keys "xref", "ocgs", "policy"
      and "ve" - ready to recycle as input for 'set_ocmd()'.

      Raises:
          ValueError: xref out of range, object is no OCMD, unknown
              policy name after /P, or unbalanced brackets in /VE.
      """
      if xref not in range(doc.xref_length()):
          raise ValueError("bad xref")
      text = doc.xref_object(xref, compressed=True)
      if "/Type/OCMD" not in text:
          raise ValueError("bad object type")
      textlen = len(text)

      p0 = text.find("/OCGs[")  # look for /OCGs key
      p1 = text.find("]", p0)
      if p0 < 0 or p1 < 0:  # no OCGs found
          ocgs = None
      else:
          # "5 0 R 6 0 R" -> [5, 6]
          ocgs = [int(n) for n in text[p0 + 6 : p1].replace("0 R", " ").split()]

      p0 = text.find("/P/")  # look for /P policy key
      if p0 < 0:
          policy = None
      else:
          start = p0 + 3
          # The PDF policy names are AllOn, AnyOn, AnyOff and AllOff. Match
          # the name directly following "/P/" (ignoring case) instead of
          # searching for "ff" / "on" anywhere in the rest of the object.
          lowered = text[start : start + 6].lower()
          for known in ("anyoff", "alloff", "anyon", "allon"):
              if lowered.startswith(known):
                  policy = text[start : start + len(known)]
                  break
          else:  # some irregular syntax
              raise ValueError("bad object at xref")

      p0 = text.find("/VE[")  # look for /VE visibility expression key
      if p0 < 0:  # no visibility expression found
          ve = None
      else:
          lp = rp = 0  # find end of /VE by finding last ']'.
          p1 = p0
          while lp < 1 or lp != rp:
              p1 += 1
              if not p1 < textlen:  # some irregular syntax
                  raise ValueError("bad object at xref")
              if text[p1] == "[":
                  lp += 1
              if text[p1] == "]":
                  rp += 1
          # p1 now positioned at the last "]"
          ve = text[p0 + 3 : p1 + 1]  # the PDF /VE array
          # turn the PDF array into JSON: operators become strings,
          # references become their xref numbers
          ve = (
              ve.replace("/And", '"and",')
              .replace("/Not", '"not",')
              .replace("/Or", '"or",')
          )
          ve = ve.replace(" 0 R]", "]").replace(" 0 R", ",").replace("][", "],[")
          import json

          try:
              ve = json.loads(ve)
          except Exception:
              exception_info()
              message(f"bad /VE key: {ve!r}")
              raise
      return {"xref": xref, "ocgs": ocgs, "policy": policy, "ve": ve}
  4471. def get_outline_xrefs(self):
  4472. """Get list of outline xref numbers."""
  4473. xrefs = []
  4474. pdf = _as_pdf_document(self, required=0)
  4475. if not pdf.m_internal:
  4476. return xrefs
  4477. root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
  4478. if not root.m_internal:
  4479. return xrefs
  4480. olroot = mupdf.pdf_dict_get(root, PDF_NAME('Outlines'))
  4481. if not olroot.m_internal:
  4482. return xrefs
  4483. first = mupdf.pdf_dict_get(olroot, PDF_NAME('First'))
  4484. if not first.m_internal:
  4485. return xrefs
  4486. xrefs = JM_outline_xrefs(first, xrefs)
  4487. return xrefs
  4488. def get_page_fonts(self, pno: int, full: bool =False) -> list:
  4489. """Retrieve a list of fonts used on a page.
  4490. """
  4491. if self.is_closed or self.is_encrypted:
  4492. raise ValueError("document closed or encrypted")
  4493. if not self.is_pdf:
  4494. return ()
  4495. if type(pno) is not int:
  4496. try:
  4497. pno = pno.number
  4498. except Exception:
  4499. exception_info()
  4500. raise ValueError("need a Page or page number")
  4501. val = self._getPageInfo(pno, 1)
  4502. if not full:
  4503. return [v[:-1] for v in val]
  4504. return val
  4505. def get_page_images(self, pno: int, full: bool =False) -> list:
  4506. """Retrieve a list of images used on a page.
  4507. """
  4508. if self.is_closed or self.is_encrypted:
  4509. raise ValueError("document closed or encrypted")
  4510. if not self.is_pdf:
  4511. return ()
  4512. val = self._getPageInfo(pno, 2)
  4513. if not full:
  4514. return [v[:-1] for v in val]
  4515. return val
  4516. def get_page_labels(self):
  4517. """Return page label definitions in PDF document.
  4518. Returns:
  4519. A list of dictionaries with the following format:
  4520. {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int}.
  4521. """
  4522. # Jorj McKie, 2021-01-10
  4523. return [utils.rule_dict(item) for item in self._get_page_labels()]
  4524. def get_page_numbers(doc, label, only_one=False):
  4525. """Return a list of page numbers with the given label.
  4526. Args:
  4527. doc: PDF document object (resp. 'self').
  4528. label: (str) label.
  4529. only_one: (bool) stop searching after first hit.
  4530. Returns:
  4531. List of page numbers having this label.
  4532. """
  4533. # Jorj McKie, 2021-01-06
  4534. numbers = []
  4535. if not label:
  4536. return numbers
  4537. labels = doc._get_page_labels()
  4538. if labels == []:
  4539. return numbers
  4540. for i in range(doc.page_count):
  4541. plabel = utils.get_label_pno(i, labels)
  4542. if plabel == label:
  4543. numbers.append(i)
  4544. if only_one:
  4545. break
  4546. return numbers
  4547. def get_page_pixmap(
  4548. doc: 'Document',
  4549. pno: int,
  4550. *,
  4551. matrix: matrix_like = None,
  4552. dpi=None,
  4553. colorspace: Colorspace = None,
  4554. clip: rect_like = None,
  4555. alpha: bool = False,
  4556. annots: bool = True,
  4557. ) -> 'Pixmap':
  4558. """Create pixmap of document page by page number.
  4559. Notes:
  4560. Convenience function calling page.get_pixmap.
  4561. Args:
  4562. pno: (int) page number
  4563. matrix: pymupdf.Matrix for transformation (default: pymupdf.Identity).
  4564. colorspace: (str,pymupdf.Colorspace) rgb, rgb, gray - case ignored, default csRGB.
  4565. clip: (irect-like) restrict rendering to this area.
  4566. alpha: (bool) include alpha channel
  4567. annots: (bool) also render annotations
  4568. """
  4569. if matrix is None:
  4570. matrix = Identity
  4571. if colorspace is None:
  4572. colorspace = csRGB
  4573. return doc[pno].get_pixmap(
  4574. matrix=matrix,
  4575. dpi=dpi, colorspace=colorspace,
  4576. clip=clip,
  4577. alpha=alpha,
  4578. annots=annots
  4579. )
  4580. def get_page_text(
  4581. doc: 'Document',
  4582. pno: int,
  4583. option: str = "text",
  4584. clip: rect_like = None,
  4585. flags: OptInt = None,
  4586. textpage: 'TextPage' = None,
  4587. sort: bool = False,
  4588. ) -> typing.Any:
  4589. """Extract a document page's text by page number.
  4590. Notes:
  4591. Convenience function calling page.get_text().
  4592. Args:
  4593. pno: page number
  4594. option: (str) text, words, blocks, html, dict, json, rawdict, xhtml or xml.
  4595. Returns:
  4596. output from page.TextPage().
  4597. """
  4598. return doc[pno].get_text(option, clip=clip, flags=flags, sort=sort)
  4599. def get_page_xobjects(self, pno: int) -> list:
  4600. """Retrieve a list of XObjects used on a page.
  4601. """
  4602. if self.is_closed or self.is_encrypted:
  4603. raise ValueError("document closed or encrypted")
  4604. if not self.is_pdf:
  4605. return ()
  4606. val = self._getPageInfo(pno, 3)
  4607. return val
  4608. def get_sigflags(self):
  4609. """Get the /SigFlags value."""
  4610. pdf = _as_pdf_document(self, required=0)
  4611. if not pdf.m_internal:
  4612. return -1 # not a PDF
  4613. sigflags = mupdf.pdf_dict_getl(
  4614. mupdf.pdf_trailer(pdf),
  4615. PDF_NAME('Root'),
  4616. PDF_NAME('AcroForm'),
  4617. PDF_NAME('SigFlags'),
  4618. )
  4619. sigflag = -1
  4620. if sigflags.m_internal:
  4621. sigflag = mupdf.pdf_to_int(sigflags)
  4622. return sigflag
  4623. def get_toc(
  4624. doc: 'Document',
  4625. simple: bool = True,
  4626. ) -> list:
  4627. """Create a table of contents.
  4628. Args:
  4629. simple: a bool to control output. Returns a list, where each entry consists of outline level, title, page number and link destination (if simple = False). For details see PyMuPDF's documentation.
  4630. """
  4631. def recurse(olItem, liste, lvl):
  4632. """Recursively follow the outline item chain and record item information in a list."""
  4633. while olItem and olItem.this.m_internal:
  4634. if olItem.title:
  4635. title = olItem.title
  4636. else:
  4637. title = " "
  4638. if not olItem.is_external:
  4639. if olItem.uri:
  4640. if olItem.page == -1:
  4641. resolve = doc.resolve_link(olItem.uri)
  4642. page = resolve[0] + 1
  4643. else:
  4644. page = olItem.page + 1
  4645. else:
  4646. page = -1
  4647. else:
  4648. page = -1
  4649. if not simple:
  4650. link = utils.getLinkDict(olItem, doc)
  4651. liste.append([lvl, title, page, link])
  4652. else:
  4653. liste.append([lvl, title, page])
  4654. if olItem.down:
  4655. liste = recurse(olItem.down, liste, lvl + 1)
  4656. olItem = olItem.next
  4657. return liste
  4658. # ensure document is open
  4659. if doc.is_closed:
  4660. raise ValueError("document closed")
  4661. doc.init_doc()
  4662. olItem = doc.outline
  4663. if not olItem:
  4664. return []
  4665. lvl = 1
  4666. liste = []
  4667. toc = recurse(olItem, liste, lvl)
  4668. if doc.is_pdf and not simple:
  4669. doc._extend_toc_items(toc)
  4670. return toc
  4671. def get_xml_metadata(self):
  4672. """Get document XML metadata."""
  4673. xml = None
  4674. pdf = _as_pdf_document(self, required=0)
  4675. if pdf.m_internal:
  4676. xml = mupdf.pdf_dict_getl(
  4677. mupdf.pdf_trailer(pdf),
  4678. PDF_NAME('Root'),
  4679. PDF_NAME('Metadata'),
  4680. )
  4681. if xml is not None and xml.m_internal:
  4682. buff = mupdf.pdf_load_stream(xml)
  4683. rc = JM_UnicodeFromBuffer(buff)
  4684. else:
  4685. rc = ''
  4686. return rc
  4687. def has_annots(doc: 'Document') -> bool:
  4688. """Check whether there are annotations on any page."""
  4689. if doc.is_closed:
  4690. raise ValueError("document closed")
  4691. if not doc.is_pdf:
  4692. raise ValueError("is no PDF")
  4693. for i in range(doc.page_count):
  4694. for item in doc.page_annot_xrefs(i):
  4695. # pylint: disable=no-member
  4696. if not (item[1] == mupdf.PDF_ANNOT_LINK or item[1] == mupdf.PDF_ANNOT_WIDGET): # pylint: disable=no-member
  4697. return True
  4698. return False
  4699. def has_links(doc: 'Document') -> bool:
  4700. """Check whether there are links on any page."""
  4701. if doc.is_closed:
  4702. raise ValueError("document closed")
  4703. if not doc.is_pdf:
  4704. raise ValueError("is no PDF")
  4705. for i in range(doc.page_count):
  4706. for item in doc.page_annot_xrefs(i):
  4707. if item[1] == mupdf.PDF_ANNOT_LINK: # pylint: disable=no-member
  4708. return True
  4709. return False
  4710. def init_doc(self):
  4711. if self.is_encrypted:
  4712. raise ValueError("cannot initialize - document still encrypted")
  4713. self._outline = self._loadOutline()
  4714. self.metadata = dict(
  4715. [
  4716. (k,self._getMetadata(v)) for k,v in {
  4717. 'format':'format',
  4718. 'title':'info:Title',
  4719. 'author':'info:Author',
  4720. 'subject':'info:Subject',
  4721. 'keywords':'info:Keywords',
  4722. 'creator':'info:Creator',
  4723. 'producer':'info:Producer',
  4724. 'creationDate':'info:CreationDate',
  4725. 'modDate':'info:ModDate',
  4726. 'trapped':'info:Trapped'
  4727. }.items()
  4728. ]
  4729. )
  4730. self.metadata['encryption'] = None if self._getMetadata('encryption')=='None' else self._getMetadata('encryption')
  4731. def insert_file(self,
  4732. infile,
  4733. from_page=-1,
  4734. to_page=-1,
  4735. start_at=-1,
  4736. rotate=-1,
  4737. links=True,
  4738. annots=True,
  4739. show_progress=0,
  4740. final=1,
  4741. ):
  4742. '''
  4743. Insert an arbitrary supported document to an existing PDF.
  4744. The infile may be given as a filename, a Document or a Pixmap. Other
  4745. parameters - where applicable - equal those of insert_pdf().
  4746. '''
  4747. src = None
  4748. if isinstance(infile, Pixmap):
  4749. if infile.colorspace.n > 3:
  4750. infile = Pixmap(csRGB, infile)
  4751. src = Document("png", infile.tobytes())
  4752. elif isinstance(infile, Document):
  4753. src = infile
  4754. else:
  4755. src = Document(infile)
  4756. if not src:
  4757. raise ValueError("bad infile parameter")
  4758. if not src.is_pdf:
  4759. pdfbytes = src.convert_to_pdf()
  4760. src = Document("pdf", pdfbytes)
  4761. return self.insert_pdf(
  4762. src,
  4763. from_page=from_page,
  4764. to_page=to_page,
  4765. start_at=start_at,
  4766. rotate=rotate,
  4767. links=links,
  4768. annots=annots,
  4769. show_progress=show_progress,
  4770. final=final,
  4771. )
  4772. def insert_page(
  4773. doc: 'Document',
  4774. pno: int,
  4775. text: typing.Union[str, list, None] = None,
  4776. fontsize: float = 11,
  4777. width: float = 595,
  4778. height: float = 842,
  4779. fontname: str = "helv",
  4780. fontfile: OptStr = None,
  4781. color: OptSeq = (0,),
  4782. ) -> int:
  4783. """Create a new PDF page and insert some text.
  4784. Notes:
  4785. Function combining pymupdf.Document.new_page() and pymupdf.Page.insert_text().
  4786. For parameter details see these methods.
  4787. """
  4788. page = doc.new_page(pno=pno, width=width, height=height)
  4789. if not bool(text):
  4790. return 0
  4791. rc = page.insert_text(
  4792. (50, 72),
  4793. text,
  4794. fontsize=fontsize,
  4795. fontname=fontname,
  4796. fontfile=fontfile,
  4797. color=color,
  4798. )
  4799. return rc
def insert_pdf(
        self,
        docsrc,
        *,
        from_page=-1,
        to_page=-1,
        start_at=-1,
        rotate=-1,
        links=1,
        annots=1,
        widgets=1,
        join_duplicates=0,
        show_progress=0,
        final=1,
        _gmap=None,
        ):
    """Insert a page range from another PDF.

    Args:
        docsrc: PDF to copy from. Must be different object, but may be same file.
        from_page: (int) first source page to copy, 0-based, default 0.
        to_page: (int) last source page to copy, 0-based, default last page.
        start_at: (int) from_page will become this page number in target.
        rotate: (int) rotate copied pages, default -1 is no change.
        links: (int/bool) whether to also copy links.
        annots: (int/bool) whether to also copy annotations.
        widgets: (int/bool) whether to also copy form fields.
        join_duplicates: (int/bool) join or rename duplicate widget names.
        show_progress: (int) progress message interval, 0 is no messages.
        final: (bool) indicates last insertion from this source PDF.
        _gmap: internal use only. The passed value is not used: it is
            replaced by the Graftmap stored for the source document.
    Raises:
        ValueError: document closed or encrypted, or source and target are
            the same object.
        TypeError: source or target is no PDF (non-extra code path).

    Copy sequence reversed if from_page > to_page."""

    # Insert pages from a source PDF into this PDF.
    # For reconstructing the links (_do_links method), we must save the
    # insertion point (start_at) if it was specified as -1.
    #log( 'insert_pdf(): start')
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if self._graft_id == docsrc._graft_id:
        raise ValueError("source and target cannot be same object")
    # NOTE(review): this value of 'sa' is overwritten by 'sa = start_at'
    # further down before it is ever read.
    sa = start_at
    if sa < 0:
        sa = self.page_count
    outCount = self.page_count
    srcCount = docsrc.page_count

    # local copies of page numbers
    fp = from_page
    tp = to_page
    sa = start_at

    # normalize page numbers
    fp = max(fp, 0)             # -1 = first page
    fp = min(fp, srcCount - 1)  # but do not exceed last page

    if tp < 0:
        tp = srcCount - 1       # -1 = last page
    tp = min(tp, srcCount - 1)  # but do not exceed last page

    if sa < 0:
        sa = outCount           # -1 = behind last page
    sa = min(sa, outCount)      # but that is also the limit

    # announce the operation only if progress messages are requested and
    # len(docsrc) exceeds the message interval
    if len(docsrc) > show_progress > 0:
        inname = os.path.basename(docsrc.name)
        if not inname:
            inname = "memory PDF"
        outname = os.path.basename(self.name)
        if not outname:
            outname = "memory PDF"
        message("Inserting '%s' at '%s'" % (inname, outname))

    # retrieve / make a Graftmap to avoid duplicate objects
    #log( 'insert_pdf(): Graftmaps')
    isrt = docsrc._graft_id
    _gmap = self.Graftmaps.get(isrt, None)
    if _gmap is None:
        #log( 'insert_pdf(): Graftmaps2')
        _gmap = Graftmap(self)
        self.Graftmaps[isrt] = _gmap

    if g_use_extra:
        # this path receives the page numbers as given by the caller, not
        # the normalized fp / tp / sa
        #log( 'insert_pdf(): calling extra_FzDocument_insert_pdf()')
        extra_FzDocument_insert_pdf(
                self.this,
                docsrc.this,
                from_page,
                to_page,
                start_at,
                rotate,
                links,
                annots,
                show_progress,
                final,
                _gmap,
                )
        #log( 'insert_pdf(): extra_FzDocument_insert_pdf() returned.')
    else:
        pdfout = _as_pdf_document(self)
        pdfsrc = _as_pdf_document(docsrc)

        if not pdfout.m_internal or not pdfsrc.m_internal:
            raise TypeError( "source or target not a PDF")
        ENSURE_OPERATION(pdfout)
        JM_merge_range(pdfout, pdfsrc, fp, tp, sa, rotate, links, annots, show_progress, _gmap)

    #log( 'insert_pdf(): calling self._reset_page_refs()')
    self._reset_page_refs()
    if links:
        #log( 'insert_pdf(): calling self._do_links()')
        self._do_links(docsrc, from_page=fp, to_page=tp, start_at=sa)
    if widgets:
        self._do_widgets(docsrc, _gmap, from_page=fp, to_page=tp, start_at=sa, join_duplicates=join_duplicates)
    # after the last insertion from this source its Graftmap entry is reset
    if final == 1:
        self.Graftmaps[isrt] = None
    #log( 'insert_pdf(): returning')
  4906. @property
  4907. def is_dirty(self):
  4908. pdf = _as_pdf_document(self, required=0)
  4909. if not pdf.m_internal:
  4910. return False
  4911. r = mupdf.pdf_has_unsaved_changes(pdf)
  4912. return True if r else False
  4913. @property
  4914. def is_fast_webaccess(self):
  4915. '''
  4916. Check whether we have a linearized PDF.
  4917. '''
  4918. pdf = _as_pdf_document(self, required=0)
  4919. if pdf.m_internal:
  4920. return mupdf.pdf_doc_was_linearized(pdf)
  4921. return False # gracefully handle non-PDF
  4922. @property
  4923. def is_form_pdf(self):
  4924. """Either False or PDF field count."""
  4925. pdf = _as_pdf_document(self, required=0)
  4926. if not pdf.m_internal:
  4927. return False
  4928. count = -1
  4929. try:
  4930. fields = mupdf.pdf_dict_getl(
  4931. mupdf.pdf_trailer(pdf),
  4932. mupdf.PDF_ENUM_NAME_Root,
  4933. mupdf.PDF_ENUM_NAME_AcroForm,
  4934. mupdf.PDF_ENUM_NAME_Fields,
  4935. )
  4936. if mupdf.pdf_is_array(fields):
  4937. count = mupdf.pdf_array_len(fields)
  4938. except Exception:
  4939. if g_exceptions_verbose: exception_info()
  4940. return False
  4941. if count >= 0:
  4942. return count
  4943. return False
  4944. @property
  4945. def is_pdf(self):
  4946. """Check for PDF."""
  4947. if isinstance(self.this, mupdf.PdfDocument):
  4948. return True
  4949. # Avoid calling smupdf.pdf_specifics because it will end up creating
  4950. # a new PdfDocument which will call pdf_create_document(), which is ok
  4951. # but a little unnecessary.
  4952. #
  4953. if mupdf.ll_pdf_specifics(self.this.m_internal):
  4954. ret = True
  4955. else:
  4956. ret = False
  4957. return ret
  4958. @property
  4959. def is_reflowable(self):
  4960. """Check if document is layoutable."""
  4961. if self.is_closed:
  4962. raise ValueError("document closed")
  4963. return bool(mupdf.fz_is_document_reflowable(self))
  4964. @property
  4965. def is_repaired(self):
  4966. """Check whether PDF was repaired."""
  4967. pdf = _as_pdf_document(self, required=0)
  4968. if not pdf.m_internal:
  4969. return False
  4970. r = mupdf.pdf_was_repaired(pdf)
  4971. if r:
  4972. return True
  4973. return False
  4974. def journal_can_do(self):
  4975. """Show if undo and / or redo are possible."""
  4976. if self.is_closed or self.is_encrypted:
  4977. raise ValueError("document closed or encrypted")
  4978. undo=0
  4979. redo=0
  4980. pdf = _as_pdf_document(self)
  4981. undo = mupdf.pdf_can_undo(pdf)
  4982. redo = mupdf.pdf_can_redo(pdf)
  4983. return {'undo': bool(undo), 'redo': bool(redo)}
  4984. def journal_enable(self):
  4985. """Activate document journalling."""
  4986. if self.is_closed or self.is_encrypted:
  4987. raise ValueError("document closed or encrypted")
  4988. pdf = _as_pdf_document(self)
  4989. mupdf.pdf_enable_journal(pdf)
  4990. def journal_is_enabled(self):
  4991. """Check if journalling is enabled."""
  4992. if self.is_closed or self.is_encrypted:
  4993. raise ValueError("document closed or encrypted")
  4994. pdf = _as_pdf_document(self)
  4995. enabled = pdf.m_internal and pdf.m_internal.journal
  4996. return enabled
  4997. def journal_load(self, filename):
  4998. """Load a journal from a file."""
  4999. if self.is_closed or self.is_encrypted:
  5000. raise ValueError("document closed or encrypted")
  5001. pdf = _as_pdf_document(self)
  5002. if isinstance(filename, str):
  5003. mupdf.pdf_load_journal(pdf, filename)
  5004. else:
  5005. res = JM_BufferFromBytes(filename)
  5006. stm = mupdf.fz_open_buffer(res)
  5007. mupdf.pdf_deserialise_journal(pdf, stm)
  5008. if not pdf.m_internal.journal:
  5009. RAISEPY( "Journal and document do not match", JM_Exc_FileDataError)
  5010. def journal_op_name(self, step):
  5011. """Show operation name for given step."""
  5012. if self.is_closed or self.is_encrypted:
  5013. raise ValueError("document closed or encrypted")
  5014. pdf = _as_pdf_document(self)
  5015. name = mupdf.pdf_undoredo_step(pdf, step)
  5016. return name
  5017. def journal_position(self):
  5018. """Show journalling state."""
  5019. if self.is_closed or self.is_encrypted:
  5020. raise ValueError("document closed or encrypted")
  5021. steps=0
  5022. pdf = _as_pdf_document(self)
  5023. rc, steps = mupdf.pdf_undoredo_state(pdf)
  5024. return rc, steps
  5025. def journal_redo(self):
  5026. """Move forward in the journal."""
  5027. if self.is_closed or self.is_encrypted:
  5028. raise ValueError("document closed or encrypted")
  5029. pdf = _as_pdf_document(self)
  5030. mupdf.pdf_redo(pdf)
  5031. return True
  5032. def journal_save(self, filename):
  5033. """Save journal to a file."""
  5034. if self.is_closed or self.is_encrypted:
  5035. raise ValueError("document closed or encrypted")
  5036. pdf = _as_pdf_document(self)
  5037. if isinstance(filename, str):
  5038. mupdf.pdf_save_journal(pdf, filename)
  5039. else:
  5040. out = JM_new_output_fileptr(filename)
  5041. mupdf.pdf_write_journal(pdf, out)
  5042. out.fz_close_output()
  5043. def journal_start_op(self, name=None):
  5044. """Begin a journalling operation."""
  5045. if self.is_closed or self.is_encrypted:
  5046. raise ValueError("document closed or encrypted")
  5047. pdf = _as_pdf_document(self)
  5048. if not pdf.m_internal.journal:
  5049. raise RuntimeError( "Journalling not enabled")
  5050. if name:
  5051. mupdf.pdf_begin_operation(pdf, name)
  5052. else:
  5053. mupdf.pdf_begin_implicit_operation(pdf)
  5054. def journal_stop_op(self):
  5055. """End a journalling operation."""
  5056. if self.is_closed or self.is_encrypted:
  5057. raise ValueError("document closed or encrypted")
  5058. pdf = _as_pdf_document(self)
  5059. mupdf.pdf_end_operation(pdf)
  5060. def journal_undo(self):
  5061. """Move backwards in the journal."""
  5062. if self.is_closed or self.is_encrypted:
  5063. raise ValueError("document closed or encrypted")
  5064. pdf = _as_pdf_document(self)
  5065. mupdf.pdf_undo(pdf)
  5066. return True
  5067. @property
  5068. def language(self):
  5069. """Document language."""
  5070. pdf = _as_pdf_document(self, required=0)
  5071. if not pdf.m_internal:
  5072. return
  5073. lang = mupdf.pdf_document_language(pdf)
  5074. if lang == mupdf.FZ_LANG_UNSET:
  5075. return
  5076. return mupdf.fz_string_from_text_language2(lang)
  5077. @property
  5078. def last_location(self):
  5079. """Id (chapter, page) of last page."""
  5080. if self.is_closed:
  5081. raise ValueError("document closed")
  5082. last_loc = mupdf.fz_last_page(self.this)
  5083. return last_loc.chapter, last_loc.page
  5084. def layer_ui_configs(self):
  5085. """Show OC visibility status modifiable by user."""
  5086. pdf = _as_pdf_document(self)
  5087. info = mupdf.PdfLayerConfigUi()
  5088. n = mupdf.pdf_count_layer_config_ui( pdf)
  5089. rc = []
  5090. for i in range(n):
  5091. mupdf.pdf_layer_config_ui_info( pdf, i, info)
  5092. if info.type == 1:
  5093. type_ = "checkbox"
  5094. elif info.type == 2:
  5095. type_ = "radiobox"
  5096. else:
  5097. type_ = "label"
  5098. item = {
  5099. "number": i,
  5100. "text": info.text,
  5101. "depth": info.depth,
  5102. "type": type_,
  5103. "on": info.selected,
  5104. "locked": info.locked,
  5105. }
  5106. rc.append(item)
  5107. return rc
  5108. def layout(self, rect=None, width=0, height=0, fontsize=11):
  5109. """Re-layout a reflowable document."""
  5110. if self.is_closed or self.is_encrypted:
  5111. raise ValueError("document closed or encrypted")
  5112. doc = self.this
  5113. if not mupdf.fz_is_document_reflowable( doc):
  5114. return
  5115. w = width
  5116. h = height
  5117. r = JM_rect_from_py(rect)
  5118. if not mupdf.fz_is_infinite_rect(r):
  5119. w = r.x1 - r.x0
  5120. h = r.y1 - r.y0
  5121. if w <= 0.0 or h <= 0.0:
  5122. raise ValueError( "bad page size")
  5123. mupdf.fz_layout_document( doc, w, h, fontsize)
  5124. self._reset_page_refs()
  5125. self.init_doc()
  5126. def load_page(self, page_id):
  5127. """Load a page.
  5128. 'page_id' is either a 0-based page number or a tuple (chapter, pno),
  5129. with chapter number and page number within that chapter.
  5130. """
  5131. if self.is_closed or self.is_encrypted:
  5132. raise ValueError("document closed or encrypted")
  5133. if page_id is None:
  5134. page_id = 0
  5135. if page_id not in self:
  5136. raise ValueError("page not in document")
  5137. if type(page_id) is int and page_id < 0:
  5138. np = self.page_count
  5139. while page_id < 0:
  5140. page_id += np
  5141. if isinstance(page_id, int):
  5142. page = mupdf.fz_load_page(self.this, page_id)
  5143. else:
  5144. chapter, pagenum = page_id
  5145. page = mupdf.fz_load_chapter_page(self.this, chapter, pagenum)
  5146. val = Page(page, self)
  5147. val.thisown = True
  5148. val.parent = self
  5149. self._page_refs[id(val)] = val
  5150. val._annot_refs = weakref.WeakValueDictionary()
  5151. val.number = page_id
  5152. return val
  5153. def location_from_page_number(self, pno):
  5154. """Convert pno to (chapter, page)."""
  5155. if self.is_closed:
  5156. raise ValueError("document closed")
  5157. this_doc = self.this
  5158. loc = mupdf.fz_make_location(-1, -1)
  5159. page_count = mupdf.fz_count_pages(this_doc)
  5160. while pno < 0:
  5161. pno += page_count
  5162. if pno >= page_count:
  5163. raise ValueError( MSG_BAD_PAGENO)
  5164. loc = mupdf.fz_location_from_page_number(this_doc, pno)
  5165. return loc.chapter, loc.page
  5166. def make_bookmark(self, loc):
  5167. """Make a page pointer before layouting document."""
  5168. if self.is_closed or self.is_encrypted:
  5169. raise ValueError("document closed or encrypted")
  5170. loc = mupdf.FzLocation(*loc)
  5171. mark = mupdf.ll_fz_make_bookmark2( self.this.m_internal, loc.internal())
  5172. return mark
  5173. @property
  5174. def markinfo(self) -> dict:
  5175. """Return the PDF MarkInfo value."""
  5176. xref = self.pdf_catalog()
  5177. if xref == 0:
  5178. return None
  5179. rc = self.xref_get_key(xref, "MarkInfo")
  5180. if rc[0] == "null":
  5181. return {}
  5182. if rc[0] == "xref":
  5183. xref = int(rc[1].split()[0])
  5184. val = self.xref_object(xref, compressed=True)
  5185. elif rc[0] == "dict":
  5186. val = rc[1]
  5187. else:
  5188. val = None
  5189. if val is None or not (val[:2] == "<<" and val[-2:] == ">>"):
  5190. return {}
  5191. valid = {"Marked": False, "UserProperties": False, "Suspects": False}
  5192. val = val[2:-2].split("/")
  5193. for v in val[1:]:
  5194. try:
  5195. key, value = v.split()
  5196. except Exception:
  5197. if g_exceptions_verbose > 1: exception_info()
  5198. return valid
  5199. if value == "true":
  5200. valid[key] = True
  5201. return valid
  5202. def move_page(self, pno: int, to: int =-1):
  5203. """Move a page within a PDF document.
  5204. Args:
  5205. pno: source page number.
  5206. to: put before this page, '-1' means after last page.
  5207. """
  5208. if self.is_closed:
  5209. raise ValueError("document closed")
  5210. page_count = len(self)
  5211. if (pno not in range(page_count) or to not in range(-1, page_count)):
  5212. raise ValueError("bad page number(s)")
  5213. before = 1
  5214. copy = 0
  5215. if to == -1:
  5216. to = page_count - 1
  5217. before = 0
  5218. return self._move_copy_page(pno, to, before, copy)
@property
def name(self):
    """Return the stored document name (attribute '_name')."""
    return self._name
  5222. def need_appearances(self, value=None):
  5223. """Get/set the NeedAppearances value."""
  5224. if not self.is_form_pdf:
  5225. return None
  5226. pdf = _as_pdf_document(self)
  5227. oldval = -1
  5228. appkey = "NeedAppearances"
  5229. form = mupdf.pdf_dict_getp(
  5230. mupdf.pdf_trailer(pdf),
  5231. "Root/AcroForm",
  5232. )
  5233. app = mupdf.pdf_dict_gets(form, appkey)
  5234. if mupdf.pdf_is_bool(app):
  5235. oldval = mupdf.pdf_to_bool(app)
  5236. if value:
  5237. mupdf.pdf_dict_puts(form, appkey, mupdf.PDF_TRUE)
  5238. else:
  5239. mupdf.pdf_dict_puts(form, appkey, mupdf.PDF_FALSE)
  5240. if value is None:
  5241. return oldval >= 0
  5242. return value
  5243. @property
  5244. def needs_pass(self):
  5245. """Indicate password required."""
  5246. if self.is_closed:
  5247. raise ValueError("document closed")
  5248. document = self.this if isinstance(self.this, mupdf.FzDocument) else self.this.super()
  5249. ret = mupdf.fz_needs_password( document)
  5250. return ret
  5251. def new_page(
  5252. doc: 'Document',
  5253. pno: int = -1,
  5254. width: float = 595,
  5255. height: float = 842,
  5256. ) -> Page:
  5257. """Create and return a new page object.
  5258. Args:
  5259. pno: (int) insert before this page. Default: after last page.
  5260. width: (float) page width in points. Default: 595 (ISO A4 width).
  5261. height: (float) page height in points. Default 842 (ISO A4 height).
  5262. Returns:
  5263. A pymupdf.Page object.
  5264. """
  5265. doc._newPage(pno, width=width, height=height)
  5266. return doc[pno]
  5267. def next_location(self, page_id):
  5268. """Get (chapter, page) of next page."""
  5269. if self.is_closed or self.is_encrypted:
  5270. raise ValueError("document closed or encrypted")
  5271. if type(page_id) is int:
  5272. page_id = (0, page_id)
  5273. if page_id not in self:
  5274. raise ValueError("page id not in document")
  5275. if tuple(page_id) == self.last_location:
  5276. return ()
  5277. this_doc = _as_fz_document(self)
  5278. val = page_id[ 0]
  5279. if not isinstance(val, int):
  5280. RAISEPY(MSG_BAD_PAGEID, PyExc_ValueError)
  5281. chapter = val
  5282. val = page_id[ 1]
  5283. pno = val
  5284. loc = mupdf.fz_make_location(chapter, pno)
  5285. next_loc = mupdf.fz_next_page( this_doc, loc)
  5286. return next_loc.chapter, next_loc.page
  5287. def page_annot_xrefs(self, n):
  5288. if g_use_extra:
  5289. return extra.page_annot_xrefs( self.this, n)
  5290. if isinstance(self.this, mupdf.PdfDocument):
  5291. page_count = mupdf.pdf_count_pages(self.this)
  5292. pdf_document = self.this
  5293. else:
  5294. page_count = mupdf.fz_count_pages(self.this)
  5295. pdf_document = _as_pdf_document(self)
  5296. while n < 0:
  5297. n += page_count
  5298. if n > page_count:
  5299. raise ValueError( MSG_BAD_PAGENO)
  5300. page_obj = mupdf.pdf_lookup_page_obj(pdf_document, n)
  5301. annots = JM_get_annot_xref_list(page_obj)
  5302. return annots
  5303. @property
  5304. def page_count(self):
  5305. """Number of pages."""
  5306. if self.is_closed:
  5307. raise ValueError('document closed')
  5308. if g_use_extra:
  5309. return self.page_count2(self)
  5310. if isinstance( self.this, mupdf.FzDocument):
  5311. return mupdf.fz_count_pages( self.this)
  5312. else:
  5313. return mupdf.pdf_count_pages( self.this)
  def page_cropbox(self, pno):
      """Get CropBox of page number (without loading page).

      Args:
          pno: 0-based page number; negative values count from the end.

      Returns:
          A ``Rect`` built from the page object's cropbox.

      Raises:
          ValueError: if the document is closed or *pno* does not address
              an existing page.
      """
      if self.is_closed:
          raise ValueError("document closed")
      page_count = mupdf.fz_count_pages(self.this)
      pdf = _as_pdf_document(self)
      if page_count < 1:
          # An empty document has no valid page number; without this
          # guard a negative pno would loop forever below.
          raise ValueError(MSG_BAD_PAGENO)
      n = pno
      while n < 0:
          n += page_count
      if n >= page_count:
          raise ValueError(MSG_BAD_PAGENO)
      pageref = mupdf.pdf_lookup_page_obj(pdf, n)
      return Rect(JM_py_from_rect(JM_cropbox(pageref)))
  def page_number_from_location(self, page_id):
      """Convert (chapter, pno) to page number.

      Args:
          page_id: a (chapter, pno) sequence, or an int which is taken as
              a page of chapter 0 (negative ints count from the end).

      Returns:
          The page number reported by
          ``mupdf.fz_page_number_from_location`` for that location.

      Raises:
          ValueError: if *page_id* is not contained in the document.
      """
      if type(page_id) is int:
          np = self.page_count
          if page_id < 0 and np < 1:
              # An empty document cannot contain any page; without this
              # guard the loop below would never terminate.
              raise ValueError("page id not in document")
          while page_id < 0:
              page_id += np
          page_id = (0, page_id)
      if page_id not in self:
          raise ValueError("page id not in document")
      chapter, pno = page_id
      loc = mupdf.fz_make_location(chapter, pno)
      return mupdf.fz_page_number_from_location(self.this, loc)
  def page_xref(self, pno):
      """Get xref of page number.

      Args:
          pno: 0-based page number; negative values count from the end.

      Returns:
          The xref number of the page object (or the result of
          ``extra.page_xref`` when ``g_use_extra`` is set).

      Raises:
          ValueError: if the document is closed or *pno* does not address
              an existing page.
      """
      if g_use_extra:
          return extra.page_xref(self.this, pno)
      if self.is_closed:
          raise ValueError("document closed")
      page_count = mupdf.fz_count_pages(self.this)
      pdf = _as_pdf_document(self)
      if page_count < 1:
          # An empty document has no valid page number; without this
          # guard a negative pno would loop forever below.
          raise ValueError(MSG_BAD_PAGENO)
      n = pno
      while n < 0:
          n += page_count
      if n >= page_count:
          raise ValueError(MSG_BAD_PAGENO)
      return mupdf.pdf_to_num(mupdf.pdf_lookup_page_obj(pdf, n))
  @property
  def pagelayout(self) -> str:
      """Return the PDF PageLayout value.

      Returns None if there is no PDF catalog. Falls back to "SinglePage"
      unless the catalog's /PageLayout entry is a PDF name, in which case
      the name without its leading slash is returned.
      """
      catalog_xref = self.pdf_catalog()
      if catalog_xref == 0:
          return None
      kind, value = self.xref_get_key(catalog_xref, "PageLayout")[:2]
      if kind == "name":
          return value[1:]
      return "SinglePage"
  @property
  def pagemode(self) -> str:
      """Return the PDF PageMode value.

      Returns None if there is no PDF catalog. Falls back to "UseNone"
      unless the catalog's /PageMode entry is a PDF name, in which case
      the name without its leading slash is returned.
      """
      catalog_xref = self.pdf_catalog()
      if catalog_xref == 0:
          return None
      kind, value = self.xref_get_key(catalog_xref, "PageMode")[:2]
      if kind == "name":
          return value[1:]
      return "UseNone"
  if sys.implementation.version >= (3, 9):
      _pages_ret = collections.abc.Iterable[Page]
  else:
      # Appending `[Page]` causes `TypeError: 'ABCMeta' object is not subscriptable`.
      _pages_ret = collections.abc.Iterable

  def pages(self, start: OptInt =None, stop: OptInt =None, step: OptInt =None) -> _pages_ret:
      """Return a generator iterator over a page range.

      Arguments have the same meaning as for the range() built-in.
      A negative start counts from the end, a stop beyond the page count
      is clipped to it, and a missing step becomes -1 when start > stop,
      else 1. Being a generator, errors are raised on first iteration.

      Raises:
          ValueError: for a start outside the document or a zero step.
      """
      page_total = self.page_count
      if not page_total:
          return

      # normalize the start value
      first = start or 0
      while first < 0:
          first += page_total
      if first not in range(page_total):
          raise ValueError("bad start page number")

      # normalize the stop value
      if stop is not None and stop <= page_total:
          last = stop
      else:
          last = page_total

      # normalize the step value
      if step == 0:
          raise ValueError("arg 3 must not be zero")
      if step is None:
          step = -1 if first > last else 1

      for pno in range(first, last, step):
          yield self.load_page(pno)
  def pdf_catalog(self):
      """Get xref of PDF catalog.

      Returns:
          The xref number of the trailer's /Root entry, or 0 when the
          document has no underlying PDF object.
      """
      pdf = _as_pdf_document(self, required=0)
      if not pdf.m_internal:
          return 0
      trailer = mupdf.pdf_trailer(pdf)
      return mupdf.pdf_to_num(mupdf.pdf_dict_get(trailer, PDF_NAME('Root')))
  def pdf_trailer(self, compressed=0, ascii=0):
      """Get PDF trailer as a string.

      Delegates to ``self.xref_object`` with xref -1, forwarding the
      *compressed* and *ascii* options unchanged.
      """
      trailer_xref = -1
      return self.xref_object(
          trailer_xref,
          compressed=compressed,
          ascii=ascii,
      )
  @property
  def permissions(self):
      """Document permissions.

      Returns 0 for an encrypted document. For a PDF the value of
      ``mupdf.pdf_document_permissions`` is returned. For other document
      types a PDF-style value is simulated: it starts with all
      permissions granted and has each flag switched off that
      ``mupdf.fz_has_permission`` denies.
      """
      if self.is_encrypted:
          return 0
      doc = self.this
      pdf = mupdf.pdf_document_from_fz_document(doc)

      # for PDF return result of standard function
      if pdf.m_internal:
          return mupdf.pdf_document_permissions(pdf)

      # otherwise simulate the PDF return value
      flag_pairs = (
          (mupdf.FZ_PERMISSION_PRINT, mupdf.PDF_PERM_PRINT),
          (mupdf.FZ_PERMISSION_EDIT, mupdf.PDF_PERM_MODIFY),
          (mupdf.FZ_PERMISSION_COPY, mupdf.PDF_PERM_COPY),
          (mupdf.FZ_PERMISSION_ANNOTATE, mupdf.PDF_PERM_ANNOTATE),
      )
      perm = 0xFFFFFFFC  # all permissions granted
      for fz_flag, pdf_flag in flag_pairs:
          if not mupdf.fz_has_permission(doc, fz_flag):
              perm ^= pdf_flag  # switch off where needed
      return perm
  def prev_location(self, page_id):
      """Get (chapter, page) of previous page.

      Args:
          page_id: an int (taken as a page of chapter 0) or a
              (chapter, page) sequence.

      Returns:
          The (chapter, page) tuple of the previous page, or an empty
          tuple when *page_id* is the first location (0, 0).

      Raises:
          ValueError: if the document is closed or encrypted, or if
              *page_id* is not contained in the document.
      """
      if self.is_closed or self.is_encrypted:
          raise ValueError("document closed or encrypted")
      if type(page_id) is int:
          page_id = (0, page_id)
      if page_id not in self:
          raise ValueError("page id not in document")
      # Normalize to a tuple (as next_location does) so that a list such
      # as [0, 0] is also recognized as the first location.
      if tuple(page_id) == (0, 0):
          return ()
      chapter, pno = page_id
      loc = mupdf.fz_make_location(chapter, pno)
      prev_loc = mupdf.fz_previous_page(self.this, loc)
      return prev_loc.chapter, prev_loc.page
  def reload_page(self, page: Page) -> Page:
      """Make a fresh copy of a page.

      The passed-in page object is invalidated (its ``this`` is dropped
      and ``_erase()`` is called), the page with the same number is
      loaded again and the saved annotation references are copied into
      the new page's ``_annot_refs``.

      Args:
          page: the page to reload; must not be used afterwards.

      Returns:
          The newly loaded page object.
      """
      old_annots = {}  # copy annot references to here
      pno = page.number  # save the page number
      for k, v in page._annot_refs.items():  # save the annot dictionary
          old_annots[k] = v

      # When we call `self.load_page()` below, it will end up in
      # fz_load_chapter_page(), which will return any matching page in the
      # document's list of non-ref-counted loaded pages, instead of actually
      # reloading the page.
      #
      # We want to assert that we have actually reloaded the fz_page, and not
      # simply returned the same `fz_page*` pointer from the document's list
      # of non-ref-counted loaded pages.
      #
      # So we first remove our reference to the `fz_page*`. This will
      # decrement .refs, and if .refs was 1, this is guaranteed to free the
      # `fz_page*` and remove it from the document's list if it was there. So
      # we are guaranteed that our returned `fz_page*` is from a genuine
      # reload, even if it happens to reuse the original block of memory.
      #
      # However if the original .refs is greater than one, there must be
      # other references to the `fz_page` somewhere, and we require that
      # these other references are not keeping the page in the document's
      # list. We check that we are returning a newly loaded page by
      # asserting that our returned `fz_page*` is different from the original
      # `fz_page*` - the original was not freed, so a new `fz_page` cannot
      # reuse the same block of memory.
      #
      # Both values must be captured before the reference is dropped below.
      refs_old = page.this.m_internal.refs
      m_internal_old = page.this.m_internal_value()

      page.this = None
      page._erase()  # remove the page
      page = None
      TOOLS.store_shrink(100)

      page = self.load_page(pno)  # reload the page

      # copy annot refs over to the new dictionary
      #page_proxy = weakref.proxy(page)
      for k, v in old_annots.items():
          annot = old_annots[k]
          #annot.parent = page_proxy  # refresh parent to new page
          page._annot_refs[k] = annot
      if refs_old == 1:
          # We know that `page.this = None` will have decremented the ref
          # count to zero so we are guaranteed that the new `fz_page` is a
          # new page even if it happens to have reused the same block of
          # memory.
          pass
      else:
          # Check that the new `fz_page*` is different from the original.
          m_internal_new = page.this.m_internal_value()
          assert m_internal_new != m_internal_old, \
                  f'{refs_old=} {m_internal_old=:#x} {m_internal_new=:#x}'
      return page
  def resolve_link(self, uri=None, chapters=0):
      """Calculate internal link destination.

      Args:
          uri: (str) some Link.uri
          chapters: (bool) whether to use (chapter, page) format

      Returns:
          (page_id, x, y) where x, y are point coordinates on the page.
          page_id is either page number (if chapters=0), or (chapter, pno).
          If *uri* is empty or cannot be resolved, page_id is -1
          (or (-1, -1) with chapters) and x, y are 0.
      """
      # result used whenever no destination can be determined
      unresolved = ((-1, -1), 0, 0) if chapters else (-1, 0, 0)
      if not uri:
          return unresolved
      try:
          loc, xp, yp = mupdf.fz_resolve_link(self.this, uri)
      except Exception:
          # best effort: report "not found" instead of propagating
          if g_exceptions_verbose:
              exception_info()
          return unresolved
      if chapters:
          return (loc.chapter, loc.page), xp, yp
      return mupdf.fz_page_number_from_location(self.this, loc), xp, yp
  def rewrite_images(
      self,
      dpi_threshold=None,
      dpi_target=0,
      quality=0,
      lossy=True,
      lossless=True,
      bitonal=True,
      color=True,
      gray=True,
      set_to_gray=False,
      options=None,
  ):
      """Rewrite images in a PDF document.

      The typical use case is to reduce the size of the PDF by recompressing
      images. Default parameters will convert all images to JPEG where
      possible, using the specified resolutions and quality. Exclude
      undesired images by setting parameters to False.

      Args:
          dpi_threshold: look at images with a larger DPI only. If falsy,
              both *dpi_threshold* and *dpi_target* are treated as 0.
          dpi_target: change eligible images to this DPI.
          quality: Quality of the recompressed images (0-100).
          lossy: process lossy image types (e.g. JPEG).
          lossless: process lossless image types (e.g. PNG).
          bitonal: process black-and-white images (e.g. FAX)
          color: process colored images.
          gray: process gray images.
          set_to_gray: whether to change the PDF to gray at process start.
          options: (PdfImageRewriterOptions) Custom options for image
              rewriting (optional). Expert use only. If provided, other
              parameters are ignored, except set_to_gray.

      Raises:
          ValueError: if a positive *dpi_target* is not less than
              *dpi_threshold*, or if *options* carries attributes that a
              fresh PdfImageRewriterOptions does not have.
      """
      quality_str = str(quality)
      if not dpi_threshold:
          dpi_threshold = dpi_target = 0
      if dpi_target > 0 and dpi_target >= dpi_threshold:
          # f-string so that the offending values show up in the message
          raise ValueError(f"{dpi_target=} must be less than {dpi_threshold=}")
      template_opts = mupdf.PdfImageRewriterOptions()
      dir1 = set(dir(template_opts))  # for checking that only existing options are set
      if not options:
          opts = mupdf.PdfImageRewriterOptions()

          def configure(prefix, method):
              """Fill the five rewrite settings of one image category."""
              setattr(opts, f"{prefix}_image_recompress_method", method)
              setattr(opts, f"{prefix}_image_subsample_method", mupdf.FZ_SUBSAMPLE_AVERAGE)
              setattr(opts, f"{prefix}_image_subsample_to", dpi_target)
              setattr(opts, f"{prefix}_image_subsample_threshold", dpi_threshold)
              setattr(opts, f"{prefix}_image_recompress_quality", quality_str)

          if bitonal:
              configure("bitonal", mupdf.FZ_RECOMPRESS_FAX)
          for enabled, colorspace in ((color, "color"), (gray, "gray")):
              if not enabled:
                  continue
              if lossless:
                  configure(f"{colorspace}_lossless", mupdf.FZ_RECOMPRESS_JPEG)
              if lossy:
                  configure(f"{colorspace}_lossy", mupdf.FZ_RECOMPRESS_JPEG)
      else:
          opts = options
      dir2 = set(dir(opts))  # checking that only possible options were used
      invalid_options = dir2 - dir1
      if invalid_options:
          raise ValueError(f"Invalid options: {invalid_options}")
      if set_to_gray:
          self.recolor(1)
      pdf = _as_pdf_document(self)
      mupdf.pdf_rewrite_images(pdf, opts)
  def recolor(self, components=1):
      """Change the color component count on all pages.

      Args:
          components: (int) desired color component count, one of 1, 3, 4.

      Invokes the same-named method for all pages.

      Raises:
          ValueError: if the document is not a PDF.
      """
      if not self.is_pdf:
          raise ValueError("is no PDF")
      page_total = self.page_count
      for pno in range(page_total):
          self.load_page(pno).recolor(components)
  def resolve_names(self):
      """Convert the PDF's destination names into a Python dict.

      The only parameter is the pymupdf.Document.
      All names found in the catalog under keys "/Dests" and "/Names/Dests" are
      being included. The result is stored in ``self._resolved_names`` and
      returned unchanged on subsequent calls.

      Returns:
          A dictionary with the following layout:
          - key: (str) the name
          - value: (dict) with the following layout:
              * "page":  target page number (0-based). If no page number found -1.
              * "to": (x, y) target point on page - currently in PDF coordinates,
                      i.e. point (0,0) is the bottom-left of the page.
              * "zoom": (float) the zoom factor
              * "dest": (str) only occurs if the target location on the page has
                      not been provided as "/XYZ" or if no page number was found.
          Examples:
          {'__bookmark_1': {'page': 0, 'to': (0.0, 541.0), 'zoom': 0.0},
          '__bookmark_2': {'page': 0, 'to': (0.0, 481.45), 'zoom': 0.0}}

          or

          '21154a7c20684ceb91f9c9adc3b677c40': {'page': -1, 'dest': '/XYZ 15.75 1486 0'}, ...
      """
      if hasattr(self, "_resolved_names"):  # do not execute multiple times!
          return self._resolved_names
      # this is a backward listing of page xref to page number
      page_xrefs = {self.page_xref(i): i for i in range(self.page_count)}

      def obj_string(obj):
          """Return string version of a PDF object definition."""
          buffer = mupdf.fz_new_buffer(512)
          output = mupdf.FzOutput(buffer)
          mupdf.pdf_print_obj(output, obj, 1, 0)
          output.fz_close_output()
          return JM_UnicodeFromBuffer(buffer)

      def get_array(val):
          """Generate value of one item of the names dictionary."""
          templ_dict = {"page": -1, "dest": ""}  # value template
          if val.pdf_is_indirect():
              val = mupdf.pdf_resolve_indirect(val)
          if val.pdf_is_array():
              array = obj_string(val)
          elif val.pdf_is_dict():
              # for a dictionary, the destination is taken from its "D" entry
              array = obj_string(mupdf.pdf_dict_gets(val, "D"))
          else:  # if all fails return the empty template
              return templ_dict

          # replace PDF "null" by zero, omit the square brackets
          array = array.replace("null", "0")[1:-1]

          # find stuff before first "/"
          idx = array.find("/")
          if idx < 1:  # this has no target page spec
              templ_dict["dest"] = array  # return the orig. string
              return templ_dict

          subval = array[:idx].strip()  # stuff before "/"
          array = array[idx:]  # stuff from "/" onwards
          templ_dict["dest"] = array
          # if we start with /XYZ: extract x, y, zoom
          # 1, 2 or 3 of these values may actually be supplied
          if array.startswith("/XYZ"):
              del templ_dict["dest"]  # don't return orig string in this case

              # make a list of the 3 tokens following "/XYZ"
              array_list = array.split()[1:4]  # omit "/XYZ"

              # fill up missing tokens with "0" strings
              while len(array_list) < 3:  # fill up if too short
                  array_list.append("0")  # add missing values

              # make list of 3 floats: x, y and zoom
              t = list(map(float, array_list))  # the resulting x, y, z values
              templ_dict["to"] = (t[0], t[1])
              templ_dict["zoom"] = t[2]

          # extract page number
          if subval.endswith("0 R"):  # page xref given?
              # unknown xrefs map to page -1
              templ_dict["page"] = page_xrefs.get(int(subval.split()[0]),-1)
          else:  # naked page number given
              templ_dict["page"] = int(subval)
          return templ_dict

      def fill_dict(dest_dict, pdf_dict):
          """Generate name resolution items for pdf_dict.

          This may be either "/Names/Dests" or just "/Dests"
          """
          # length of the PDF dictionary
          name_count = mupdf.pdf_dict_len(pdf_dict)

          # extract key-val of each dict item
          for i in range(name_count):
              key = mupdf.pdf_dict_get_key(pdf_dict, i)
              val = mupdf.pdf_dict_get_val(pdf_dict, i)
              if key.pdf_is_name():  # this should always be true!
                  dict_key = key.pdf_to_name()
              else:
                  message(f"key {i} is no /Name")
                  dict_key = None

              if dict_key:
                  dest_dict[dict_key] = get_array(val)  # store key/value in dict

      # access underlying PDF document of fz Document
      pdf = mupdf.pdf_document_from_fz_document(self)

      # access PDF catalog
      catalog = mupdf.pdf_dict_gets(mupdf.pdf_trailer(pdf), "Root")

      dest_dict = {}

      # make PDF_NAME(Dests)
      dests = mupdf.pdf_new_name("Dests")

      # extract destinations old style (PDF 1.1)
      old_dests = mupdf.pdf_dict_get(catalog, dests)
      if old_dests.pdf_is_dict():
          fill_dict(dest_dict, old_dests)

      # extract destinations new style (PDF 1.2+)
      # entries found here overwrite same-named old style entries
      tree = mupdf.pdf_load_name_tree(pdf, dests)
      if tree.pdf_is_dict():
          fill_dict(dest_dict, tree)

      self._resolved_names = dest_dict  # store result or reuse
      return dest_dict
  def save(
      self,
      filename,
      garbage=0,
      clean=0,
      deflate=0,
      deflate_images=0,
      deflate_fonts=0,
      incremental=0,
      ascii=0,
      expand=0,
      linear=0,
      no_new_id=0,
      appearance=0,
      pretty=0,
      encryption=1,
      permissions=4095,
      owner_pw=None,
      user_pw=None,
      preserve_metadata=1,
      use_objstms=0,
      compression_effort=0,
  ):
      """Save PDF to file, pathlib.Path or file pointer.

      Most keyword arguments are copied unchanged into the matching
      field of a ``mupdf.PdfWriteOptions`` object (for example
      *garbage* -> ``do_garbage``, *expand* -> ``do_decompress``,
      *clean* -> ``do_clean`` and ``do_sanitize``). If only *user_pw* is
      given, it is used as the owner password as well.

      Args:
          filename: str, pathlib.Path, or a file object. A file object
              with a ``name`` attribute is replaced by that name; one
              without must at least provide ``seek``.

      Raises:
          ValueError: if the document is closed or encrypted, has no
              pages, *filename* is unusable, the incremental / linear /
              use_objstms combination is invalid, or a password is
              longer than 40 characters.
      """
      if self.is_closed or self.is_encrypted:
          raise ValueError("document closed or encrypted")

      # Reduce Path objects and named file objects to a plain file name.
      if type(filename) is not str:
          if hasattr(filename, "open"):  # assume: pathlib.Path
              filename = str(filename)
          elif hasattr(filename, "name"):  # assume: file object
              filename = filename.name
          elif not hasattr(filename, "seek"):  # assume file object
              raise ValueError("filename must be str, Path or file object")

      if filename == self.name and not incremental:
          raise ValueError("save to original must be incremental")
      if linear and use_objstms:
          raise ValueError("'linear' and 'use_objstms' cannot both be requested")
      if self.page_count < 1:
          raise ValueError("cannot save with zero pages")
      if incremental and (self.name != filename or self.stream):
          raise ValueError("incremental needs original file")
      for password in (user_pw, owner_pw):
          if password and len(password) > 40:
              raise ValueError("password length must not exceed 40")

      pdf = _as_pdf_document(self)
      write_opts = mupdf.PdfWriteOptions()
      for field, value in (
          ("do_incremental", incremental),
          ("do_ascii", ascii),
          ("do_compress", deflate),
          ("do_compress_images", deflate_images),
          ("do_compress_fonts", deflate_fonts),
          ("do_decompress", expand),
          ("do_garbage", garbage),
          ("do_pretty", pretty),
          ("do_linear", linear),
          ("do_clean", clean),
          ("do_sanitize", clean),
          ("dont_regenerate_id", no_new_id),
          ("do_appearance", appearance),
          ("do_encrypt", encryption),
          ("permissions", permissions),
      ):
          setattr(write_opts, field, value)

      # the user password doubles as owner password if none was given
      effective_owner_pw = owner_pw if owner_pw is not None else user_pw
      if effective_owner_pw is not None:
          write_opts.opwd_utf8_set_value(effective_owner_pw)
      if user_pw is not None:
          write_opts.upwd_utf8_set_value(user_pw)

      write_opts.do_preserve_metadata = preserve_metadata
      write_opts.do_use_objstms = use_objstms
      write_opts.compression_effort = compression_effort

      pdf.m_internal.resynth_required = 0
      JM_embedded_clean(pdf)
      if no_new_id == 0:
          JM_ensure_identity(pdf)

      if not isinstance(filename, str):
          # a file object without a name: write through an output wrapper
          sink = JM_new_output_fileptr(filename)
          mupdf.pdf_write_document(pdf, sink, write_opts)
          sink.fz_close_output()
      else:
          mupdf.pdf_save_document(pdf, filename, write_opts)
  def save_snapshot(self, filename):
      """Save a file snapshot suitable for journalling.

      Args:
          filename: str, pathlib.Path, or a file object with a ``name``
              attribute (its name is used).

      Raises:
          ValueError: if the document is closed, *filename* is of an
              unsupported type, or it equals the document's own name.
      """
      if self.is_closed:
          raise ValueError("doc is closed")
      if type(filename) is not str:
          if hasattr(filename, "open"):  # assume: pathlib.Path
              filename = str(filename)
          elif hasattr(filename, "name"):  # assume: file object
              filename = filename.name
          else:
              raise ValueError("filename must be str, Path or file object")
      if filename == self.name:
          raise ValueError("cannot snapshot to original")
      mupdf.pdf_save_snapshot(_as_pdf_document(self), filename)
  def saveIncr(self):
      """Save PDF incrementally.

      Calls ``save`` on the document's own name with ``incremental=True``
      and ``encryption=mupdf.PDF_ENCRYPT_KEEP``.
      """
      target = self.name
      return self.save(
          target,
          incremental=True,
          encryption=mupdf.PDF_ENCRYPT_KEEP,
      )
  5845. # ------------------------------------------------------------------------------
  5846. # Remove potentially sensitive data from a PDF. Similar to the Adobe
  5847. # Acrobat 'sanitize' function
  5848. # ------------------------------------------------------------------------------
  def scrub(
      doc: 'Document',
      attached_files: bool = True,
      clean_pages: bool = True,
      embedded_files: bool = True,
      hidden_text: bool = True,
      javascript: bool = True,
      metadata: bool = True,
      redactions: bool = True,
      redact_images: int = 0,
      remove_links: bool = True,
      reset_fields: bool = True,
      reset_responses: bool = True,
      thumbnails: bool = True,
      xml_metadata: bool = True,
  ) -> None:
      """Remove potentially sensitive data from a PDF (in place).

      Args:
          doc: the PDF document to modify.
          attached_files: overwrite the content of file attachment
              annotations with a single space.
          clean_pages: clean the page /Contents. If False, *hidden_text*
              and *redactions* are switched off as well.
          embedded_files: delete all embedded files.
          hidden_text: remove hidden text, i.e. content following a
              "3 Tr" operator inside a BT/ET text object.
          javascript: replace /JavaScript action objects by empty ones.
          metadata: reset the standard metadata.
          redactions: apply the redaction annotations found on a page.
          redact_images: passed as ``images`` to ``page.apply_redactions``.
          remove_links: delete all links of every page.
          reset_fields: call ``reset()`` on every widget.
          reset_responses: call ``delete_responses()`` on every annotation.
          thumbnails: set each page's /Thumb key to null.
          xml_metadata: delete XML metadata and /Metadata references.

      Raises:
          ValueError: if *doc* is no PDF, is closed or encrypted, or if
              an xref without object definition is encountered.
      """

      def remove_hidden(cont_lines):
          """Remove hidden text from a PDF page.

          Args:
              cont_lines: list of lines with /Contents content. Should have status
                  from after page.cleanContents().

          Returns:
              List of /Contents lines from which hidden text has been removed,
              or None if no "3 Tr" operator was found.

          Notes:
              The input must have been created after the page's /Contents object(s)
              have been cleaned with page.cleanContents(). This ensures a standard
              formatting: one command per line, single spaces between operators.
              This allows for drastic simplification of this code.
          """
          out_lines = []  # will return this
          in_text = False  # indicate if within BT/ET object
          suppress = False  # indicate text suppression active
          make_return = False
          for line in cont_lines:
              if line == b"BT":  # start of text object
                  in_text = True  # switch on
                  out_lines.append(line)  # output it
                  continue
              if line == b"ET":  # end of text object
                  in_text = False  # switch off
                  out_lines.append(line)  # output it
                  continue
              if line == b"3 Tr":  # text suppression operator
                  suppress = True  # switch on
                  make_return = True
                  continue
              # Slice instead of index: indexing bytes yields an int, which
              # never equals b"3", so the mode check would be a no-op.
              if line[-2:] == b"Tr" and line[:1] != b"3":
                  suppress = False  # text rendering changed
                  out_lines.append(line)
                  continue
              if line == b"Q":  # unstack command also switches off
                  suppress = False
                  out_lines.append(line)
                  continue
              if suppress and in_text:  # suppress hidden lines
                  continue
              out_lines.append(line)
          return out_lines if make_return else None

      if not doc.is_pdf:  # only works for PDF
          raise ValueError("is no PDF")
      if doc.is_encrypted or doc.is_closed:
          raise ValueError("closed or encrypted doc")

      if not clean_pages:
          hidden_text = False
          redactions = False

      if metadata:
          doc.set_metadata({})  # remove standard metadata

      for page in doc:
          if reset_fields:
              # reset form fields (widgets)
              for widget in page.widgets():
                  widget.reset()

          if remove_links:
              for link in page.get_links():  # remove all links
                  page.delete_link(link)

          found_redacts = False
          for annot in page.annots():
              if annot.type[0] == mupdf.PDF_ANNOT_FILE_ATTACHMENT and attached_files:
                  annot.update_file(buffer_=b" ")  # set file content to empty
              if reset_responses:
                  annot.delete_responses()
              if annot.type[0] == mupdf.PDF_ANNOT_REDACT:  # pylint: disable=no-member
                  found_redacts = True

          if redactions and found_redacts:
              page.apply_redactions(images=redact_images)

          # Thumbnail removal is independent of content cleaning, so it
          # must happen before the early exits below can skip it.
          if thumbnails:  # remove page thumbnails?
              if doc.xref_get_key(page.xref, "Thumb")[0] != "null":
                  doc.xref_set_key(page.xref, "Thumb", "null")

          if not (clean_pages or hidden_text):
              continue  # done with the page

          page.clean_contents()
          if not page.get_contents():
              continue
          if hidden_text:
              xrefs = page.get_contents()
              assert len(xrefs) == 1  # only one because of cleaning.
              xref = xrefs[0]
              cont = doc.xref_stream(xref)
              cont_lines = remove_hidden(cont.splitlines())  # remove hidden text
              if cont_lines:  # something was actually removed
                  cont = b"\n".join(cont_lines)
                  doc.update_stream(xref, cont)  # rewrite the page /Contents

      # pages are scrubbed, now perform document-wide scrubbing
      # remove embedded files
      if embedded_files:
          for name in doc.embfile_names():
              doc.embfile_del(name)

      if xml_metadata:
          doc.del_xml_metadata()
      if not (xml_metadata or javascript):
          xref_limit = 0  # nothing to do per xref: skip the loop below
      else:
          xref_limit = doc.xref_length()
      for xref in range(1, xref_limit):
          if not doc.xref_object(xref):
              msg = "bad xref %i - clean PDF before scrubbing" % xref
              raise ValueError(msg)
          if javascript and doc.xref_get_key(xref, "S")[1] == "/JavaScript":
              # a /JavaScript action object
              obj = "<</S/JavaScript/JS()>>"  # replace with a null JavaScript
              doc.update_object(xref, obj)  # update this object
              continue  # no further handling

          if not xml_metadata:
              continue

          if doc.xref_get_key(xref, "Type")[1] == "/Metadata":
              # delete any metadata object directly
              doc.update_object(xref, "<<>>")
              doc.update_stream(xref, b"deleted", new=True)
              continue

          if doc.xref_get_key(xref, "Metadata")[0] != "null":
              doc.xref_set_key(xref, "Metadata", "null")
  def search_page_for(
      doc: 'Document',
      pno: int,
      text: str,
      quads: bool = False,
      clip: rect_like = None,
      flags: int = None,
      textpage: 'TextPage' = None,
  ) -> list:
      """Search for a string on a page.

      Args:
          pno: page number
          text: string to be searched for
          clip: restrict search to this rectangle
          quads: (bool) return quads instead of rectangles
          flags: bit switches, default: join hyphened words
          textpage: reuse a prepared textpage

      Returns:
          a list of rectangles or quads, each containing an occurrence.
      """
      if flags is None:
          # default combination of text extraction flags
          flags = 0
          flags |= TEXT_DEHYPHENATE
          flags |= TEXT_PRESERVE_LIGATURES
          flags |= TEXT_PRESERVE_WHITESPACE
          flags |= TEXT_MEDIABOX_CLIP
      page = doc[pno]
      return page.search_for(text, quads=quads, clip=clip, flags=flags, textpage=textpage)
  def select(self, pyliste):
      """Build sub-pdf with page numbers in the list.

      Args:
          pyliste: non-empty sequence of page numbers, each within
              ``range(len(self))``.

      Raises:
          ValueError: if the document is closed, encrypted or no PDF, if
              *pyliste* is no sequence, is empty, or contains a page
              number outside the document.
      """
      if self.is_closed or self.is_encrypted:
          raise ValueError("document closed or encrypted")
      if not self.is_pdf:
          raise ValueError("is no PDF")
      if not hasattr(pyliste, "__getitem__"):
          raise ValueError("sequence required")

      valid_range = range(len(self))
      if len(pyliste) == 0:
          raise ValueError("bad page number(s)")
      if min(pyliste) not in valid_range or max(pyliste) not in valid_range:
          raise ValueError("bad page number(s)")

      # get underlying pdf document,
      pdf = _as_pdf_document(self)
      # create page sub-pdf via pdf_rearrange_pages2().
      #
      if mupdf_version_tuple >= (1, 25, 3):
          # We use PDF_CLEAN_STRUCTURE_KEEP otherwise we lose structure tree
          # which, for example, breaks test_3705.
          extra_args = (mupdf.PDF_CLEAN_STRUCTURE_KEEP,)
      else:
          extra_args = ()
      mupdf.pdf_rearrange_pages2(pdf, pyliste, *extra_args)

      # remove any existing pages with their kids
      self._reset_page_refs()
  def set_language(self, language=None):
      """Set the document language of the PDF.

      Args:
          language: a language string converted with
              ``mupdf.fz_text_language_from_string``; a falsy value
              selects ``mupdf.FZ_LANG_UNSET``.

      Returns:
          True.
      """
      pdf = _as_pdf_document(self)
      lang = (
          mupdf.fz_text_language_from_string(language)
          if language
          else mupdf.FZ_LANG_UNSET
      )
      mupdf.pdf_set_document_language(pdf, lang)
      return True
  def set_layer(self, config, basestate=None, on=None, off=None, rbgroups=None, locked=None):
      """Set the PDF keys /ON, /OFF, /RBGroups of an OC layer.

      Args:
          config: -1 selects the /D entry of /OCProperties, any other
              value is used as index into its /Configs array.
          basestate: "ON", "OFF" or "Unchanged" (case-insensitive).
          on, off, locked: list or tuple of OCG xrefs.
          rbgroups: list or tuple of lists / tuples of OCG xrefs.

      Raises:
          ValueError: if the document is closed or has no optional
              content, if an argument has a bad type or names unknown
              OCGs, or if the chosen configuration does not exist.
      """
      if self.is_closed:
          raise ValueError("document closed")
      ocgs = set(self.get_ocgs().keys())
      if not ocgs:
          raise ValueError("document has no optional content")

      # 'on', 'off' and 'locked' share the same validation
      for label, xrefs in (("on", on), ("off", off), ("locked", locked)):
          if not xrefs:
              continue
          if type(xrefs) not in (list, tuple):
              raise ValueError("bad type: '%s'" % label)
          unknown = set(xrefs).difference(ocgs)
          if unknown:
              raise ValueError("bad OCGs in '%s': %s" % (label, unknown))

      if rbgroups:
          if type(rbgroups) not in (list, tuple):
              raise ValueError("bad type: 'rbgroups'")
          for group in rbgroups:
              if type(group) not in (list, tuple):
                  raise ValueError("bad RBGroup '%s'" % group)
              unknown = set(group).difference(ocgs)
              if unknown:
                  raise ValueError("bad OCGs in RBGroup: %s" % unknown)

      if basestate:
          basestate = str(basestate).upper()
          if basestate == "UNCHANGED":
              basestate = "Unchanged"
          if basestate not in ("ON", "OFF", "Unchanged"):
              raise ValueError("bad 'basestate'")

      pdf = _as_pdf_document(self)
      ocp = mupdf.pdf_dict_getl(
          mupdf.pdf_trailer(pdf),
          PDF_NAME('Root'),
          PDF_NAME('OCProperties'),
      )
      if not ocp.m_internal:
          return
      if config == -1:
          obj = mupdf.pdf_dict_get(ocp, PDF_NAME('D'))
      else:
          configs = mupdf.pdf_dict_get(ocp, PDF_NAME('Configs'))
          obj = mupdf.pdf_array_get(configs, config)
      if not obj.m_internal:
          raise ValueError(MSG_BAD_OC_CONFIG)
      JM_set_ocg_arrays(obj, basestate, on, off, rbgroups, locked)
      mupdf.ll_pdf_read_ocg(pdf.m_internal)
  6111. def set_layer_ui_config(self, number, action=0):
  6112. """Set / unset OC intent configuration."""
  6113. # The user might have given the name instead of sequence number,
  6114. # so select by that name and continue with corresp. number
  6115. if isinstance(number, str):
  6116. select = [ui["number"] for ui in self.layer_ui_configs() if ui["text"] == number]
  6117. if select == []:
  6118. raise ValueError(f"bad OCG '{number}'.")
  6119. number = select[0] # this is the number for the name
  6120. pdf = _as_pdf_document(self)
  6121. if action == 1:
  6122. mupdf.pdf_toggle_layer_config_ui(pdf, number)
  6123. elif action == 2:
  6124. mupdf.pdf_deselect_layer_config_ui(pdf, number)
  6125. else:
  6126. mupdf.pdf_select_layer_config_ui(pdf, number)
  6127. def set_markinfo(self, markinfo: dict) -> bool:
  6128. """Set the PDF MarkInfo values."""
  6129. xref = self.pdf_catalog()
  6130. if xref == 0:
  6131. raise ValueError("not a PDF")
  6132. if not markinfo or not isinstance(markinfo, dict):
  6133. return False
  6134. valid = {"Marked": False, "UserProperties": False, "Suspects": False}
  6135. if not set(valid.keys()).issuperset(markinfo.keys()):
  6136. badkeys = f"bad MarkInfo key(s): {set(markinfo.keys()).difference(valid.keys())}"
  6137. raise ValueError(badkeys)
  6138. pdfdict = "<<"
  6139. valid.update(markinfo)
  6140. for key, value in valid.items():
  6141. value=str(value).lower()
  6142. if value not in ("true", "false"):
  6143. raise ValueError(f"bad key value '{key}': '{value}'")
  6144. pdfdict += f"/{key} {value}"
  6145. pdfdict += ">>"
  6146. self.xref_set_key(xref, "MarkInfo", pdfdict)
  6147. return True
  6148. def set_metadata(doc: 'Document', m: dict = None) -> None:
  6149. """Update the PDF /Info object.
  6150. Args:
  6151. m: a dictionary like doc.metadata.
  6152. """
  6153. if not doc.is_pdf:
  6154. raise ValueError("is no PDF")
  6155. if doc.is_closed or doc.is_encrypted:
  6156. raise ValueError("document closed or encrypted")
  6157. if m is None:
  6158. m = {}
  6159. elif type(m) is not dict:
  6160. raise ValueError("bad metadata")
  6161. keymap = {
  6162. "author": "Author",
  6163. "producer": "Producer",
  6164. "creator": "Creator",
  6165. "title": "Title",
  6166. "format": None,
  6167. "encryption": None,
  6168. "creationDate": "CreationDate",
  6169. "modDate": "ModDate",
  6170. "subject": "Subject",
  6171. "keywords": "Keywords",
  6172. "trapped": "Trapped",
  6173. }
  6174. valid_keys = set(keymap.keys())
  6175. diff_set = set(m.keys()).difference(valid_keys)
  6176. if diff_set != set():
  6177. msg = "bad dict key(s): %s" % diff_set
  6178. raise ValueError(msg)
  6179. t, temp = doc.xref_get_key(-1, "Info")
  6180. if t != "xref":
  6181. info_xref = 0
  6182. else:
  6183. info_xref = int(temp.replace("0 R", ""))
  6184. if m == {} and info_xref == 0: # nothing to do
  6185. return
  6186. if info_xref == 0: # no prev metadata: get new xref
  6187. info_xref = doc.get_new_xref()
  6188. doc.update_object(info_xref, "<<>>") # fill it with empty object
  6189. doc.xref_set_key(-1, "Info", "%i 0 R" % info_xref)
  6190. elif m == {}: # remove existing metadata
  6191. doc.xref_set_key(-1, "Info", "null")
  6192. doc.init_doc()
  6193. return
  6194. for key, val in [(k, v) for k, v in m.items() if keymap[k] is not None]:
  6195. pdf_key = keymap[key]
  6196. if not bool(val) or val in ("none", "null"):
  6197. val = "null"
  6198. else:
  6199. val = get_pdf_str(val)
  6200. doc.xref_set_key(info_xref, pdf_key, val)
  6201. doc.init_doc()
  6202. return
  6203. def set_oc(doc: 'Document', xref: int, oc: int) -> None:
  6204. """Attach optional content object to image or form xobject.
  6205. Args:
  6206. xref: (int) xref number of an image or form xobject
  6207. oc: (int) xref number of an OCG or OCMD
  6208. """
  6209. if doc.is_closed or doc.is_encrypted:
  6210. raise ValueError("document close or encrypted")
  6211. t, name = doc.xref_get_key(xref, "Subtype")
  6212. if t != "name" or name not in ("/Image", "/Form"):
  6213. raise ValueError("bad object type at xref %i" % xref)
  6214. if oc > 0:
  6215. t, name = doc.xref_get_key(oc, "Type")
  6216. if t != "name" or name not in ("/OCG", "/OCMD"):
  6217. raise ValueError("bad object type at xref %i" % oc)
  6218. if oc == 0 and "OC" in doc.xref_get_keys(xref):
  6219. doc.xref_set_key(xref, "OC", "null")
  6220. return None
  6221. doc.xref_set_key(xref, "OC", "%i 0 R" % oc)
  6222. return None
  6223. def set_ocmd(
  6224. doc: 'Document',
  6225. xref: int = 0,
  6226. ocgs: typing.Union[list, None] = None,
  6227. policy: OptStr = None,
  6228. ve: typing.Union[list, None] = None,
  6229. ) -> int:
  6230. """Create or update an OCMD object in a PDF document.
  6231. Args:
  6232. xref: (int) 0 for creating a new object, otherwise update existing one.
  6233. ocgs: (list) OCG xref numbers, which shall be subject to 'policy'.
  6234. policy: one of 'AllOn', 'AllOff', 'AnyOn', 'AnyOff' (any casing).
  6235. ve: (list) visibility expression. Use instead of 'ocgs' with 'policy'.
  6236. Returns:
  6237. Xref of the created or updated OCMD.
  6238. """
  6239. all_ocgs = set(doc.get_ocgs().keys())
  6240. def ve_maker(ve):
  6241. if type(ve) not in (list, tuple) or len(ve) < 2:
  6242. raise ValueError("bad 've' format: %s" % ve)
  6243. if ve[0].lower() not in ("and", "or", "not"):
  6244. raise ValueError("bad operand: %s" % ve[0])
  6245. if ve[0].lower() == "not" and len(ve) != 2:
  6246. raise ValueError("bad 've' format: %s" % ve)
  6247. item = "[/%s" % ve[0].title()
  6248. for x in ve[1:]:
  6249. if type(x) is int:
  6250. if x not in all_ocgs:
  6251. raise ValueError("bad OCG %i" % x)
  6252. item += " %i 0 R" % x
  6253. else:
  6254. item += " %s" % ve_maker(x)
  6255. item += "]"
  6256. return item
  6257. text = "<</Type/OCMD"
  6258. if ocgs and type(ocgs) in (list, tuple): # some OCGs are provided
  6259. s = set(ocgs).difference(all_ocgs) # contains illegal xrefs
  6260. if s != set():
  6261. msg = "bad OCGs: %s" % s
  6262. raise ValueError(msg)
  6263. text += "/OCGs[" + " ".join(map(lambda x: "%i 0 R" % x, ocgs)) + "]"
  6264. if policy:
  6265. policy = str(policy).lower()
  6266. pols = {
  6267. "anyon": "AnyOn",
  6268. "allon": "AllOn",
  6269. "anyoff": "AnyOff",
  6270. "alloff": "AllOff",
  6271. }
  6272. if policy not in ("anyon", "allon", "anyoff", "alloff"):
  6273. raise ValueError("bad policy: %s" % policy)
  6274. text += "/P/%s" % pols[policy]
  6275. if ve:
  6276. text += "/VE%s" % ve_maker(ve)
  6277. text += ">>"
  6278. # make new object or replace old OCMD (check type first)
  6279. if xref == 0:
  6280. xref = doc.get_new_xref()
  6281. elif "/Type/OCMD" not in doc.xref_object(xref, compressed=True):
  6282. raise ValueError("bad xref or not an OCMD")
  6283. doc.update_object(xref, text)
  6284. return xref
  6285. def set_pagelayout(self, pagelayout: str):
  6286. """Set the PDF PageLayout value."""
  6287. valid = ("SinglePage", "OneColumn", "TwoColumnLeft", "TwoColumnRight", "TwoPageLeft", "TwoPageRight")
  6288. xref = self.pdf_catalog()
  6289. if xref == 0:
  6290. raise ValueError("not a PDF")
  6291. if not pagelayout:
  6292. raise ValueError("bad PageLayout value")
  6293. if pagelayout[0] == "/":
  6294. pagelayout = pagelayout[1:]
  6295. for v in valid:
  6296. if pagelayout.lower() == v.lower():
  6297. self.xref_set_key(xref, "PageLayout", f"/{v}")
  6298. return True
  6299. raise ValueError("bad PageLayout value")
  6300. def set_pagemode(self, pagemode: str):
  6301. """Set the PDF PageMode value."""
  6302. valid = ("UseNone", "UseOutlines", "UseThumbs", "FullScreen", "UseOC", "UseAttachments")
  6303. xref = self.pdf_catalog()
  6304. if xref == 0:
  6305. raise ValueError("not a PDF")
  6306. if not pagemode:
  6307. raise ValueError("bad PageMode value")
  6308. if pagemode[0] == "/":
  6309. pagemode = pagemode[1:]
  6310. for v in valid:
  6311. if pagemode.lower() == v.lower():
  6312. self.xref_set_key(xref, "PageMode", f"/{v}")
  6313. return True
  6314. raise ValueError("bad PageMode value")
  6315. def set_page_labels(doc, labels):
  6316. """Add / replace page label definitions in PDF document.
  6317. Args:
  6318. doc: PDF document (resp. 'self').
  6319. labels: list of label dictionaries like:
  6320. {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int},
  6321. as returned by get_page_labels().
  6322. """
  6323. # William Chapman, 2021-01-06
  6324. def create_label_str(label):
  6325. """Convert Python label dict to corresponding PDF rule string.
  6326. Args:
  6327. label: (dict) build rule for the label.
  6328. Returns:
  6329. PDF label rule string wrapped in "<<", ">>".
  6330. """
  6331. s = "%i<<" % label["startpage"]
  6332. if label.get("prefix", "") != "":
  6333. s += "/P(%s)" % label["prefix"]
  6334. if label.get("style", "") != "":
  6335. s += "/S/%s" % label["style"]
  6336. if label.get("firstpagenum", 1) > 1:
  6337. s += "/St %i" % label["firstpagenum"]
  6338. s += ">>"
  6339. return s
  6340. def create_nums(labels):
  6341. """Return concatenated string of all labels rules.
  6342. Args:
  6343. labels: (list) dictionaries as created by function 'rule_dict'.
  6344. Returns:
  6345. PDF compatible string for page label definitions, ready to be
  6346. enclosed in PDF array 'Nums[...]'.
  6347. """
  6348. labels.sort(key=lambda x: x["startpage"])
  6349. s = "".join([create_label_str(label) for label in labels])
  6350. return s
  6351. doc._set_page_labels(create_nums(labels))
def set_toc(
    doc: 'Document',
    toc: list,
    collapse: int = 1,
) -> int:
    """Create new outline tree (table of contents, TOC).

    Any existing outline is deleted first. Then one PDF object is written
    per entry of 'toc', plus the outline root object.

    Args:
        toc: (list, tuple) each entry must contain level, title, page and
            optionally top margin on the page. None or '()' remove the TOC.
            The optional 4th item may be a number (distance from the page
            top) or a destination dictionary. A negative page number makes
            an entry of kind LINK_NONE.
        collapse: (int) collapses entries beyond this level. Zero or None
            shows all entries unfolded.
    Returns:
        the number of inserted items, or the number of removed items respectively.
    Raises:
        ValueError: document closed or encrypted, not a PDF, or 'toc'
            fails one of the formal checks below.
    """
    if doc.is_closed or doc.is_encrypted:
        raise ValueError("document closed or encrypted")
    if not doc.is_pdf:
        raise ValueError("is no PDF")
    if not toc:  # remove all entries
        return len(doc._delToC())

    # validity checks --------------------------------------------------------
    if type(toc) not in (list, tuple):
        raise ValueError("'toc' must be list or tuple")
    toclen = len(toc)
    page_count = doc.page_count
    t0 = toc[0]
    if type(t0) not in (list, tuple):
        raise ValueError("items must be sequences of 3 or 4 items")
    if t0[0] != 1:
        raise ValueError("hierarchy level of item 0 must be 1")
    # Walk over pairs of neighboring rows (t1, t2). The page number is
    # checked on t1, i.e. for every row except the last one. Type, length
    # and hierarchy level are checked on t2, i.e. for every row except
    # the first one.
    for i in list(range(toclen - 1)):
        t1 = toc[i]
        t2 = toc[i + 1]
        if not -1 <= t1[2] <= page_count:
            raise ValueError("row %i: page number out of range" % i)
        if (type(t2) not in (list, tuple)) or len(t2) not in (3, 4):
            raise ValueError("bad row %i" % (i + 1))
        if (type(t2[0]) is not int) or t2[0] < 1:
            raise ValueError("bad hierarchy level in row %i" % (i + 1))
        # a row may be at most one level deeper than its predecessor
        if t2[0] > t1[0] + 1:
            raise ValueError("bad hierarchy level in row %i" % (i + 1))
    # no formal errors in toc --------------------------------------------------

    # --------------------------------------------------------------------------
    # make a list of xref numbers, which we can use for our TOC entries
    # --------------------------------------------------------------------------
    old_xrefs = doc._delToC()  # del old outlines, get their xref numbers

    # prepare table of xrefs for new bookmarks
    # NOTE: the list of old xrefs is discarded right here, so none of them
    # is re-used: every TOC row gets a fresh xref in the loop below.
    old_xrefs = []
    xref = [0] + old_xrefs
    xref[0] = doc._getOLRootNumber()  # entry zero is outline root xref number
    if toclen > len(old_xrefs):  # too few old xrefs?
        for i in range((toclen - len(old_xrefs))):
            xref.append(doc.get_new_xref())  # acquire new ones

    lvltab = {0: 0}  # to store last entry per hierarchy level

    # ------------------------------------------------------------------------------
    # contains new outline objects as strings - first one is the outline root
    # ------------------------------------------------------------------------------
    olitems = [{"count": 0, "first": -1, "last": -1, "xref": xref[0]}]
    # ------------------------------------------------------------------------------
    # build olitems as a list of PDF-like connected dictionaries
    # All links between items ("first", "last", "prev", "next", "parent")
    # are indices into 'olitems' / 'xref' with -1 meaning "none". Index 0
    # is the outline root, TOC row i is stored at index i + 1.
    # ------------------------------------------------------------------------------
    for i in range(toclen):
        o = toc[i]
        lvl = o[0]  # level
        title = get_pdf_str(o[1])  # title
        # 0-based page number, forced into the range of existing pages
        pno = min(doc.page_count - 1, max(0, o[2] - 1))  # page number
        page_xref = doc.page_xref(pno)
        page_height = doc.page_cropbox(pno).height
        top = Point(72, page_height - 36)
        dest_dict = {"to": top, "kind": LINK_GOTO}  # fall back target
        if o[2] < 0:
            dest_dict["kind"] = LINK_NONE
        if len(o) > 3:  # some target is specified
            if type(o[3]) in (int, float):  # convert a number to a point
                dest_dict["to"] = Point(72, page_height - o[3])
            else:  # if something else, make sure we have a dict
                # We make a copy of o[3] to avoid modifying our caller's data.
                dest_dict = o[3].copy() if type(o[3]) is dict else dest_dict
                if "to" not in dest_dict:  # target point not in dict?
                    dest_dict["to"] = top  # put default in
                else:  # transform target to PDF coordinates
                    page = doc[pno]
                    point = Point(dest_dict["to"])
                    point.y = page.cropbox.height - point.y
                    point = point * page.rotation_matrix
                    dest_dict["to"] = (point.x, point.y)
        d = {}
        d["first"] = -1
        d["count"] = 0
        d["last"] = -1
        d["prev"] = -1
        d["next"] = -1
        d["dest"] = utils.getDestStr(page_xref, dest_dict)
        d["top"] = dest_dict["to"]
        d["title"] = title
        d["parent"] = lvltab[lvl - 1]
        d["xref"] = xref[i + 1]
        d["color"] = dest_dict.get("color")
        # written as /F further down: 1 for italic plus 2 for bold
        d["flags"] = dest_dict.get("italic", 0) + 2 * dest_dict.get("bold", 0)
        lvltab[lvl] = i + 1
        parent = olitems[lvltab[lvl - 1]]  # the parent entry

        # operator precedence: A or (collapse and lvl > collapse)
        if (
            dest_dict.get("collapse") or collapse and lvl > collapse
        ):  # suppress expansion
            parent["count"] -= 1  # make /Count negative
        else:
            parent["count"] += 1  # positive /Count
        if parent["first"] == -1:  # this is the first child of its parent
            parent["first"] = i + 1
            parent["last"] = i + 1
        else:  # append behind the parent's current last child
            d["prev"] = parent["last"]
            prev = olitems[parent["last"]]
            prev["next"] = i + 1
            parent["last"] = i + 1
        olitems.append(d)

    # ------------------------------------------------------------------------------
    # now create each outline item as a string and insert it in the PDF
    # The try / except blocks skip keys an item does not have: the root
    # item (index 0) only contains "count", "first", "last" and "xref".
    # ------------------------------------------------------------------------------
    for i, ol in enumerate(olitems):
        txt = "<<"
        if ol["count"] != 0:
            txt += "/Count %i" % ol["count"]
        try:
            txt += ol["dest"]
        except Exception:
            # Verbose in PyMuPDF/tests.
            if g_exceptions_verbose >= 2: exception_info()
            pass
        try:
            if ol["first"] > -1:
                txt += "/First %i 0 R" % xref[ol["first"]]
        except Exception:
            if g_exceptions_verbose >= 2: exception_info()
            pass
        try:
            if ol["last"] > -1:
                txt += "/Last %i 0 R" % xref[ol["last"]]
        except Exception:
            if g_exceptions_verbose >= 2: exception_info()
            pass
        try:
            if ol["next"] > -1:
                txt += "/Next %i 0 R" % xref[ol["next"]]
        except Exception:
            # Verbose in PyMuPDF/tests.
            if g_exceptions_verbose >= 2: exception_info()
            pass
        try:
            if ol["parent"] > -1:
                txt += "/Parent %i 0 R" % xref[ol["parent"]]
        except Exception:
            # Verbose in PyMuPDF/tests.
            if g_exceptions_verbose >= 2: exception_info()
            pass
        try:
            if ol["prev"] > -1:
                txt += "/Prev %i 0 R" % xref[ol["prev"]]
        except Exception:
            # Verbose in PyMuPDF/tests.
            if g_exceptions_verbose >= 2: exception_info()
            pass
        try:
            txt += "/Title" + ol["title"]
        except Exception:
            # Verbose in PyMuPDF/tests.
            if g_exceptions_verbose >= 2: exception_info()
            pass

        # a color is only written if it has exactly three components
        if ol.get("color") and len(ol["color"]) == 3:
            txt += f"/C[ {_format_g(tuple(ol['color']))}]"
        if ol.get("flags", 0) > 0:
            txt += "/F %i" % ol["flags"]

        if i == 0:  # special: this is the outline root
            txt += "/Type/Outlines"  # so add the /Type entry
        txt += ">>"
        doc.update_object(xref[i], txt)  # insert the PDF object

    doc.init_doc()
    return toclen
  6530. def set_toc_item(
  6531. doc: 'Document',
  6532. idx: int,
  6533. dest_dict: OptDict = None,
  6534. kind: OptInt = None,
  6535. pno: OptInt = None,
  6536. uri: OptStr = None,
  6537. title: OptStr = None,
  6538. to: point_like = None,
  6539. filename: OptStr = None,
  6540. zoom: float = 0,
  6541. ) -> None:
  6542. """Update TOC item by index.
  6543. It allows changing the item's title and link destination.
  6544. Args:
  6545. idx:
  6546. (int) desired index of the TOC list, as created by get_toc.
  6547. dest_dict:
  6548. (dict) destination dictionary as created by get_toc(False).
  6549. Outrules all other parameters. If None, the remaining parameters
  6550. are used to make a dest dictionary.
  6551. kind:
  6552. (int) kind of link (pymupdf.LINK_GOTO, etc.). If None, then only
  6553. the title will be updated. If pymupdf.LINK_NONE, the TOC item will
  6554. be deleted.
  6555. pno:
  6556. (int) page number (1-based like in get_toc). Required if
  6557. pymupdf.LINK_GOTO.
  6558. uri:
  6559. (str) the URL, required if pymupdf.LINK_URI.
  6560. title:
  6561. (str) the new title. No change if None.
  6562. to:
  6563. (point-like) destination on the target page. If omitted, (72, 36)
  6564. will be used as target coordinates.
  6565. filename:
  6566. (str) destination filename, required for pymupdf.LINK_GOTOR and
  6567. pymupdf.LINK_LAUNCH.
  6568. name:
  6569. (str) a destination name for pymupdf.LINK_NAMED.
  6570. zoom:
  6571. (float) a zoom factor for the target location (pymupdf.LINK_GOTO).
  6572. """
  6573. xref = doc.get_outline_xrefs()[idx]
  6574. page_xref = 0
  6575. if type(dest_dict) is dict:
  6576. if dest_dict["kind"] == LINK_GOTO:
  6577. pno = dest_dict["page"]
  6578. page_xref = doc.page_xref(pno)
  6579. page_height = doc.page_cropbox(pno).height
  6580. to = dest_dict.get('to', Point(72, 36))
  6581. to.y = page_height - to.y
  6582. dest_dict["to"] = to
  6583. action = utils.getDestStr(page_xref, dest_dict)
  6584. if not action.startswith("/A"):
  6585. raise ValueError("bad bookmark dest")
  6586. color = dest_dict.get("color")
  6587. if color:
  6588. color = list(map(float, color))
  6589. if len(color) != 3 or min(color) < 0 or max(color) > 1:
  6590. raise ValueError("bad color value")
  6591. bold = dest_dict.get("bold", False)
  6592. italic = dest_dict.get("italic", False)
  6593. flags = italic + 2 * bold
  6594. collapse = dest_dict.get("collapse")
  6595. return doc._update_toc_item(
  6596. xref,
  6597. action=action[2:],
  6598. title=title,
  6599. color=color,
  6600. flags=flags,
  6601. collapse=collapse,
  6602. )
  6603. if kind == LINK_NONE: # delete bookmark item
  6604. return doc.del_toc_item(idx)
  6605. if kind is None and title is None: # treat as no-op
  6606. return None
  6607. if kind is None: # only update title text
  6608. return doc._update_toc_item(xref, action=None, title=title)
  6609. if kind == LINK_GOTO:
  6610. if pno is None or pno not in range(1, doc.page_count + 1):
  6611. raise ValueError("bad page number")
  6612. page_xref = doc.page_xref(pno - 1)
  6613. page_height = doc.page_cropbox(pno - 1).height
  6614. if to is None:
  6615. to = Point(72, page_height - 36)
  6616. else:
  6617. to = Point(to)
  6618. to.y = page_height - to.y
  6619. ddict = {
  6620. "kind": kind,
  6621. "to": to,
  6622. "uri": uri,
  6623. "page": pno,
  6624. "file": filename,
  6625. "zoom": zoom,
  6626. }
  6627. action = utils.getDestStr(page_xref, ddict)
  6628. if action == "" or not action.startswith("/A"):
  6629. raise ValueError("bad bookmark dest")
  6630. return doc._update_toc_item(xref, action=action[2:], title=title)
  6631. def set_xml_metadata(self, metadata):
  6632. """Store XML document level metadata."""
  6633. if self.is_closed or self.is_encrypted:
  6634. raise ValueError("document closed or encrypted")
  6635. pdf = _as_pdf_document(self)
  6636. root = mupdf.pdf_dict_get( mupdf.pdf_trailer( pdf), PDF_NAME('Root'))
  6637. if not root.m_internal:
  6638. RAISEPY( MSG_BAD_PDFROOT, JM_Exc_FileDataError)
  6639. res = mupdf.fz_new_buffer_from_copied_data( metadata.encode('utf-8'))
  6640. xml = mupdf.pdf_dict_get( root, PDF_NAME('Metadata'))
  6641. if xml.m_internal:
  6642. JM_update_stream( pdf, xml, res, 0)
  6643. else:
  6644. xml = mupdf.pdf_add_stream( pdf, res, mupdf.PdfObj(), 0)
  6645. mupdf.pdf_dict_put( xml, PDF_NAME('Type'), PDF_NAME('Metadata'))
  6646. mupdf.pdf_dict_put( xml, PDF_NAME('Subtype'), PDF_NAME('XML'))
  6647. mupdf.pdf_dict_put( root, PDF_NAME('Metadata'), xml)
  6648. def subset_fonts(doc: 'Document', verbose: bool = False, fallback: bool = False) -> OptInt:
  6649. """Build font subsets in a PDF.
  6650. Eligible fonts are potentially replaced by smaller versions. Page text is
  6651. NOT rewritten and thus should retain properties like being hidden or
  6652. controlled by optional content.
  6653. This method by default uses MuPDF's own internal feature to create subset
  6654. fonts. As this is a new function, errors may still occur. In this case,
  6655. please fall back to using the previous version by using "fallback=True".
  6656. Fallback mode requires the external package 'fontTools'.
  6657. Args:
  6658. fallback: use the older deprecated implementation.
  6659. verbose: only used by fallback mode.
  6660. Returns:
  6661. The new MuPDF-based code returns None. The deprecated fallback
  6662. mode returns 0 if there are no fonts to subset. Otherwise, it
  6663. returns the decrease in fontsize (the difference in fontsize),
  6664. measured in bytes.
  6665. """
  6666. # Font binaries: - "buffer" -> (names, xrefs, (unicodes, glyphs))
  6667. # An embedded font is uniquely defined by its fontbuffer only. It may have
  6668. # multiple names and xrefs.
  6669. # Once the sets of used unicodes and glyphs are known, we compute a
  6670. # smaller version of the buffer user package fontTools.
  6671. if not fallback: # by default use MuPDF function
  6672. pdf = mupdf.pdf_document_from_fz_document(doc)
  6673. mupdf.pdf_subset_fonts2(pdf, list(range(doc.page_count)))
  6674. return
  6675. font_buffers = {}
  6676. def get_old_widths(xref):
  6677. """Retrieve old font '/W' and '/DW' values."""
  6678. df = doc.xref_get_key(xref, "DescendantFonts")
  6679. if df[0] != "array": # only handle xref specifications
  6680. return None, None
  6681. df_xref = int(df[1][1:-1].replace("0 R", ""))
  6682. widths = doc.xref_get_key(df_xref, "W")
  6683. if widths[0] != "array": # no widths key found
  6684. widths = None
  6685. else:
  6686. widths = widths[1]
  6687. dwidths = doc.xref_get_key(df_xref, "DW")
  6688. if dwidths[0] != "int":
  6689. dwidths = None
  6690. else:
  6691. dwidths = dwidths[1]
  6692. return widths, dwidths
  6693. def set_old_widths(xref, widths, dwidths):
  6694. """Restore the old '/W' and '/DW' in subsetted font.
  6695. If either parameter is None or evaluates to False, the corresponding
  6696. dictionary key will be set to null.
  6697. """
  6698. df = doc.xref_get_key(xref, "DescendantFonts")
  6699. if df[0] != "array": # only handle xref specs
  6700. return None
  6701. df_xref = int(df[1][1:-1].replace("0 R", ""))
  6702. if (type(widths) is not str or not widths) and doc.xref_get_key(df_xref, "W")[
  6703. 0
  6704. ] != "null":
  6705. doc.xref_set_key(df_xref, "W", "null")
  6706. else:
  6707. doc.xref_set_key(df_xref, "W", widths)
  6708. if (type(dwidths) is not str or not dwidths) and doc.xref_get_key(
  6709. df_xref, "DW"
  6710. )[0] != "null":
  6711. doc.xref_set_key(df_xref, "DW", "null")
  6712. else:
  6713. doc.xref_set_key(df_xref, "DW", dwidths)
  6714. return None
  6715. def set_subset_fontname(new_xref):
  6716. """Generate a name prefix to tag a font as subset.
  6717. We use a random generator to select 6 upper case ASCII characters.
  6718. The prefixed name must be put in the font xref as the "/BaseFont" value
  6719. and in the FontDescriptor object as the '/FontName' value.
  6720. """
  6721. # The following generates a prefix like 'ABCDEF+'
  6722. import random
  6723. import string
  6724. prefix = "".join(random.choices(tuple(string.ascii_uppercase), k=6)) + "+"
  6725. font_str = doc.xref_object(new_xref, compressed=True)
  6726. font_str = font_str.replace("/BaseFont/", "/BaseFont/" + prefix)
  6727. df = doc.xref_get_key(new_xref, "DescendantFonts")
  6728. if df[0] == "array":
  6729. df_xref = int(df[1][1:-1].replace("0 R", ""))
  6730. fd = doc.xref_get_key(df_xref, "FontDescriptor")
  6731. if fd[0] == "xref":
  6732. fd_xref = int(fd[1].replace("0 R", ""))
  6733. fd_str = doc.xref_object(fd_xref, compressed=True)
  6734. fd_str = fd_str.replace("/FontName/", "/FontName/" + prefix)
  6735. doc.update_object(fd_xref, fd_str)
  6736. doc.update_object(new_xref, font_str)
  6737. def build_subset(buffer, unc_set, gid_set):
  6738. """Build font subset using fontTools.
  6739. Args:
  6740. buffer: (bytes) the font given as a binary buffer.
  6741. unc_set: (set) required glyph ids.
  6742. Returns:
  6743. Either None if subsetting is unsuccessful or the subset font buffer.
  6744. """
  6745. try:
  6746. import fontTools.subset as fts
  6747. except ImportError:
  6748. if g_exceptions_verbose: exception_info()
  6749. message("This method requires fontTools to be installed.")
  6750. raise
  6751. import tempfile
  6752. with tempfile.TemporaryDirectory() as tmp_dir:
  6753. oldfont_path = f"{tmp_dir}/oldfont.ttf"
  6754. newfont_path = f"{tmp_dir}/newfont.ttf"
  6755. uncfile_path = f"{tmp_dir}/uncfile.txt"
  6756. args = [
  6757. oldfont_path,
  6758. "--retain-gids",
  6759. f"--output-file={newfont_path}",
  6760. "--layout-features=*",
  6761. "--passthrough-tables",
  6762. "--ignore-missing-glyphs",
  6763. "--ignore-missing-unicodes",
  6764. "--symbol-cmap",
  6765. ]
  6766. # store glyph ids or unicodes as file
  6767. with io.open(f"{tmp_dir}/uncfile.txt", "w", encoding='utf8') as unc_file:
  6768. if 0xFFFD in unc_set: # error unicode exists -> use glyphs
  6769. args.append(f"--gids-file={uncfile_path}")
  6770. gid_set.add(189)
  6771. unc_list = list(gid_set)
  6772. for unc in unc_list:
  6773. unc_file.write("%i\n" % unc)
  6774. else:
  6775. args.append(f"--unicodes-file={uncfile_path}")
  6776. unc_set.add(255)
  6777. unc_list = list(unc_set)
  6778. for unc in unc_list:
  6779. unc_file.write("%04x\n" % unc)
  6780. # store fontbuffer as a file
  6781. with io.open(oldfont_path, "wb") as fontfile:
  6782. fontfile.write(buffer)
  6783. try:
  6784. os.remove(newfont_path) # remove old file
  6785. except Exception:
  6786. pass
  6787. try: # invoke fontTools subsetter
  6788. fts.main(args)
  6789. font = Font(fontfile=newfont_path)
  6790. new_buffer = font.buffer # subset font binary
  6791. if font.glyph_count == 0: # intercept empty font
  6792. new_buffer = None
  6793. except Exception:
  6794. exception_info()
  6795. new_buffer = None
  6796. return new_buffer
  6797. def repl_fontnames(doc):
  6798. """Populate 'font_buffers'.
  6799. For each font candidate, store its xref and the list of names
  6800. by which PDF text may refer to it (there may be multiple).
  6801. """
  6802. def norm_name(name):
  6803. """Recreate font name that contains PDF hex codes.
  6804. E.g. #20 -> space, chr(32)
  6805. """
  6806. while "#" in name:
  6807. p = name.find("#")
  6808. c = int(name[p + 1 : p + 3], 16)
  6809. name = name.replace(name[p : p + 3], chr(c))
  6810. return name
  6811. def get_fontnames(doc, item):
  6812. """Return a list of fontnames for an item of page.get_fonts().
  6813. There may be multiple names e.g. for Type0 fonts.
  6814. """
  6815. fontname = item[3]
  6816. names = [fontname]
  6817. fontname = doc.xref_get_key(item[0], "BaseFont")[1][1:]
  6818. fontname = norm_name(fontname)
  6819. if fontname not in names:
  6820. names.append(fontname)
  6821. descendents = doc.xref_get_key(item[0], "DescendantFonts")
  6822. if descendents[0] != "array":
  6823. return names
  6824. descendents = descendents[1][1:-1]
  6825. if descendents.endswith(" 0 R"):
  6826. xref = int(descendents[:-4])
  6827. descendents = doc.xref_object(xref, compressed=True)
  6828. p1 = descendents.find("/BaseFont")
  6829. if p1 >= 0:
  6830. p2 = descendents.find("/", p1 + 1)
  6831. p1 = min(descendents.find("/", p2 + 1), descendents.find(">>", p2 + 1))
  6832. fontname = descendents[p2 + 1 : p1]
  6833. fontname = norm_name(fontname)
  6834. if fontname not in names:
  6835. names.append(fontname)
  6836. return names
  6837. for i in range(doc.page_count):
  6838. for f in doc.get_page_fonts(i, full=True):
  6839. font_xref = f[0] # font xref
  6840. font_ext = f[1] # font file extension
  6841. basename = f[3] # font basename
  6842. if font_ext not in ( # skip if not supported by fontTools
  6843. "otf",
  6844. "ttf",
  6845. "woff",
  6846. "woff2",
  6847. ):
  6848. continue
  6849. # skip fonts which already are subsets
  6850. if len(basename) > 6 and basename[6] == "+":
  6851. continue
  6852. extr = doc.extract_font(font_xref)
  6853. fontbuffer = extr[-1]
  6854. names = get_fontnames(doc, f)
  6855. name_set, xref_set, subsets = font_buffers.get(
  6856. fontbuffer, (set(), set(), (set(), set()))
  6857. )
  6858. xref_set.add(font_xref)
  6859. for name in names:
  6860. name_set.add(name)
  6861. font = Font(fontbuffer=fontbuffer)
  6862. name_set.add(font.name)
  6863. del font
  6864. font_buffers[fontbuffer] = (name_set, xref_set, subsets)
  6865. def find_buffer_by_name(name):
  6866. for buffer, (name_set, _, _) in font_buffers.items():
  6867. if name in name_set:
  6868. return buffer
  6869. return None
  6870. # -----------------
  6871. # main function
  6872. # -----------------
  6873. repl_fontnames(doc) # populate font information
  6874. if not font_buffers: # nothing found to do
  6875. if verbose:
  6876. message(f'No fonts to subset.')
  6877. return 0
  6878. old_fontsize = 0
  6879. new_fontsize = 0
  6880. for fontbuffer in font_buffers.keys():
  6881. old_fontsize += len(fontbuffer)
  6882. # Scan page text for usage of subsettable fonts
  6883. for page in doc:
  6884. # go through the text and extend set of used glyphs by font
  6885. # we use a modified MuPDF trace device, which delivers us glyph ids.
  6886. for span in page.get_texttrace():
  6887. if type(span) is not dict: # skip useless information
  6888. continue
  6889. fontname = span["font"][:33] # fontname for the span
  6890. buffer = find_buffer_by_name(fontname)
  6891. if buffer is None:
  6892. continue
  6893. name_set, xref_set, (set_ucs, set_gid) = font_buffers[buffer]
  6894. for c in span["chars"]:
  6895. set_ucs.add(c[0]) # unicode
  6896. set_gid.add(c[1]) # glyph id
  6897. font_buffers[buffer] = (name_set, xref_set, (set_ucs, set_gid))
  6898. # build the font subsets
  6899. for old_buffer, (name_set, xref_set, subsets) in font_buffers.items():
  6900. new_buffer = build_subset(old_buffer, subsets[0], subsets[1])
  6901. fontname = list(name_set)[0]
  6902. if new_buffer is None or len(new_buffer) >= len(old_buffer):
  6903. # subset was not created or did not get smaller
  6904. if verbose:
  6905. message(f'Cannot subset {fontname!r}.')
  6906. continue
  6907. if verbose:
  6908. message(f"Built subset of font {fontname!r}.")
  6909. val = doc._insert_font(fontbuffer=new_buffer) # store subset font in PDF
  6910. new_xref = val[0] # get its xref
  6911. set_subset_fontname(new_xref) # tag fontname as subset font
  6912. font_str = doc.xref_object( # get its object definition
  6913. new_xref,
  6914. compressed=True,
  6915. )
  6916. # walk through the original font xrefs and replace each by the subset def
  6917. for font_xref in xref_set:
  6918. # we need the original '/W' and '/DW' width values
  6919. width_table, def_width = get_old_widths(font_xref)
  6920. # ... and replace original font definition at xref with it
  6921. doc.update_object(font_xref, font_str)
  6922. # now copy over old '/W' and '/DW' values
  6923. if width_table or def_width:
  6924. set_old_widths(font_xref, width_table, def_width)
  6925. # 'new_xref' remains unused in the PDF and must be removed
  6926. # by garbage collection.
  6927. new_fontsize += len(new_buffer)
  6928. return old_fontsize - new_fontsize
  6929. def switch_layer(self, config, as_default=0):
  6930. """Activate an OC layer."""
  6931. pdf = _as_pdf_document(self)
  6932. cfgs = mupdf.pdf_dict_getl(
  6933. mupdf.pdf_trailer( pdf),
  6934. PDF_NAME('Root'),
  6935. PDF_NAME('OCProperties'),
  6936. PDF_NAME('Configs')
  6937. )
  6938. if not mupdf.pdf_is_array( cfgs) or not mupdf.pdf_array_len( cfgs):
  6939. if config < 1:
  6940. return
  6941. raise ValueError( MSG_BAD_OC_LAYER)
  6942. if config < 0:
  6943. return
  6944. mupdf.pdf_select_layer_config( pdf, config)
  6945. if as_default:
  6946. mupdf.pdf_set_layer_config_as_default( pdf)
  6947. mupdf.ll_pdf_read_ocg( pdf.m_internal)
  6948. def update_object(self, xref, text, page=None):
  6949. """Replace object definition source."""
  6950. if self.is_closed or self.is_encrypted:
  6951. raise ValueError("document closed or encrypted")
  6952. pdf = _as_pdf_document(self)
  6953. xreflen = mupdf.pdf_xref_len(pdf)
  6954. if not _INRANGE(xref, 1, xreflen-1):
  6955. RAISEPY("bad xref", MSG_BAD_XREF)
  6956. ENSURE_OPERATION(pdf)
  6957. # create new object with passed-in string
  6958. new_obj = JM_pdf_obj_from_str(pdf, text)
  6959. mupdf.pdf_update_object(pdf, xref, new_obj)
  6960. if page:
  6961. JM_refresh_links( _as_pdf_page(page))
  6962. def update_stream(self, xref=0, stream=None, new=1, compress=1):
  6963. """Replace xref stream part."""
  6964. if self.is_closed or self.is_encrypted:
  6965. raise ValueError("document closed or encrypted")
  6966. pdf = _as_pdf_document(self)
  6967. xreflen = mupdf.pdf_xref_len(pdf)
  6968. if xref < 1 or xref > xreflen:
  6969. raise ValueError( MSG_BAD_XREF)
  6970. # get the object
  6971. obj = mupdf.pdf_new_indirect(pdf, xref, 0)
  6972. if not mupdf.pdf_is_dict(obj):
  6973. raise ValueError( MSG_IS_NO_DICT)
  6974. res = JM_BufferFromBytes(stream)
  6975. if not res.m_internal:
  6976. raise TypeError( MSG_BAD_BUFFER)
  6977. JM_update_stream(pdf, obj, res, compress)
  6978. pdf.dirty = 1
  6979. @property
  6980. def version_count(self):
  6981. '''
  6982. Count versions of PDF document.
  6983. '''
  6984. pdf = _as_pdf_document(self, required=0)
  6985. if pdf.m_internal:
  6986. return mupdf.pdf_count_versions(pdf)
  6987. return 0
  6988. def write(
  6989. self,
  6990. garbage=False,
  6991. clean=False,
  6992. deflate=False,
  6993. deflate_images=False,
  6994. deflate_fonts=False,
  6995. incremental=False,
  6996. ascii=False,
  6997. expand=False,
  6998. linear=False,
  6999. no_new_id=False,
  7000. appearance=False,
  7001. pretty=False,
  7002. encryption=1,
  7003. permissions=4095,
  7004. owner_pw=None,
  7005. user_pw=None,
  7006. preserve_metadata=1,
  7007. use_objstms=0,
  7008. compression_effort=0,
  7009. ):
  7010. from io import BytesIO
  7011. bio = BytesIO()
  7012. self.save(
  7013. bio,
  7014. garbage=garbage,
  7015. clean=clean,
  7016. no_new_id=no_new_id,
  7017. appearance=appearance,
  7018. deflate=deflate,
  7019. deflate_images=deflate_images,
  7020. deflate_fonts=deflate_fonts,
  7021. incremental=incremental,
  7022. ascii=ascii,
  7023. expand=expand,
  7024. linear=linear,
  7025. pretty=pretty,
  7026. encryption=encryption,
  7027. permissions=permissions,
  7028. owner_pw=owner_pw,
  7029. user_pw=user_pw,
  7030. preserve_metadata=preserve_metadata,
  7031. use_objstms=use_objstms,
  7032. compression_effort=compression_effort,
  7033. )
  7034. return bio.getvalue()
  7035. def tobytes(self, *args, **kwargs):
  7036. return self.write(*args, **kwargs)
  7037. @property
  7038. def xref(self):
  7039. """PDF xref number of page."""
  7040. CheckParent(self)
  7041. return self.parent.page_xref(self.number)
  7042. def xref_copy(doc: 'Document', source: int, target: int, *, keep: list = None) -> None:
  7043. """Copy a PDF dictionary object to another one given their xref numbers.
  7044. Args:
  7045. doc: PDF document object
  7046. source: source xref number
  7047. target: target xref number, the xref must already exist
  7048. keep: an optional list of 1st level keys in target that should not be
  7049. removed before copying.
  7050. Notes:
  7051. This works similar to the copy() method of dictionaries in Python. The
  7052. source may be a stream object.
  7053. """
  7054. if doc.xref_is_stream(source):
  7055. # read new xref stream, maintaining compression
  7056. stream = doc.xref_stream_raw(source)
  7057. doc.update_stream(
  7058. target,
  7059. stream,
  7060. compress=False, # keeps source compression
  7061. new=True, # in case target is no stream
  7062. )
  7063. # empty the target completely, observe exceptions
  7064. if keep is None:
  7065. keep = []
  7066. for key in doc.xref_get_keys(target):
  7067. if key in keep:
  7068. continue
  7069. doc.xref_set_key(target, key, "null")
  7070. # copy over all source dict items
  7071. for key in doc.xref_get_keys(source):
  7072. item = doc.xref_get_key(source, key)
  7073. doc.xref_set_key(target, key, item[1])
  7074. def xref_get_key(self, xref, key):
  7075. """Get PDF dict key value of object at 'xref'."""
  7076. pdf = _as_pdf_document(self)
  7077. xreflen = mupdf.pdf_xref_len(pdf)
  7078. if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
  7079. raise ValueError( MSG_BAD_XREF)
  7080. if xref > 0:
  7081. obj = mupdf.pdf_load_object(pdf, xref)
  7082. else:
  7083. obj = mupdf.pdf_trailer(pdf)
  7084. if not obj.m_internal:
  7085. return ("null", "null")
  7086. subobj = mupdf.pdf_dict_getp(obj, key)
  7087. if not subobj.m_internal:
  7088. return ("null", "null")
  7089. text = None
  7090. if mupdf.pdf_is_indirect(subobj):
  7091. type = "xref"
  7092. text = "%i 0 R" % mupdf.pdf_to_num(subobj)
  7093. elif mupdf.pdf_is_array(subobj):
  7094. type = "array"
  7095. elif mupdf.pdf_is_dict(subobj):
  7096. type = "dict"
  7097. elif mupdf.pdf_is_int(subobj):
  7098. type = "int"
  7099. text = "%i" % mupdf.pdf_to_int(subobj)
  7100. elif mupdf.pdf_is_real(subobj):
  7101. type = "float"
  7102. elif mupdf.pdf_is_null(subobj):
  7103. type = "null"
  7104. text = "null"
  7105. elif mupdf.pdf_is_bool(subobj):
  7106. type = "bool"
  7107. if mupdf.pdf_to_bool(subobj):
  7108. text = "true"
  7109. else:
  7110. text = "false"
  7111. elif mupdf.pdf_is_name(subobj):
  7112. type = "name"
  7113. text = "/%s" % mupdf.pdf_to_name(subobj)
  7114. elif mupdf.pdf_is_string(subobj):
  7115. type = "string"
  7116. text = JM_UnicodeFromStr(mupdf.pdf_to_text_string(subobj))
  7117. else:
  7118. type = "unknown"
  7119. if text is None:
  7120. res = JM_object_to_buffer(subobj, 1, 0)
  7121. text = JM_UnicodeFromBuffer(res)
  7122. return (type, text)
  7123. def xref_get_keys(self, xref):
  7124. """Get the keys of PDF dict object at 'xref'. Use -1 for the PDF trailer."""
  7125. pdf = _as_pdf_document(self)
  7126. xreflen = mupdf.pdf_xref_len( pdf)
  7127. if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
  7128. raise ValueError( MSG_BAD_XREF)
  7129. if xref > 0:
  7130. obj = mupdf.pdf_load_object( pdf, xref)
  7131. else:
  7132. obj = mupdf.pdf_trailer( pdf)
  7133. n = mupdf.pdf_dict_len( obj)
  7134. rc = []
  7135. if n == 0:
  7136. return rc
  7137. for i in range(n):
  7138. key = mupdf.pdf_to_name( mupdf.pdf_dict_get_key( obj, i))
  7139. rc.append(key)
  7140. return rc
  7141. def xref_is_font(self, xref):
  7142. """Check if xref is a font object."""
  7143. if self.is_closed or self.is_encrypted:
  7144. raise ValueError("document closed or encrypted")
  7145. if self.xref_get_key(xref, "Type")[1] == "/Font":
  7146. return True
  7147. return False
  7148. def xref_is_image(self, xref):
  7149. """Check if xref is an image object."""
  7150. if self.is_closed or self.is_encrypted:
  7151. raise ValueError("document closed or encrypted")
  7152. if self.xref_get_key(xref, "Subtype")[1] == "/Image":
  7153. return True
  7154. return False
  7155. def xref_is_stream(self, xref=0):
  7156. """Check if xref is a stream object."""
  7157. pdf = _as_pdf_document(self, required=0)
  7158. if not pdf.m_internal:
  7159. return False # not a PDF
  7160. return bool(mupdf.pdf_obj_num_is_stream(pdf, xref))
  7161. def xref_is_xobject(self, xref):
  7162. """Check if xref is a form xobject."""
  7163. if self.is_closed or self.is_encrypted:
  7164. raise ValueError("document closed or encrypted")
  7165. if self.xref_get_key(xref, "Subtype")[1] == "/Form":
  7166. return True
  7167. return False
  7168. def xref_length(self):
  7169. """Get length of xref table."""
  7170. xreflen = 0
  7171. pdf = _as_pdf_document(self, required=0)
  7172. if pdf.m_internal:
  7173. xreflen = mupdf.pdf_xref_len(pdf)
  7174. return xreflen
  7175. def xref_object(self, xref, compressed=0, ascii=0):
  7176. """Get xref object source as a string."""
  7177. if self.is_closed:
  7178. raise ValueError("document closed")
  7179. if g_use_extra:
  7180. ret = extra.xref_object( self.this, xref, compressed, ascii)
  7181. return ret
  7182. pdf = _as_pdf_document(self)
  7183. xreflen = mupdf.pdf_xref_len(pdf)
  7184. if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
  7185. raise ValueError( MSG_BAD_XREF)
  7186. if xref > 0:
  7187. obj = mupdf.pdf_load_object(pdf, xref)
  7188. else:
  7189. obj = mupdf.pdf_trailer(pdf)
  7190. res = JM_object_to_buffer(mupdf.pdf_resolve_indirect(obj), compressed, ascii)
  7191. text = JM_EscapeStrFromBuffer(res)
  7192. return text
  7193. def xref_set_key(self, xref, key, value):
  7194. """Set the value of a PDF dictionary key."""
  7195. if self.is_closed:
  7196. raise ValueError("document closed")
  7197. if not key or not isinstance(key, str) or INVALID_NAME_CHARS.intersection(key) not in (set(), {"/"}):
  7198. raise ValueError("bad 'key'")
  7199. if not isinstance(value, str) or not value or value[0] == "/" and INVALID_NAME_CHARS.intersection(value[1:]) != set():
  7200. raise ValueError("bad 'value'")
  7201. pdf = _as_pdf_document(self)
  7202. xreflen = mupdf.pdf_xref_len(pdf)
  7203. #if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
  7204. # THROWMSG("bad xref")
  7205. #if len(value) == 0:
  7206. # THROWMSG("bad 'value'")
  7207. #if len(key) == 0:
  7208. # THROWMSG("bad 'key'")
  7209. if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
  7210. raise ValueError( MSG_BAD_XREF)
  7211. if xref != -1:
  7212. obj = mupdf.pdf_load_object(pdf, xref)
  7213. else:
  7214. obj = mupdf.pdf_trailer(pdf)
  7215. new_obj = JM_set_object_value(obj, key, value)
  7216. if not new_obj.m_internal:
  7217. return # did not work: skip update
  7218. if xref != -1:
  7219. mupdf.pdf_update_object(pdf, xref, new_obj)
  7220. else:
  7221. n = mupdf.pdf_dict_len(new_obj)
  7222. for i in range(n):
  7223. mupdf.pdf_dict_put(
  7224. obj,
  7225. mupdf.pdf_dict_get_key(new_obj, i),
  7226. mupdf.pdf_dict_get_val(new_obj, i),
  7227. )
  7228. def xref_stream(self, xref):
  7229. """Get decompressed xref stream."""
  7230. if self.is_closed or self.is_encrypted:
  7231. raise ValueError("document closed or encrypted")
  7232. pdf = _as_pdf_document(self)
  7233. xreflen = mupdf.pdf_xref_len( pdf)
  7234. if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
  7235. raise ValueError( MSG_BAD_XREF)
  7236. if xref >= 0:
  7237. obj = mupdf.pdf_new_indirect( pdf, xref, 0)
  7238. else:
  7239. obj = mupdf.pdf_trailer( pdf)
  7240. r = None
  7241. if mupdf.pdf_is_stream( obj):
  7242. res = mupdf.pdf_load_stream_number( pdf, xref)
  7243. r = JM_BinFromBuffer( res)
  7244. return r
  7245. def xref_stream_raw(self, xref):
  7246. """Get xref stream without decompression."""
  7247. if self.is_closed or self.is_encrypted:
  7248. raise ValueError("document closed or encrypted")
  7249. pdf = _as_pdf_document(self)
  7250. xreflen = mupdf.pdf_xref_len( pdf)
  7251. if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
  7252. raise ValueError( MSG_BAD_XREF)
  7253. if xref >= 0:
  7254. obj = mupdf.pdf_new_indirect( pdf, xref, 0)
  7255. else:
  7256. obj = mupdf.pdf_trailer( pdf)
  7257. r = None
  7258. if mupdf.pdf_is_stream( obj):
  7259. res = mupdf.pdf_load_raw_stream_number( pdf, xref)
  7260. r = JM_BinFromBuffer( res)
  7261. return r
  7262. def xref_xml_metadata(self):
  7263. """Get xref of document XML metadata."""
  7264. pdf = _as_pdf_document(self)
  7265. root = mupdf.pdf_dict_get( mupdf.pdf_trailer( pdf), PDF_NAME('Root'))
  7266. if not root.m_internal:
  7267. RAISEPY( MSG_BAD_PDFROOT, JM_Exc_FileDataError)
  7268. xml = mupdf.pdf_dict_get( root, PDF_NAME('Metadata'))
  7269. xref = 0
  7270. if xml.m_internal:
  7271. xref = mupdf.pdf_to_num( xml)
  7272. return xref
# Attributes stored in slots; '__dict__' is listed as well, so further
# instance attributes can still be assigned.
__slots__ = ('this', 'page_count2', 'this_is_pdf', '__dict__')
# Read-only property exposing the private '_outline' attribute.
outline = property(lambda self: self._outline)
# Alias: 'is_stream' is the same function as 'xref_is_stream'.
is_stream = xref_is_stream
# Alias: 'open' refers to the Document class itself.
# NOTE(review): indentation is not visible here; this alias presumably
# lives at module level, after the class body - confirm.
open = Document
  7277. class DocumentWriter:
  7278. def __enter__(self):
  7279. return self
  7280. def __exit__(self, *args):
  7281. self.close()
  7282. def __init__(self, path, options=''):
  7283. if isinstance( path, str):
  7284. pass
  7285. elif hasattr( path, 'absolute'):
  7286. path = str( path)
  7287. elif hasattr( path, 'name'):
  7288. path = path.name
  7289. if isinstance( path, str):
  7290. self.this = mupdf.FzDocumentWriter( path, options, mupdf.FzDocumentWriter.PathType_PDF)
  7291. else:
  7292. # Need to keep the Python JM_new_output_fileptr_Output instance
  7293. # alive for the lifetime of this DocumentWriter, otherwise calls
  7294. # to virtual methods implemented in Python fail. So we make it a
  7295. # member of this DocumentWriter.
  7296. #
  7297. # Unrelated to this, mupdf.FzDocumentWriter will set
  7298. # self._out.m_internal to null because ownership is passed in.
  7299. #
  7300. out = JM_new_output_fileptr( path)
  7301. self.this = mupdf.FzDocumentWriter( out, options, mupdf.FzDocumentWriter.OutputType_PDF)
  7302. assert out.m_internal_value() == 0
  7303. assert hasattr( self.this, '_out')
  7304. def begin_page( self, mediabox):
  7305. mediabox2 = JM_rect_from_py(mediabox)
  7306. device = mupdf.fz_begin_page( self.this, mediabox2)
  7307. device_wrapper = DeviceWrapper( device)
  7308. return device_wrapper
  7309. def close( self):
  7310. mupdf.fz_close_document_writer( self.this)
  7311. def end_page( self):
  7312. mupdf.fz_end_page( self.this)
  7313. class Font:
  7314. def __del__(self):
  7315. if type(self) is not Font:
  7316. return None
  7317. def __init__(
  7318. self,
  7319. fontname=None,
  7320. fontfile=None,
  7321. fontbuffer=None,
  7322. script=0,
  7323. language=None,
  7324. ordering=-1,
  7325. is_bold=0,
  7326. is_italic=0,
  7327. is_serif=0,
  7328. embed=1,
  7329. ):
  7330. if fontbuffer:
  7331. if hasattr(fontbuffer, "getvalue"):
  7332. fontbuffer = fontbuffer.getvalue()
  7333. elif isinstance(fontbuffer, bytearray):
  7334. fontbuffer = bytes(fontbuffer)
  7335. if not isinstance(fontbuffer, bytes):
  7336. raise ValueError("bad type: 'fontbuffer'")
  7337. if isinstance(fontname, str):
  7338. fname_lower = fontname.lower()
  7339. if "/" in fname_lower or "\\" in fname_lower or "." in fname_lower:
  7340. message("Warning: did you mean a fontfile?")
  7341. if fname_lower in ("cjk", "china-t", "china-ts"):
  7342. ordering = 0
  7343. elif fname_lower.startswith("china-s"):
  7344. ordering = 1
  7345. elif fname_lower.startswith("korea"):
  7346. ordering = 3
  7347. elif fname_lower.startswith("japan"):
  7348. ordering = 2
  7349. elif fname_lower in fitz_fontdescriptors.keys():
  7350. import pymupdf_fonts # optional fonts
  7351. fontbuffer = pymupdf_fonts.myfont(fname_lower) # make a copy
  7352. fontname = None # ensure using fontbuffer only
  7353. del pymupdf_fonts # remove package again
  7354. elif ordering < 0:
  7355. fontname = Base14_fontdict.get(fontname, fontname)
  7356. lang = mupdf.fz_text_language_from_string(language)
  7357. font = JM_get_font(fontname, fontfile,
  7358. fontbuffer, script, lang, ordering,
  7359. is_bold, is_italic, is_serif, embed)
  7360. self.this = font
  7361. def __repr__(self):
  7362. return "Font('%s')" % self.name
  7363. @property
  7364. def ascender(self):
  7365. """Return the glyph ascender value."""
  7366. return mupdf.fz_font_ascender(self.this)
  7367. @property
  7368. def bbox(self):
  7369. return self.this.fz_font_bbox()
  7370. @property
  7371. def buffer(self):
  7372. buffer_ = mupdf.FzBuffer( mupdf.ll_fz_keep_buffer( self.this.m_internal.buffer))
  7373. return mupdf.fz_buffer_extract_copy( buffer_)
  7374. def char_lengths(self, text, fontsize=11, language=None, script=0, wmode=0, small_caps=0):
  7375. """Return tuple of char lengths of unicode 'text' under a fontsize."""
  7376. lang = mupdf.fz_text_language_from_string(language)
  7377. rc = []
  7378. for ch in text:
  7379. c = ord(ch)
  7380. if small_caps:
  7381. gid = mupdf.fz_encode_character_sc(self.this, c)
  7382. if gid >= 0:
  7383. font = self.this
  7384. else:
  7385. gid, font = mupdf.fz_encode_character_with_fallback(self.this, c, script, lang)
  7386. rc.append(fontsize * mupdf.fz_advance_glyph(font, gid, wmode))
  7387. return rc
  7388. @property
  7389. def descender(self):
  7390. """Return the glyph descender value."""
  7391. return mupdf.fz_font_descender(self.this)
  7392. @property
  7393. def flags(self):
  7394. f = mupdf.ll_fz_font_flags(self.this.m_internal)
  7395. if not f:
  7396. return
  7397. assert isinstance( f, mupdf.fz_font_flags_t)
  7398. #log( '{=f}')
  7399. if mupdf_cppyy:
  7400. # cppyy includes remaining higher bits.
  7401. v = [f.is_mono]
  7402. def b(bits):
  7403. ret = v[0] & ((1 << bits)-1)
  7404. v[0] = v[0] >> bits
  7405. return ret
  7406. is_mono = b(1)
  7407. is_serif = b(1)
  7408. is_bold = b(1)
  7409. is_italic = b(1)
  7410. ft_substitute = b(1)
  7411. ft_stretch = b(1)
  7412. fake_bold = b(1)
  7413. fake_italic = b(1)
  7414. has_opentype = b(1)
  7415. invalid_bbox = b(1)
  7416. cjk_lang = b(1)
  7417. embed = b(1)
  7418. never_embed = b(1)
  7419. return {
  7420. "mono": is_mono if mupdf_cppyy else f.is_mono,
  7421. "serif": is_serif if mupdf_cppyy else f.is_serif,
  7422. "bold": is_bold if mupdf_cppyy else f.is_bold,
  7423. "italic": is_italic if mupdf_cppyy else f.is_italic,
  7424. "substitute": ft_substitute if mupdf_cppyy else f.ft_substitute,
  7425. "stretch": ft_stretch if mupdf_cppyy else f.ft_stretch,
  7426. "fake-bold": fake_bold if mupdf_cppyy else f.fake_bold,
  7427. "fake-italic": fake_italic if mupdf_cppyy else f.fake_italic,
  7428. "opentype": has_opentype if mupdf_cppyy else f.has_opentype,
  7429. "invalid-bbox": invalid_bbox if mupdf_cppyy else f.invalid_bbox,
  7430. 'cjk': cjk_lang if mupdf_cppyy else f.cjk,
  7431. 'cjk-lang': cjk_lang if mupdf_cppyy else f.cjk_lang,
  7432. 'embed': embed if mupdf_cppyy else f.embed,
  7433. 'never-embed': never_embed if mupdf_cppyy else f.never_embed,
  7434. }
  7435. def glyph_advance(self, chr_, language=None, script=0, wmode=0, small_caps=0):
  7436. """Return the glyph width of a unicode (font size 1)."""
  7437. lang = mupdf.fz_text_language_from_string(language)
  7438. if small_caps:
  7439. gid = mupdf.fz_encode_character_sc(self.this, chr_)
  7440. if gid >= 0:
  7441. font = self.this
  7442. else:
  7443. gid, font = mupdf.fz_encode_character_with_fallback(self.this, chr_, script, lang)
  7444. return mupdf.fz_advance_glyph(font, gid, wmode)
  7445. def glyph_bbox(self, char, language=None, script=0, small_caps=0):
  7446. """Return the glyph bbox of a unicode (font size 1)."""
  7447. lang = mupdf.fz_text_language_from_string(language)
  7448. if small_caps:
  7449. gid = mupdf.fz_encode_character_sc( self.this, char)
  7450. if gid >= 0:
  7451. font = self.this
  7452. else:
  7453. gid, font = mupdf.fz_encode_character_with_fallback( self.this, char, script, lang)
  7454. return Rect(mupdf.fz_bound_glyph( font, gid, mupdf.FzMatrix()))
  7455. @property
  7456. def glyph_count(self):
  7457. return self.this.m_internal.glyph_count
  7458. def glyph_name_to_unicode(self, name):
  7459. """Return the unicode for a glyph name."""
  7460. return glyph_name_to_unicode(name)
  7461. def has_glyph(self, chr, language=None, script=0, fallback=0, small_caps=0):
  7462. """Check whether font has a glyph for this unicode."""
  7463. if fallback:
  7464. lang = mupdf.fz_text_language_from_string(language)
  7465. gid, font = mupdf.fz_encode_character_with_fallback(self.this, chr, script, lang)
  7466. else:
  7467. if small_caps:
  7468. gid = mupdf.fz_encode_character_sc(self.this, chr)
  7469. else:
  7470. gid = mupdf.fz_encode_character(self.this, chr)
  7471. return gid
  7472. @property
  7473. def is_bold(self):
  7474. return mupdf.fz_font_is_bold( self.this)
  7475. @property
  7476. def is_italic(self):
  7477. return mupdf.fz_font_is_italic( self.this)
  7478. @property
  7479. def is_monospaced(self):
  7480. return mupdf.fz_font_is_monospaced( self.this)
  7481. @property
  7482. def is_serif(self):
  7483. return mupdf.fz_font_is_serif( self.this)
  7484. @property
  7485. def is_writable(self):
  7486. return True # see pymupdf commit ef4056ee4da2
  7487. font = self.this
  7488. flags = mupdf.ll_fz_font_flags(font.m_internal)
  7489. if mupdf_cppyy:
  7490. # cppyy doesn't handle bitfields correctly.
  7491. import cppyy
  7492. ft_substitute = cppyy.gbl.mupdf_mfz_font_flags_ft_substitute( flags)
  7493. else:
  7494. ft_substitute = flags.ft_substitute
  7495. if ( mupdf.ll_fz_font_t3_procs(font.m_internal)
  7496. or ft_substitute
  7497. or not mupdf.pdf_font_writing_supported(font)
  7498. ):
  7499. return False
  7500. return True
  7501. @property
  7502. def name(self):
  7503. ret = mupdf.fz_font_name(self.this)
  7504. #log( '{ret=}')
  7505. return ret
  7506. def text_length(self, text, fontsize=11, language=None, script=0, wmode=0, small_caps=0):
  7507. """Return length of unicode 'text' under a fontsize."""
  7508. thisfont = self.this
  7509. lang = mupdf.fz_text_language_from_string(language)
  7510. rc = 0
  7511. if not isinstance(text, str):
  7512. raise TypeError( MSG_BAD_TEXT)
  7513. for ch in text:
  7514. c = ord(ch)
  7515. if small_caps:
  7516. gid = mupdf.fz_encode_character_sc(thisfont, c)
  7517. if gid >= 0:
  7518. font = thisfont
  7519. else:
  7520. gid, font = mupdf.fz_encode_character_with_fallback(thisfont, c, script, lang)
  7521. rc += mupdf.fz_advance_glyph(font, gid, wmode)
  7522. rc *= fontsize
  7523. return rc
  7524. def unicode_to_glyph_name(self, ch):
  7525. """Return the glyph name for a unicode."""
  7526. return unicode_to_glyph_name(ch)
  7527. def valid_codepoints(self):
  7528. '''
  7529. Returns sorted list of valid unicodes of a fz_font.
  7530. '''
  7531. ucs_gids = mupdf.fz_enumerate_font_cmap2(self.this)
  7532. ucss = [i.ucs for i in ucs_gids]
  7533. ucss_unique = set(ucss)
  7534. ucss_unique_sorted = sorted(ucss_unique)
  7535. return ucss_unique_sorted
  7536. class Graftmap:
  7537. def __del__(self):
  7538. if not type(self) is Graftmap:
  7539. return
  7540. self.thisown = False
  7541. def __init__(self, doc):
  7542. dst = _as_pdf_document(doc)
  7543. map_ = mupdf.pdf_new_graft_map(dst)
  7544. self.this = map_
  7545. self.thisown = True
  7546. class Link:
  7547. def __del__(self):
  7548. self._erase()
  7549. def __init__( self, this):
  7550. assert isinstance( this, mupdf.FzLink)
  7551. self.this = this
  7552. def __repr__(self):
  7553. CheckParent(self)
  7554. return "link on " + str(self.parent)
  7555. def __str__(self):
  7556. CheckParent(self)
  7557. return "link on " + str(self.parent)
  7558. def _border(self, doc, xref):
  7559. pdf = _as_pdf_document(doc, required=0)
  7560. if not pdf.m_internal:
  7561. return
  7562. link_obj = mupdf.pdf_new_indirect(pdf, xref, 0)
  7563. if not link_obj.m_internal:
  7564. return
  7565. b = JM_annot_border(link_obj)
  7566. return b
  7567. def _colors(self, doc, xref):
  7568. pdf = _as_pdf_document(doc, required=0)
  7569. if not pdf.m_internal:
  7570. return
  7571. link_obj = mupdf.pdf_new_indirect( pdf, xref, 0)
  7572. if not link_obj.m_internal:
  7573. raise ValueError( MSG_BAD_XREF)
  7574. b = JM_annot_colors( link_obj)
  7575. return b
  7576. def _erase(self):
  7577. self.parent = None
  7578. self.thisown = False
  7579. def _setBorder(self, border, doc, xref):
  7580. pdf = _as_pdf_document(doc, required=0)
  7581. if not pdf.m_internal:
  7582. return
  7583. link_obj = mupdf.pdf_new_indirect(pdf, xref, 0)
  7584. if not link_obj.m_internal:
  7585. return
  7586. b = JM_annot_set_border(border, pdf, link_obj)
  7587. return b
  7588. @property
  7589. def border(self):
  7590. return self._border(self.parent.parent.this, self.xref)
  7591. @property
  7592. def colors(self):
  7593. return self._colors(self.parent.parent.this, self.xref)
  7594. @property
  7595. def dest(self):
  7596. """Create link destination details."""
  7597. if hasattr(self, "parent") and self.parent is None:
  7598. raise ValueError("orphaned object: parent is None")
  7599. if self.parent.parent.is_closed or self.parent.parent.is_encrypted:
  7600. raise ValueError("document closed or encrypted")
  7601. doc = self.parent.parent
  7602. if self.is_external or self.uri.startswith("#"):
  7603. uri = None
  7604. else:
  7605. uri = doc.resolve_link(self.uri)
  7606. return linkDest(self, uri, doc)
  7607. @property
  7608. def flags(self)->int:
  7609. CheckParent(self)
  7610. doc = self.parent.parent
  7611. if not doc.is_pdf:
  7612. return 0
  7613. f = doc.xref_get_key(self.xref, "F")
  7614. if f[1] != "null":
  7615. return int(f[1])
  7616. return 0
  7617. @property
  7618. def is_external(self):
  7619. """Flag the link as external."""
  7620. CheckParent(self)
  7621. if g_use_extra:
  7622. return extra.Link_is_external( self.this)
  7623. this_link = self.this
  7624. if not this_link.m_internal or not this_link.m_internal.uri:
  7625. return False
  7626. return bool( mupdf.fz_is_external_link( this_link.m_internal.uri))
  7627. @property
  7628. def next(self):
  7629. """Next link."""
  7630. if not self.this.m_internal:
  7631. return None
  7632. CheckParent(self)
  7633. if 0 and g_use_extra:
  7634. val = extra.Link_next( self.this)
  7635. else:
  7636. val = self.this.next()
  7637. if not val.m_internal:
  7638. return None
  7639. val = Link( val)
  7640. if val:
  7641. val.thisown = True
  7642. val.parent = self.parent # copy owning page from prev link
  7643. val.parent._annot_refs[id(val)] = val
  7644. if self.xref > 0: # prev link has an xref
  7645. link_xrefs = [x[0] for x in self.parent.annot_xrefs() if x[1] == mupdf.PDF_ANNOT_LINK]
  7646. link_ids = [x[2] for x in self.parent.annot_xrefs() if x[1] == mupdf.PDF_ANNOT_LINK]
  7647. idx = link_xrefs.index(self.xref)
  7648. val.xref = link_xrefs[idx + 1]
  7649. val.id = link_ids[idx + 1]
  7650. else:
  7651. val.xref = 0
  7652. val.id = ""
  7653. return val
  7654. @property
  7655. def rect(self):
  7656. """Rectangle ('hot area')."""
  7657. CheckParent(self)
  7658. # utils.py:getLinkDict() appears to expect exceptions from us, so we
  7659. # ensure that we raise on error.
  7660. if self.this is None or not self.this.m_internal:
  7661. raise Exception( 'self.this.m_internal not available')
  7662. val = JM_py_from_rect( self.this.rect())
  7663. val = Rect(val)
  7664. return val
  7665. def set_border(self, border=None, width=0, dashes=None, style=None):
  7666. if type(border) is not dict:
  7667. border = {"width": width, "style": style, "dashes": dashes}
  7668. return self._setBorder(border, self.parent.parent.this, self.xref)
  7669. def set_colors(self, colors=None, stroke=None, fill=None):
  7670. """Set border colors."""
  7671. CheckParent(self)
  7672. doc = self.parent.parent
  7673. if type(colors) is not dict:
  7674. colors = {"fill": fill, "stroke": stroke}
  7675. fill = colors.get("fill")
  7676. stroke = colors.get("stroke")
  7677. if fill is not None:
  7678. message("warning: links have no fill color")
  7679. if stroke in ([], ()):
  7680. doc.xref_set_key(self.xref, "C", "[]")
  7681. return
  7682. if hasattr(stroke, "__float__"):
  7683. stroke = [float(stroke)]
  7684. CheckColor(stroke)
  7685. assert len(stroke) in (1, 3, 4)
  7686. s = f"[{_format_g(stroke)}]"
  7687. doc.xref_set_key(self.xref, "C", s)
  7688. def set_flags(self, flags):
  7689. CheckParent(self)
  7690. doc = self.parent.parent
  7691. if not doc.is_pdf:
  7692. raise ValueError("is no PDF")
  7693. if not type(flags) is int:
  7694. raise ValueError("bad 'flags' value")
  7695. doc.xref_set_key(self.xref, "F", str(flags))
  7696. return None
  7697. @property
  7698. def uri(self):
  7699. """Uri string."""
  7700. #CheckParent(self)
  7701. if g_use_extra:
  7702. return extra.link_uri(self.this)
  7703. this_link = self.this
  7704. return this_link.m_internal.uri if this_link.m_internal else ''
  7705. page = -1
  7706. class Matrix:
  7707. def __abs__(self):
  7708. return math.sqrt(sum([c*c for c in self]))
  7709. def __add__(self, m):
  7710. if hasattr(m, "__float__"):
  7711. return Matrix(self.a + m, self.b + m, self.c + m,
  7712. self.d + m, self.e + m, self.f + m)
  7713. if len(m) != 6:
  7714. raise ValueError("Matrix: bad seq len")
  7715. return Matrix(self.a + m[0], self.b + m[1], self.c + m[2],
  7716. self.d + m[3], self.e + m[4], self.f + m[5])
  7717. def __bool__(self):
  7718. return not (max(self) == min(self) == 0)
  7719. def __eq__(self, mat):
  7720. if not hasattr(mat, "__len__"):
  7721. return False
  7722. return len(mat) == 6 and not (self - mat)
  7723. def __getitem__(self, i):
  7724. return (self.a, self.b, self.c, self.d, self.e, self.f)[i]
  7725. def __init__(self, *args, a=None, b=None, c=None, d=None, e=None, f=None):
  7726. """
  7727. Matrix() - all zeros
  7728. Matrix(a, b, c, d, e, f)
  7729. Matrix(zoom-x, zoom-y) - zoom
  7730. Matrix(shear-x, shear-y, 1) - shear
  7731. Matrix(degree) - rotate
  7732. Matrix(Matrix) - new copy
  7733. Matrix(sequence) - from 'sequence'
  7734. Matrix(mupdf.FzMatrix) - from MuPDF class wrapper for fz_matrix.
  7735. Explicit keyword args a, b, c, d, e, f override any earlier settings if
  7736. not None.
  7737. """
  7738. if not args:
  7739. self.a = self.b = self.c = self.d = self.e = self.f = 0.0
  7740. elif len(args) > 6:
  7741. raise ValueError("Matrix: bad seq len")
  7742. elif len(args) == 6: # 6 numbers
  7743. self.a, self.b, self.c, self.d, self.e, self.f = map(float, args)
  7744. elif len(args) == 1: # either an angle or a sequ
  7745. if isinstance(args[0], mupdf.FzMatrix):
  7746. self.a = args[0].a
  7747. self.b = args[0].b
  7748. self.c = args[0].c
  7749. self.d = args[0].d
  7750. self.e = args[0].e
  7751. self.f = args[0].f
  7752. elif hasattr(args[0], "__float__"):
  7753. theta = math.radians(args[0])
  7754. c_ = round(math.cos(theta), 8)
  7755. s_ = round(math.sin(theta), 8)
  7756. self.a = self.d = c_
  7757. self.b = s_
  7758. self.c = -s_
  7759. self.e = self.f = 0.0
  7760. else:
  7761. self.a, self.b, self.c, self.d, self.e, self.f = map(float, args[0])
  7762. elif len(args) == 2 or len(args) == 3 and args[2] == 0:
  7763. self.a, self.b, self.c, self.d, self.e, self.f = float(args[0]), \
  7764. 0.0, 0.0, float(args[1]), 0.0, 0.0
  7765. elif len(args) == 3 and args[2] == 1:
  7766. self.a, self.b, self.c, self.d, self.e, self.f = 1.0, \
  7767. float(args[1]), float(args[0]), 1.0, 0.0, 0.0
  7768. else:
  7769. raise ValueError("Matrix: bad args")
  7770. # Override with explicit args if specified.
  7771. if a is not None: self.a = a
  7772. if b is not None: self.b = b
  7773. if c is not None: self.c = c
  7774. if d is not None: self.d = d
  7775. if e is not None: self.e = e
  7776. if f is not None: self.f = f
  7777. def __invert__(self):
  7778. """Calculate inverted matrix."""
  7779. m1 = Matrix()
  7780. m1.invert(self)
  7781. return m1
  7782. def __len__(self):
  7783. return 6
  7784. def __mul__(self, m):
  7785. if hasattr(m, "__float__"):
  7786. return Matrix(self.a * m, self.b * m, self.c * m,
  7787. self.d * m, self.e * m, self.f * m)
  7788. m1 = Matrix(1,1)
  7789. return m1.concat(self, m)
  7790. def __neg__(self):
  7791. return Matrix(-self.a, -self.b, -self.c, -self.d, -self.e, -self.f)
  7792. def __nonzero__(self):
  7793. return not (max(self) == min(self) == 0)
  7794. def __pos__(self):
  7795. return Matrix(self)
  7796. def __repr__(self):
  7797. return "Matrix" + str(tuple(self))
  7798. def __setitem__(self, i, v):
  7799. v = float(v)
  7800. if i == 0: self.a = v
  7801. elif i == 1: self.b = v
  7802. elif i == 2: self.c = v
  7803. elif i == 3: self.d = v
  7804. elif i == 4: self.e = v
  7805. elif i == 5: self.f = v
  7806. else:
  7807. raise IndexError("index out of range")
  7808. return
  7809. def __sub__(self, m):
  7810. if hasattr(m, "__float__"):
  7811. return Matrix(self.a - m, self.b - m, self.c - m,
  7812. self.d - m, self.e - m, self.f - m)
  7813. if len(m) != 6:
  7814. raise ValueError("Matrix: bad seq len")
  7815. return Matrix(self.a - m[0], self.b - m[1], self.c - m[2],
  7816. self.d - m[3], self.e - m[4], self.f - m[5])
  7817. def __truediv__(self, m):
  7818. if hasattr(m, "__float__"):
  7819. return Matrix(self.a * 1./m, self.b * 1./m, self.c * 1./m,
  7820. self.d * 1./m, self.e * 1./m, self.f * 1./m)
  7821. m1 = util_invert_matrix(m)[1]
  7822. if not m1:
  7823. raise ZeroDivisionError("matrix not invertible")
  7824. m2 = Matrix(1,1)
  7825. return m2.concat(self, m1)
  7826. def concat(self, one, two):
  7827. """Multiply two matrices and replace current one."""
  7828. if not len(one) == len(two) == 6:
  7829. raise ValueError("Matrix: bad seq len")
  7830. self.a, self.b, self.c, self.d, self.e, self.f = util_concat_matrix(one, two)
  7831. return self
  7832. def invert(self, src=None):
  7833. """Calculate the inverted matrix. Return 0 if successful and replace
  7834. current one. Else return 1 and do nothing.
  7835. """
  7836. if src is None:
  7837. dst = util_invert_matrix(self)
  7838. else:
  7839. dst = util_invert_matrix(src)
  7840. if dst[0] == 1:
  7841. return 1
  7842. self.a, self.b, self.c, self.d, self.e, self.f = dst[1]
  7843. return 0
  7844. @property
  7845. def is_rectilinear(self):
  7846. """True if rectangles are mapped to rectangles."""
  7847. return (abs(self.b) < EPSILON and abs(self.c) < EPSILON) or \
  7848. (abs(self.a) < EPSILON and abs(self.d) < EPSILON)
  7849. def prerotate(self, theta):
  7850. """Calculate pre rotation and replace current matrix."""
  7851. theta = float(theta)
  7852. while theta < 0: theta += 360
  7853. while theta >= 360: theta -= 360
  7854. if abs(0 - theta) < EPSILON:
  7855. pass
  7856. elif abs(90.0 - theta) < EPSILON:
  7857. a = self.a
  7858. b = self.b
  7859. self.a = self.c
  7860. self.b = self.d
  7861. self.c = -a
  7862. self.d = -b
  7863. elif abs(180.0 - theta) < EPSILON:
  7864. self.a = -self.a
  7865. self.b = -self.b
  7866. self.c = -self.c
  7867. self.d = -self.d
  7868. elif abs(270.0 - theta) < EPSILON:
  7869. a = self.a
  7870. b = self.b
  7871. self.a = -self.c
  7872. self.b = -self.d
  7873. self.c = a
  7874. self.d = b
  7875. else:
  7876. rad = math.radians(theta)
  7877. s = math.sin(rad)
  7878. c = math.cos(rad)
  7879. a = self.a
  7880. b = self.b
  7881. self.a = c * a + s * self.c
  7882. self.b = c * b + s * self.d
  7883. self.c =-s * a + c * self.c
  7884. self.d =-s * b + c * self.d
  7885. return self
  7886. def prescale(self, sx, sy):
  7887. """Calculate pre scaling and replace current matrix."""
  7888. sx = float(sx)
  7889. sy = float(sy)
  7890. self.a *= sx
  7891. self.b *= sx
  7892. self.c *= sy
  7893. self.d *= sy
  7894. return self
  7895. def preshear(self, h, v):
  7896. """Calculate pre shearing and replace current matrix."""
  7897. h = float(h)
  7898. v = float(v)
  7899. a, b = self.a, self.b
  7900. self.a += v * self.c
  7901. self.b += v * self.d
  7902. self.c += h * a
  7903. self.d += h * b
  7904. return self
  7905. def pretranslate(self, tx, ty):
  7906. """Calculate pre translation and replace current matrix."""
  7907. tx = float(tx)
  7908. ty = float(ty)
  7909. self.e += tx * self.a + ty * self.c
  7910. self.f += tx * self.b + ty * self.d
  7911. return self
  7912. __inv__ = __invert__
  7913. __div__ = __truediv__
  7914. norm = __abs__
  7915. class IdentityMatrix(Matrix):
  7916. """Identity matrix [1, 0, 0, 1, 0, 0]"""
  7917. def __hash__(self):
  7918. return hash((1,0,0,1,0,0))
  7919. def __init__(self):
  7920. Matrix.__init__(self, 1.0, 1.0)
  7921. def __repr__(self):
  7922. return "IdentityMatrix(1.0, 0.0, 0.0, 1.0, 0.0, 0.0)"
  7923. def __setattr__(self, name, value):
  7924. if name in "ad":
  7925. self.__dict__[name] = 1.0
  7926. elif name in "bcef":
  7927. self.__dict__[name] = 0.0
  7928. else:
  7929. self.__dict__[name] = value
  7930. def checkargs(*args):
  7931. raise NotImplementedError("Identity is readonly")
  7932. Identity = IdentityMatrix()
  7933. class linkDest:
  7934. """link or outline destination details"""
  7935. def __init__(self, obj, rlink, document=None):
  7936. isExt = obj.is_external
  7937. isInt = not isExt
  7938. self.dest = ""
  7939. self.file_spec = ""
  7940. self.flags = 0
  7941. self.is_map = False
  7942. self.is_uri = False
  7943. self.kind = LINK_NONE
  7944. self.lt = Point(0, 0)
  7945. self.named = dict()
  7946. self.new_window = ""
  7947. self.page = obj.page
  7948. self.rb = Point(0, 0)
  7949. self.uri = obj.uri
  7950. def uri_to_dict(uri):
  7951. items = self.uri[1:].split('&')
  7952. ret = dict()
  7953. for item in items:
  7954. eq = item.find('=')
  7955. if eq >= 0:
  7956. ret[item[:eq]] = item[eq+1:]
  7957. else:
  7958. ret[item] = None
  7959. return ret
  7960. def unescape(name):
  7961. """Unescape '%AB' substrings to chr(0xAB)."""
  7962. split = name.replace("%%", "%25") # take care of escaped '%'
  7963. split = split.split("%")
  7964. newname = split[0]
  7965. for item in split[1:]:
  7966. piece = item[:2]
  7967. newname += chr(int(piece, base=16))
  7968. newname += item[2:]
  7969. return newname
  7970. if rlink and not self.uri.startswith("#"):
  7971. self.uri = f"#page={rlink[0] + 1}&zoom=0,{_format_g(rlink[1])},{_format_g(rlink[2])}"
  7972. if obj.is_external:
  7973. self.page = -1
  7974. self.kind = LINK_URI
  7975. if not self.uri:
  7976. self.page = -1
  7977. self.kind = LINK_NONE
  7978. if isInt and self.uri:
  7979. self.uri = self.uri.replace("&zoom=nan", "&zoom=0")
  7980. if self.uri.startswith("#"):
  7981. self.kind = LINK_GOTO
  7982. m = re.match('^#page=([0-9]+)&zoom=([0-9.]+),(-?[0-9.]+),(-?[0-9.]+)$', self.uri)
  7983. if m:
  7984. self.page = int(m.group(1)) - 1
  7985. self.lt = Point(float((m.group(3))), float(m.group(4)))
  7986. self.flags = self.flags | LINK_FLAG_L_VALID | LINK_FLAG_T_VALID
  7987. else:
  7988. m = re.match('^#page=([0-9]+)$', self.uri)
  7989. if m:
  7990. self.page = int(m.group(1)) - 1
  7991. else:
  7992. self.kind = LINK_NAMED
  7993. m = re.match('^#nameddest=(.*)', self.uri)
  7994. assert document
  7995. if document and m:
  7996. named = unescape(m.group(1))
  7997. self.named = document.resolve_names().get(named)
  7998. if self.named is None:
  7999. # document.resolve_names() does not contain an
  8000. # entry for `named` so use an empty dict.
  8001. self.named = dict()
  8002. self.named['nameddest'] = named
  8003. else:
  8004. self.named = uri_to_dict(self.uri[1:])
  8005. else:
  8006. self.kind = LINK_NAMED
  8007. self.named = uri_to_dict(self.uri)
  8008. if obj.is_external:
  8009. if not self.uri:
  8010. pass
  8011. elif self.uri.startswith("file:"):
  8012. self.file_spec = self.uri[5:]
  8013. if self.file_spec.startswith("//"):
  8014. self.file_spec = self.file_spec[2:]
  8015. self.is_uri = False
  8016. self.uri = ""
  8017. self.kind = LINK_LAUNCH
  8018. ftab = self.file_spec.split("#")
  8019. if len(ftab) == 2:
  8020. if ftab[1].startswith("page="):
  8021. self.kind = LINK_GOTOR
  8022. self.file_spec = ftab[0]
  8023. self.page = int(ftab[1].split("&")[0][5:]) - 1
  8024. elif ":" in self.uri:
  8025. self.is_uri = True
  8026. self.kind = LINK_URI
  8027. else:
  8028. self.is_uri = True
  8029. self.kind = LINK_LAUNCH
  8030. assert isinstance(self.named, dict)
  8031. class Widget:
  8032. '''
  8033. Class describing a PDF form field ("widget")
  8034. '''
  8035. def __init__(self):
  8036. self.border_color = None
  8037. self.border_style = "S"
  8038. self.border_width = 0
  8039. self.border_dashes = None
  8040. self.choice_values = None # choice fields only
  8041. self.rb_parent = None # radio buttons only: xref of owning parent
  8042. self.field_name = None # field name
  8043. self.field_label = None # field label
  8044. self.field_value = None
  8045. self.field_flags = 0
  8046. self.field_display = 0
  8047. self.field_type = 0 # valid range 1 through 7
  8048. self.field_type_string = None # field type as string
  8049. self.fill_color = None
  8050. self.button_caption = None # button caption
  8051. self.is_signed = None # True / False if signature
  8052. self.text_color = (0, 0, 0)
  8053. self.text_font = "Helv"
  8054. self.text_fontsize = 0
  8055. self.text_maxlen = 0 # text fields only
  8056. self.text_format = 0 # text fields only
  8057. self._text_da = "" # /DA = default appearance
  8058. self.script = None # JavaScript (/A)
  8059. self.script_stroke = None # JavaScript (/AA/K)
  8060. self.script_format = None # JavaScript (/AA/F)
  8061. self.script_change = None # JavaScript (/AA/V)
  8062. self.script_calc = None # JavaScript (/AA/C)
  8063. self.script_blur = None # JavaScript (/AA/Bl)
  8064. self.script_focus = None # JavaScript (/AA/Fo) codespell:ignore
  8065. self.rect = None # annot value
  8066. self.xref = 0 # annot value
  8067. def __repr__(self):
  8068. #return "'%s' widget on %s" % (self.field_type_string, str(self.parent))
  8069. # No self.parent.
  8070. return f'Widget:(field_type={self.field_type_string} script={self.script})'
  8071. return "'%s' widget" % (self.field_type_string)
  8072. def _adjust_font(self):
  8073. """Ensure text_font is from our list and correctly spelled.
  8074. """
  8075. if not self.text_font:
  8076. self.text_font = "Helv"
  8077. return
  8078. valid_fonts = ("Cour", "TiRo", "Helv", "ZaDb")
  8079. for f in valid_fonts:
  8080. if self.text_font.lower() == f.lower():
  8081. self.text_font = f
  8082. return
  8083. self.text_font = "Helv"
  8084. return
  8085. def _checker(self):
  8086. """Any widget type checks.
  8087. """
  8088. if self.field_type not in range(1, 8):
  8089. raise ValueError("bad field type")
  8090. # if setting a radio button to ON, first set Off all buttons
  8091. # in the group - this is not done by MuPDF:
  8092. if self.field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON and self.field_value not in (False, "Off") and hasattr(self, "parent"):
  8093. # so we are about setting this button to ON/True
  8094. # check other buttons in same group and set them to 'Off'
  8095. doc = self.parent.parent
  8096. kids_type, kids_value = doc.xref_get_key(self.xref, "Parent/Kids")
  8097. if kids_type == "array":
  8098. xrefs = tuple(map(int, kids_value[1:-1].replace("0 R","").split()))
  8099. for xref in xrefs:
  8100. if xref != self.xref:
  8101. doc.xref_set_key(xref, "AS", "/Off")
  8102. # the calling method will now set the intended button to on and
  8103. # will find everything prepared for correct functioning.
  8104. def _parse_da(self):
  8105. """Extract font name, size and color from default appearance string (/DA object).
  8106. Equivalent to 'pdf_parse_default_appearance' function in MuPDF's 'pdf-annot.c'.
  8107. """
  8108. if not self._text_da:
  8109. return
  8110. font = "Helv"
  8111. fsize = 0
  8112. col = (0, 0, 0)
  8113. dat = self._text_da.split() # split on any whitespace
  8114. for i, item in enumerate(dat):
  8115. if item == "Tf":
  8116. font = dat[i - 2][1:]
  8117. fsize = float(dat[i - 1])
  8118. dat[i] = dat[i-1] = dat[i-2] = ""
  8119. continue
  8120. if item == "g": # unicolor text
  8121. col = [(float(dat[i - 1]))]
  8122. dat[i] = dat[i-1] = ""
  8123. continue
  8124. if item == "rg": # RGB colored text
  8125. col = [float(f) for f in dat[i - 3:i]]
  8126. dat[i] = dat[i-1] = dat[i-2] = dat[i-3] = ""
  8127. continue
  8128. self.text_font = font
  8129. self.text_fontsize = fsize
  8130. self.text_color = col
  8131. self._text_da = ""
  8132. return
  8133. def _validate(self):
  8134. """Validate the class entries.
  8135. """
  8136. if (self.rect.is_infinite
  8137. or self.rect.is_empty
  8138. ):
  8139. raise ValueError("bad rect")
  8140. if not self.field_name:
  8141. raise ValueError("field name missing")
  8142. if self.field_label == "Unnamed":
  8143. self.field_label = None
  8144. CheckColor(self.border_color)
  8145. CheckColor(self.fill_color)
  8146. if not self.text_color:
  8147. self.text_color = (0, 0, 0)
  8148. CheckColor(self.text_color)
  8149. if not self.border_width:
  8150. self.border_width = 0
  8151. if not self.text_fontsize:
  8152. self.text_fontsize = 0
  8153. self.border_style = self.border_style.upper()[0:1]
  8154. # standardize content of JavaScript entries
  8155. btn_type = self.field_type in (
  8156. mupdf.PDF_WIDGET_TYPE_BUTTON,
  8157. mupdf.PDF_WIDGET_TYPE_CHECKBOX,
  8158. mupdf.PDF_WIDGET_TYPE_RADIOBUTTON,
  8159. )
  8160. if not self.script:
  8161. self.script = None
  8162. elif type(self.script) is not str:
  8163. raise ValueError("script content must be a string")
  8164. # buttons cannot have the following script actions
  8165. if btn_type or not self.script_calc:
  8166. self.script_calc = None
  8167. elif type(self.script_calc) is not str:
  8168. raise ValueError("script_calc content must be a string")
  8169. if btn_type or not self.script_change:
  8170. self.script_change = None
  8171. elif type(self.script_change) is not str:
  8172. raise ValueError("script_change content must be a string")
  8173. if btn_type or not self.script_format:
  8174. self.script_format = None
  8175. elif type(self.script_format) is not str:
  8176. raise ValueError("script_format content must be a string")
  8177. if btn_type or not self.script_stroke:
  8178. self.script_stroke = None
  8179. elif type(self.script_stroke) is not str:
  8180. raise ValueError("script_stroke content must be a string")
  8181. if btn_type or not self.script_blur:
  8182. self.script_blur = None
  8183. elif type(self.script_blur) is not str:
  8184. raise ValueError("script_blur content must be a string")
  8185. if btn_type or not self.script_focus:
  8186. self.script_focus = None
  8187. elif type(self.script_focus) is not str:
  8188. raise ValueError("script_focus content must be a string")
  8189. self._checker() # any field_type specific checks
  8190. def _sync_flags(self):
  8191. """Propagate the field flags.
  8192. If this widget has a "/Parent", set its field flags and that of all
  8193. its /Kids widgets to the value of the current widget.
  8194. Only possible for widgets existing in the PDF.
  8195. Returns True or False.
  8196. """
  8197. if not self.xref:
  8198. return False # no xref: widget not in the PDF
  8199. doc = self.parent.parent # the owning document
  8200. assert doc
  8201. pdf = _as_pdf_document(doc)
  8202. # load underlying PDF object
  8203. pdf_widget = mupdf.pdf_load_object(pdf, self.xref)
  8204. Parent = mupdf.pdf_dict_get(pdf_widget, PDF_NAME("Parent"))
  8205. if not Parent.pdf_is_dict():
  8206. return False # no /Parent: nothing to do
  8207. # put the field flags value into the parent field flags:
  8208. Parent.pdf_dict_put_int(PDF_NAME("Ff"), self.field_flags)
  8209. # also put that value into all kids of the Parent
  8210. kids = Parent.pdf_dict_get(PDF_NAME("Kids"))
  8211. if not kids.pdf_is_array():
  8212. message("warning: malformed PDF, Parent has no Kids array")
  8213. return False # no /Kids: should never happen!
  8214. for i in range(kids.pdf_array_len()): # walk through all kids
  8215. # access kid widget, and do some precautionary checks
  8216. kid = kids.pdf_array_get(i)
  8217. if not kid.pdf_is_dict():
  8218. continue
  8219. xref = kid.pdf_to_num() # get xref of the kid
  8220. if xref == self.xref: # skip self widget
  8221. continue
  8222. subtype = kid.pdf_dict_get(PDF_NAME("Subtype"))
  8223. if not subtype.pdf_to_name() == "Widget":
  8224. continue
  8225. # put the field flags value into the kid field flags:
  8226. kid.pdf_dict_put_int(PDF_NAME("Ff"), self.field_flags)
  8227. return True # all done
  8228. def button_states(self):
  8229. """Return the on/off state names for button widgets.
  8230. A button may have 'normal' or 'pressed down' appearances. While the 'Off'
  8231. state is usually called like this, the 'On' state is often given a name
  8232. relating to the functional context.
  8233. """
  8234. if self.field_type not in (2, 5):
  8235. return None # no button type
  8236. if hasattr(self, "parent"): # field already exists on page
  8237. doc = self.parent.parent
  8238. else:
  8239. return
  8240. xref = self.xref
  8241. states = {"normal": None, "down": None}
  8242. APN = doc.xref_get_key(xref, "AP/N")
  8243. if APN[0] == "dict":
  8244. nstates = []
  8245. APN = APN[1][2:-2]
  8246. apnt = APN.split("/")[1:]
  8247. for x in apnt:
  8248. nstates.append(x.split()[0])
  8249. states["normal"] = nstates
  8250. if APN[0] == "xref":
  8251. nstates = []
  8252. nxref = int(APN[1].split(" ")[0])
  8253. APN = doc.xref_object(nxref)
  8254. apnt = APN.split("/")[1:]
  8255. for x in apnt:
  8256. nstates.append(x.split()[0])
  8257. states["normal"] = nstates
  8258. APD = doc.xref_get_key(xref, "AP/D")
  8259. if APD[0] == "dict":
  8260. dstates = []
  8261. APD = APD[1][2:-2]
  8262. apdt = APD.split("/")[1:]
  8263. for x in apdt:
  8264. dstates.append(x.split()[0])
  8265. states["down"] = dstates
  8266. if APD[0] == "xref":
  8267. dstates = []
  8268. dxref = int(APD[1].split(" ")[0])
  8269. APD = doc.xref_object(dxref)
  8270. apdt = APD.split("/")[1:]
  8271. for x in apdt:
  8272. dstates.append(x.split()[0])
  8273. states["down"] = dstates
  8274. return states
  8275. @property
  8276. def next(self):
  8277. return self._annot.next
  8278. def on_state(self):
  8279. """Return the "On" value for button widgets.
  8280. This is useful for radio buttons mainly. Checkboxes will always return
  8281. "Yes". Radio buttons will return the string that is unequal to "Off"
  8282. as returned by method button_states().
  8283. If the radio button is new / being created, it does not yet have an
  8284. "On" value. In this case, a warning is shown and True is returned.
  8285. """
  8286. if self.field_type not in (2, 5):
  8287. return None # no checkbox or radio button
  8288. bstate = self.button_states()
  8289. if bstate is None:
  8290. bstate = dict()
  8291. for k in bstate.keys():
  8292. for v in bstate[k]:
  8293. if v != "Off":
  8294. return v
  8295. message("warning: radio button has no 'On' value.")
  8296. return True
  8297. def reset(self):
  8298. """Reset the field value to its default.
  8299. """
  8300. TOOLS._reset_widget(self._annot)
  8301. def update(self, sync_flags=False):
  8302. """Reflect Python object in the PDF."""
  8303. self._validate()
  8304. self._adjust_font() # ensure valid text_font name
  8305. # now create the /DA string
  8306. self._text_da = ""
  8307. if len(self.text_color) == 3:
  8308. fmt = "{:g} {:g} {:g} rg /{f:s} {s:g} Tf" + self._text_da
  8309. elif len(self.text_color) == 1:
  8310. fmt = "{:g} g /{f:s} {s:g} Tf" + self._text_da
  8311. elif len(self.text_color) == 4:
  8312. fmt = "{:g} {:g} {:g} {:g} k /{f:s} {s:g} Tf" + self._text_da
  8313. self._text_da = fmt.format(*self.text_color, f=self.text_font,
  8314. s=self.text_fontsize)
  8315. # finally update the widget
  8316. # if widget has a '/AA/C' script, make sure it is in the '/CO'
  8317. # array of the '/AcroForm' dictionary.
  8318. if self.script_calc: # there is a "calculation" script:
  8319. # make sure we are in the /CO array
  8320. util_ensure_widget_calc(self._annot)
  8321. # finally update the widget
  8322. TOOLS._save_widget(self._annot, self)
  8323. self._text_da = ""
  8324. if sync_flags:
  8325. self._sync_flags() # propagate field flags to parent and kids
  8326. from . import _extra
  8327. class Outline:
  8328. def __init__(self, ol):
  8329. self.this = ol
  8330. @property
  8331. def dest(self):
  8332. '''outline destination details'''
  8333. return linkDest(self, None, None)
  8334. def destination(self, document):
  8335. '''
  8336. Like `dest` property but uses `document` to resolve destinations for
  8337. kind=LINK_NAMED.
  8338. '''
  8339. return linkDest(self, None, document)
  8340. @property
  8341. def down(self):
  8342. ol = self.this
  8343. down_ol = ol.down()
  8344. if not down_ol.m_internal:
  8345. return
  8346. return Outline(down_ol)
  8347. @property
  8348. def is_external(self):
  8349. if g_use_extra:
  8350. # calling _extra.* here appears to save significant time in
  8351. # test_toc.py:test_full_toc, 1.2s=>0.94s.
  8352. #
  8353. return _extra.Outline_is_external( self.this)
  8354. ol = self.this
  8355. if not ol.m_internal:
  8356. return False
  8357. uri = ol.m_internal.uri if 1 else ol.uri()
  8358. if uri is None:
  8359. return False
  8360. return mupdf.fz_is_external_link(uri)
  8361. @property
  8362. def is_open(self):
  8363. if 1:
  8364. return self.this.m_internal.is_open
  8365. return self.this.is_open()
  8366. @property
  8367. def next(self):
  8368. ol = self.this
  8369. next_ol = ol.next()
  8370. if not next_ol.m_internal:
  8371. return
  8372. return Outline(next_ol)
  8373. @property
  8374. def page(self):
  8375. if 1:
  8376. return self.this.m_internal.page.page
  8377. return self.this.page().page
  8378. @property
  8379. def title(self):
  8380. return self.this.m_internal.title
  8381. @property
  8382. def uri(self):
  8383. ol = self.this
  8384. if not ol.m_internal:
  8385. return None
  8386. return ol.m_internal.uri
  8387. @property
  8388. def x(self):
  8389. return self.this.m_internal.x
  8390. @property
  8391. def y(self):
  8392. return self.this.m_internal.y
  8393. __slots__ = [ 'this']
  8394. def _make_PdfFilterOptions(
  8395. recurse=0,
  8396. instance_forms=0,
  8397. ascii=0,
  8398. no_update=0,
  8399. sanitize=0,
  8400. sopts=None,
  8401. ):
  8402. '''
  8403. Returns a mupdf.PdfFilterOptions instance.
  8404. '''
  8405. filter_ = mupdf.PdfFilterOptions()
  8406. filter_.recurse = recurse
  8407. filter_.instance_forms = instance_forms
  8408. filter_.ascii = ascii
  8409. filter_.no_update = no_update
  8410. if sanitize:
  8411. # We want to use a PdfFilterFactory whose `.filter` fn pointer is
  8412. # set to MuPDF's `pdf_new_sanitize_filter()`. But not sure how to
  8413. # get access to this raw fn in Python; and on Windows raw MuPDF
  8414. # functions are not even available to C++.
  8415. #
  8416. # So we use SWIG Director to implement our own
  8417. # PdfFilterFactory whose `filter()` method calls
  8418. # `mupdf.ll_pdf_new_sanitize_filter()`.
  8419. if sopts:
  8420. assert isinstance(sopts, mupdf.PdfSanitizeFilterOptions)
  8421. else:
  8422. sopts = mupdf.PdfSanitizeFilterOptions()
  8423. class Factory(mupdf.PdfFilterFactory2):
  8424. def __init__(self):
  8425. super().__init__()
  8426. self.use_virtual_filter()
  8427. self.sopts = sopts
  8428. def filter(self, ctx, doc, chain, struct_parents, transform, options):
  8429. if 0:
  8430. log(f'sanitize filter.filter():')
  8431. log(f' {self=}')
  8432. log(f' {ctx=}')
  8433. log(f' {doc=}')
  8434. log(f' {chain=}')
  8435. log(f' {struct_parents=}')
  8436. log(f' {transform=}')
  8437. log(f' {options=}')
  8438. log(f' {self.sopts.internal()=}')
  8439. return mupdf.ll_pdf_new_sanitize_filter(
  8440. doc,
  8441. chain,
  8442. struct_parents,
  8443. transform,
  8444. options,
  8445. self.sopts.internal(),
  8446. )
  8447. factory = Factory()
  8448. filter_.add_factory(factory.internal())
  8449. filter_._factory = factory
  8450. return filter_
  8451. class Page:
  8452. def __init__(self, page, document):
  8453. assert isinstance(page, (mupdf.FzPage, mupdf.PdfPage)), f'page is: {page}'
  8454. self.this = page
  8455. self.thisown = True
  8456. self.last_point = None
  8457. self.draw_cont = ''
  8458. self._annot_refs = dict()
  8459. self.parent = document
  8460. if page.m_internal:
  8461. if isinstance( page, mupdf.PdfPage):
  8462. self.number = page.m_internal.super.number
  8463. else:
  8464. self.number = page.m_internal.number
  8465. else:
  8466. self.number = None
  8467. def __repr__(self):
  8468. return self.__str__()
  8469. CheckParent(self)
  8470. x = self.parent.name
  8471. if self.parent.stream is not None:
  8472. x = "<memory, doc# %i>" % (self.parent._graft_id,)
  8473. if x == "":
  8474. x = "<new PDF, doc# %i>" % self.parent._graft_id
  8475. return "page %s of %s" % (self.number, x)
  8476. def __str__(self):
  8477. #CheckParent(self)
  8478. parent = getattr(self, 'parent', None)
  8479. if isinstance(self.this.m_internal, mupdf.pdf_page):
  8480. number = self.this.m_internal.super.number
  8481. else:
  8482. number = self.this.m_internal.number
  8483. ret = f'page {number}'
  8484. if parent:
  8485. x = self.parent.name
  8486. if self.parent.stream is not None:
  8487. x = "<memory, doc# %i>" % (self.parent._graft_id,)
  8488. if x == "":
  8489. x = "<new PDF, doc# %i>" % self.parent._graft_id
  8490. ret += f' of {x}'
  8491. return ret
  8492. def _add_caret_annot(self, point):
  8493. if g_use_extra:
  8494. annot = extra._add_caret_annot( self.this, JM_point_from_py(point))
  8495. else:
  8496. page = self._pdf_page()
  8497. annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_CARET)
  8498. if point:
  8499. p = JM_point_from_py(point)
  8500. r = mupdf.pdf_annot_rect(annot)
  8501. r = mupdf.FzRect(p.x, p.y, p.x + r.x1 - r.x0, p.y + r.y1 - r.y0)
  8502. mupdf.pdf_set_annot_rect(annot, r)
  8503. mupdf.pdf_update_annot(annot)
  8504. JM_add_annot_id(annot, "A")
  8505. return annot
  8506. def _add_file_annot(self, point, buffer_, filename, ufilename=None, desc=None, icon=None):
  8507. page = self._pdf_page()
  8508. uf = ufilename if ufilename else filename
  8509. d = desc if desc else filename
  8510. p = JM_point_from_py(point)
  8511. filebuf = JM_BufferFromBytes(buffer_)
  8512. if not filebuf.m_internal:
  8513. raise TypeError( MSG_BAD_BUFFER)
  8514. annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_FILE_ATTACHMENT)
  8515. r = mupdf.pdf_annot_rect(annot)
  8516. r = mupdf.fz_make_rect(p.x, p.y, p.x + r.x1 - r.x0, p.y + r.y1 - r.y0)
  8517. mupdf.pdf_set_annot_rect(annot, r)
  8518. flags = mupdf.PDF_ANNOT_IS_PRINT
  8519. mupdf.pdf_set_annot_flags(annot, flags)
  8520. if icon:
  8521. mupdf.pdf_set_annot_icon_name(annot, icon)
  8522. val = JM_embed_file(page.doc(), filebuf, filename, uf, d, 1)
  8523. mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME('FS'), val)
  8524. mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), PDF_NAME('Contents'), filename)
  8525. mupdf.pdf_update_annot(annot)
  8526. mupdf.pdf_set_annot_rect(annot, r)
  8527. mupdf.pdf_set_annot_flags(annot, flags)
  8528. JM_add_annot_id(annot, "A")
  8529. return Annot(annot)
  8530. def _add_freetext_annot(
  8531. self, rect,
  8532. text,
  8533. fontsize=11,
  8534. fontname=None,
  8535. text_color=None,
  8536. fill_color=None,
  8537. border_color=None,
  8538. border_width=0,
  8539. dashes=None,
  8540. callout=None,
  8541. line_end=mupdf.PDF_ANNOT_LE_OPEN_ARROW,
  8542. opacity=1,
  8543. align=0,
  8544. rotate=0,
  8545. richtext=False,
  8546. style=None,
  8547. ):
  8548. rc = f"""<?xml version="1.0"?>
  8549. <body xmlns="http://www.w3.org/1999/xtml"
  8550. xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/"
  8551. xfa:contentType="text/html" xfa:APIVersion="Acrobat:8.0.0" xfa:spec="2.4">
  8552. {text}"""
  8553. page = self._pdf_page()
  8554. if border_color and not richtext:
  8555. raise ValueError("cannot set border_color if rich_text is False")
  8556. if border_color and not text_color:
  8557. text_color = border_color
  8558. nfcol, fcol = JM_color_FromSequence(fill_color)
  8559. ntcol, tcol = JM_color_FromSequence(text_color)
  8560. r = JM_rect_from_py(rect)
  8561. if mupdf.fz_is_infinite_rect(r) or mupdf.fz_is_empty_rect(r):
  8562. raise ValueError( MSG_BAD_RECT)
  8563. annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_FREE_TEXT)
  8564. annot_obj = mupdf.pdf_annot_obj(annot)
  8565. #insert text as 'contents' or 'RC' depending on 'richtext'
  8566. if not richtext:
  8567. mupdf.pdf_set_annot_contents(annot, text)
  8568. else:
  8569. mupdf.pdf_dict_put_text_string(annot_obj,PDF_NAME("RC"), rc)
  8570. if style:
  8571. mupdf.pdf_dict_put_text_string(annot_obj,PDF_NAME("DS"), style)
  8572. mupdf.pdf_set_annot_rect(annot, r)
  8573. while rotate < 0:
  8574. rotate += 360
  8575. while rotate >= 360:
  8576. rotate -= 360
  8577. if rotate != 0:
  8578. mupdf.pdf_dict_put_int(annot_obj, PDF_NAME('Rotate'), rotate)
  8579. mupdf.pdf_set_annot_quadding(annot, align)
  8580. if nfcol > 0:
  8581. mupdf.pdf_set_annot_color(annot, fcol[:nfcol])
  8582. mupdf.pdf_set_annot_border_width(annot, border_width)
  8583. mupdf.pdf_set_annot_opacity(annot, opacity)
  8584. if dashes:
  8585. for d in dashes:
  8586. mupdf.pdf_add_annot_border_dash_item(annot, float(d))
  8587. # Insert callout information
  8588. if callout:
  8589. mupdf.pdf_dict_put(annot_obj, PDF_NAME("IT"), PDF_NAME("FreeTextCallout"))
  8590. mupdf.pdf_set_annot_callout_style(annot, line_end)
  8591. point_count = len(callout)
  8592. extra.JM_set_annot_callout_line(annot, tuple(callout), point_count)
  8593. # insert the default appearance string
  8594. if not richtext:
  8595. JM_make_annot_DA(annot, ntcol, tcol, fontname, fontsize)
  8596. mupdf.pdf_update_annot(annot)
  8597. JM_add_annot_id(annot, "A")
  8598. val = Annot(annot)
  8599. return val
  8600. def _add_ink_annot(self, list):
  8601. page = _as_pdf_page(self.this)
  8602. if not PySequence_Check(list):
  8603. raise ValueError( MSG_BAD_ARG_INK_ANNOT)
  8604. ctm = mupdf.FzMatrix()
  8605. mupdf.pdf_page_transform(page, mupdf.FzRect(0), ctm)
  8606. inv_ctm = mupdf.fz_invert_matrix(ctm)
  8607. annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_INK)
  8608. annot_obj = mupdf.pdf_annot_obj(annot)
  8609. n0 = len(list)
  8610. inklist = mupdf.pdf_new_array(page.doc(), n0)
  8611. for j in range(n0):
  8612. sublist = list[j]
  8613. n1 = len(sublist)
  8614. stroke = mupdf.pdf_new_array(page.doc(), 2 * n1)
  8615. for i in range(n1):
  8616. p = sublist[i]
  8617. if not PySequence_Check(p) or PySequence_Size(p) != 2:
  8618. raise ValueError( MSG_BAD_ARG_INK_ANNOT)
  8619. point = mupdf.fz_transform_point(JM_point_from_py(p), inv_ctm)
  8620. mupdf.pdf_array_push_real(stroke, point.x)
  8621. mupdf.pdf_array_push_real(stroke, point.y)
  8622. mupdf.pdf_array_push(inklist, stroke)
  8623. mupdf.pdf_dict_put(annot_obj, PDF_NAME('InkList'), inklist)
  8624. mupdf.pdf_update_annot(annot)
  8625. JM_add_annot_id(annot, "A")
  8626. return Annot(annot)
  8627. def _add_line_annot(self, p1, p2):
  8628. page = self._pdf_page()
  8629. annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_LINE)
  8630. a = JM_point_from_py(p1)
  8631. b = JM_point_from_py(p2)
  8632. mupdf.pdf_set_annot_line(annot, a, b)
  8633. mupdf.pdf_update_annot(annot)
  8634. JM_add_annot_id(annot, "A")
  8635. assert annot.m_internal
  8636. return Annot(annot)
  8637. def _add_multiline(self, points, annot_type):
  8638. page = self._pdf_page()
  8639. if len(points) < 2:
  8640. raise ValueError( MSG_BAD_ARG_POINTS)
  8641. annot = mupdf.pdf_create_annot(page, annot_type)
  8642. for p in points:
  8643. if (PySequence_Size(p) != 2):
  8644. raise ValueError( MSG_BAD_ARG_POINTS)
  8645. point = JM_point_from_py(p)
  8646. mupdf.pdf_add_annot_vertex(annot, point)
  8647. mupdf.pdf_update_annot(annot)
  8648. JM_add_annot_id(annot, "A")
  8649. return Annot(annot)
  8650. def _add_redact_annot(self, quad, text=None, da_str=None, align=0, fill=None, text_color=None):
  8651. page = self._pdf_page()
  8652. fcol = [ 1, 1, 1, 0]
  8653. nfcol = 0
  8654. annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_REDACT)
  8655. q = JM_quad_from_py(quad)
  8656. r = mupdf.fz_rect_from_quad(q)
  8657. # TODO calculate de-rotated rect
  8658. mupdf.pdf_set_annot_rect(annot, r)
  8659. if fill:
  8660. nfcol, fcol = JM_color_FromSequence(fill)
  8661. arr = mupdf.pdf_new_array(page.doc(), nfcol)
  8662. for i in range(nfcol):
  8663. mupdf.pdf_array_push_real(arr, fcol[i])
  8664. mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME('IC'), arr)
  8665. if text:
  8666. assert da_str
  8667. mupdf.pdf_dict_puts(
  8668. mupdf.pdf_annot_obj(annot),
  8669. "OverlayText",
  8670. mupdf.pdf_new_text_string(text),
  8671. )
  8672. mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), PDF_NAME('DA'), da_str)
  8673. mupdf.pdf_dict_put_int(mupdf.pdf_annot_obj(annot), PDF_NAME('Q'), align)
  8674. mupdf.pdf_update_annot(annot)
  8675. JM_add_annot_id(annot, "A")
  8676. annot = mupdf.ll_pdf_keep_annot(annot.m_internal)
  8677. annot = mupdf.PdfAnnot( annot)
  8678. return Annot(annot)
  8679. def _add_square_or_circle(self, rect, annot_type):
  8680. page = self._pdf_page()
  8681. r = JM_rect_from_py(rect)
  8682. if mupdf.fz_is_infinite_rect(r) or mupdf.fz_is_empty_rect(r):
  8683. raise ValueError( MSG_BAD_RECT)
  8684. annot = mupdf.pdf_create_annot(page, annot_type)
  8685. mupdf.pdf_set_annot_rect(annot, r)
  8686. mupdf.pdf_update_annot(annot)
  8687. JM_add_annot_id(annot, "A")
  8688. assert annot.m_internal
  8689. return Annot(annot)
  8690. def _add_stamp_annot(self, rect, stamp=0):
  8691. rect = Rect(rect)
  8692. r = JM_rect_from_py(rect)
  8693. if mupdf.fz_is_infinite_rect(r) or mupdf.fz_is_empty_rect(r):
  8694. raise ValueError(MSG_BAD_RECT)
  8695. page = self._pdf_page()
  8696. stamp_id = [
  8697. "Approved",
  8698. "AsIs",
  8699. "Confidential",
  8700. "Departmental",
  8701. "Experimental",
  8702. "Expired",
  8703. "Final",
  8704. "ForComment",
  8705. "ForPublicRelease",
  8706. "NotApproved",
  8707. "NotForPublicRelease",
  8708. "Sold",
  8709. "TopSecret",
  8710. "Draft",
  8711. ]
  8712. n = len(stamp_id)
  8713. buf = None
  8714. name = None
  8715. if stamp in range(n):
  8716. name = stamp_id[stamp]
  8717. elif isinstance(stamp, Pixmap):
  8718. buf = stamp.tobytes()
  8719. elif isinstance(stamp, str):
  8720. buf = pathlib.Path(stamp).read_bytes()
  8721. elif isinstance(stamp, (bytes, bytearray)):
  8722. buf = stamp
  8723. elif isinstance(stamp, io.BytesIO):
  8724. buf = stamp.getvalue()
  8725. else:
  8726. name = stamp_id[0]
  8727. annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_STAMP)
  8728. if buf: # image stamp
  8729. fzbuff = mupdf.fz_new_buffer_from_copied_data(buf)
  8730. img = mupdf.fz_new_image_from_buffer(fzbuff)
  8731. # compute image boundary box on page
  8732. w, h = img.w(), img.h()
  8733. scale = min(rect.width / w, rect.height / h)
  8734. width = w * scale # bbox width
  8735. height = h * scale # bbox height
  8736. # center of "rect"
  8737. center = (rect.tl + rect.br) / 2
  8738. x0 = center.x - width / 2
  8739. y0 = center.y - height / 2
  8740. x1 = x0 + width
  8741. y1 = y0 + height
  8742. r = mupdf.fz_make_rect(x0, y0, x1, y1)
  8743. mupdf.pdf_set_annot_rect(annot, r)
  8744. mupdf.pdf_set_annot_stamp_image(annot, img)
  8745. mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME("Name"), mupdf.pdf_new_name("ImageStamp"))
  8746. mupdf.pdf_set_annot_contents(annot, "Image Stamp")
  8747. else: # text stamp
  8748. mupdf.pdf_set_annot_rect(annot, r)
  8749. mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME("Name"), PDF_NAME(name))
  8750. mupdf.pdf_set_annot_contents(annot, name)
  8751. mupdf.pdf_update_annot(annot)
  8752. JM_add_annot_id(annot, "A")
  8753. return Annot(annot)
  8754. def _add_text_annot(self, point, text, icon=None):
  8755. page = self._pdf_page()
  8756. p = JM_point_from_py( point)
  8757. annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_TEXT)
  8758. r = mupdf.pdf_annot_rect(annot)
  8759. r = mupdf.fz_make_rect(p.x, p.y, p.x + r.x1 - r.x0, p.y + r.y1 - r.y0)
  8760. mupdf.pdf_set_annot_rect(annot, r)
  8761. mupdf.pdf_set_annot_contents(annot, text)
  8762. if icon:
  8763. mupdf.pdf_set_annot_icon_name(annot, icon)
  8764. mupdf.pdf_update_annot(annot)
  8765. JM_add_annot_id(annot, "A")
  8766. return Annot(annot)
  8767. def _add_text_marker(self, quads, annot_type):
  8768. CheckParent(self)
  8769. if not self.parent.is_pdf:
  8770. raise ValueError("is no PDF")
  8771. val = Page__add_text_marker(self, quads, annot_type)
  8772. if not val:
  8773. return None
  8774. val.parent = weakref.proxy(self)
  8775. self._annot_refs[id(val)] = val
  8776. return val
  8777. def _addAnnot_FromString(self, linklist):
  8778. """Add links from list of object sources."""
  8779. CheckParent(self)
  8780. if g_use_extra:
  8781. self.__class__._addAnnot_FromString = extra.Page_addAnnot_FromString
  8782. #log('Page._addAnnot_FromString() deferring to extra.Page_addAnnot_FromString().')
  8783. return extra.Page_addAnnot_FromString( self.this, linklist)
  8784. page = _as_pdf_page(self.this)
  8785. lcount = len(linklist) # link count
  8786. if lcount < 1:
  8787. return
  8788. i = -1
  8789. # insert links from the provided sources
  8790. if not isinstance(linklist, tuple):
  8791. raise ValueError( "bad 'linklist' argument")
  8792. if not mupdf.pdf_dict_get( page.obj(), PDF_NAME('Annots')).m_internal:
  8793. mupdf.pdf_dict_put_array( page.obj(), PDF_NAME('Annots'), lcount)
  8794. annots = mupdf.pdf_dict_get( page.obj(), PDF_NAME('Annots'))
  8795. assert annots.m_internal, f'{lcount=} {annots.m_internal=}'
  8796. for i in range(lcount):
  8797. txtpy = linklist[i]
  8798. text = JM_StrAsChar(txtpy)
  8799. if not text:
  8800. message("skipping bad link / annot item %i.", i)
  8801. continue
  8802. try:
  8803. annot = mupdf.pdf_add_object( page.doc(), JM_pdf_obj_from_str( page.doc(), text))
  8804. ind_obj = mupdf.pdf_new_indirect( page.doc(), mupdf.pdf_to_num( annot), 0)
  8805. mupdf.pdf_array_push( annots, ind_obj)
  8806. except Exception:
  8807. if g_exceptions_verbose: exception_info()
  8808. message("skipping bad link / annot item %i.\n" % i)
  8809. def _addWidget(self, field_type, field_name):
  8810. page = self._pdf_page()
  8811. pdf = page.doc()
  8812. annot = JM_create_widget(pdf, page, field_type, field_name)
  8813. if not annot.m_internal:
  8814. raise RuntimeError( "cannot create widget")
  8815. JM_add_annot_id(annot, "W")
  8816. return Annot(annot)
  8817. def _apply_redactions(self, text, images, graphics):
  8818. page = self._pdf_page()
  8819. opts = mupdf.PdfRedactOptions()
  8820. opts.black_boxes = 0 # no black boxes
  8821. opts.text = text # how to treat text
  8822. opts.image_method = images # how to treat images
  8823. opts.line_art = graphics # how to treat vector graphics
  8824. success = mupdf.pdf_redact_page(page.doc(), page, opts)
  8825. return success
  8826. def _erase(self):
  8827. self._reset_annot_refs()
  8828. try:
  8829. self.parent._forget_page(self)
  8830. except Exception:
  8831. exception_info()
  8832. pass
  8833. self.parent = None
  8834. self.thisown = False
  8835. self.number = None
  8836. self.this = None
  8837. def _count_q_balance(self):
  8838. """Count missing graphic state pushs and pops.
  8839. Returns:
  8840. A pair of integers (push, pop). Push is the number of missing
  8841. PDF "q" commands, pop is the number of "Q" commands.
  8842. A balanced graphics state for the page will be reached if its
  8843. /Contents is prepended with 'push' copies of string "q\n"
  8844. and appended with 'pop' copies of "\nQ".
  8845. """
  8846. page = _as_pdf_page(self) # need the underlying PDF page
  8847. res = mupdf.pdf_dict_get( # access /Resources
  8848. page.obj(),
  8849. mupdf.PDF_ENUM_NAME_Resources,
  8850. )
  8851. cont = mupdf.pdf_dict_get( # access /Contents
  8852. page.obj(),
  8853. mupdf.PDF_ENUM_NAME_Contents,
  8854. )
  8855. pdf = _as_pdf_document(self.parent) # need underlying PDF document
  8856. # return value of MuPDF function
  8857. return mupdf.pdf_count_q_balance_outparams_fn(pdf, res, cont)
  8858. def _get_optional_content(self, oc: OptInt) -> OptStr:
  8859. if oc is None or oc == 0:
  8860. return None
  8861. doc = self.parent
  8862. check = doc.xref_object(oc, compressed=True)
  8863. if not ("/Type/OCG" in check or "/Type/OCMD" in check):
  8864. #log( 'raising "bad optional content"')
  8865. raise ValueError("bad optional content: 'oc'")
  8866. #log( 'Looking at self._get_resource_properties()')
  8867. props = {}
  8868. for p, x in self._get_resource_properties():
  8869. props[x] = p
  8870. if oc in props.keys():
  8871. return props[oc]
  8872. i = 0
  8873. mc = "MC%i" % i
  8874. while mc in props.values():
  8875. i += 1
  8876. mc = "MC%i" % i
  8877. self._set_resource_property(mc, oc)
  8878. #log( 'returning {mc=}')
  8879. return mc
  8880. def _get_resource_properties(self):
  8881. '''
  8882. page list Resource/Properties
  8883. '''
  8884. page = self._pdf_page()
  8885. rc = JM_get_resource_properties(page.obj())
  8886. return rc
  8887. def _get_textpage(self, clip=None, flags=0, matrix=None):
  8888. if 1 or g_use_extra:
  8889. ll_tpage = extra.page_get_textpage(self.this, clip, flags, matrix)
  8890. tpage = mupdf.FzStextPage(ll_tpage)
  8891. return tpage
  8892. page = self.this
  8893. options = mupdf.FzStextOptions(flags)
  8894. rect = JM_rect_from_py(clip)
  8895. # Default to page's rect if `clip` not specified, for #2048.
  8896. rect = mupdf.fz_bound_page(page) if clip is None else JM_rect_from_py(clip)
  8897. ctm = JM_matrix_from_py(matrix)
  8898. tpage = mupdf.FzStextPage(rect)
  8899. dev = mupdf.fz_new_stext_device(tpage, options)
  8900. if _globals.no_device_caching:
  8901. mupdf.fz_enable_device_hints( dev, mupdf.FZ_NO_CACHE)
  8902. if isinstance(page, mupdf.FzPage):
  8903. pass
  8904. elif isinstance(page, mupdf.PdfPage):
  8905. page = page.super()
  8906. else:
  8907. assert 0, f'Unrecognised {type(page)=}'
  8908. mupdf.fz_run_page(page, dev, ctm, mupdf.FzCookie())
  8909. mupdf.fz_close_device(dev)
  8910. return tpage
def _insert_image(self,
        filename=None, pixmap=None, stream=None, imask=None, clip=None,
        overlay=1, rotate=0, keep_proportion=1, oc=0, width=0, height=0,
        xref=0, alpha=-1, _imgname=None, digests=None
        ):
    """Insert an image into the page and reference it from /Contents.

    The image comes from exactly one of these sources, checked in this
    order: an existing image `xref`, `stream` (bytes), `filename`, or
    `pixmap`.

    The body emulates a chain of gotos (see the '#goto ...' comments):
    each `do_...` flag guards one stage, and a stage skips later ones by
    clearing their flags. The stage order must not be changed.

    Args:
        filename, pixmap, stream: alternative image sources.
        imask: optional image data used as mask for `stream` / `filename`
            images.
        clip, rotate, keep_proportion: passed to calc_image_matrix().
        overlay: passed to JM_insert_contents().
        oc: if truthy, passed to JM_add_oc_object() for a new image.
        width, height: initial values of the image dimensions; they are
            overwritten on every path that determines the real size.
        xref: xref of an existing image to re-use (if > 0).
        alpha: not used in this method.
        _imgname: resource name under which the image is stored.
        digests: dict mapping MD5 digests to image xrefs; used to detect
            images that were inserted before, and updated for new ones.

    Returns:
        (xref, digests) if a new image object was added, else
        (xref, None).

    Raises:
        ValueError: if `xref` has zero width and height, or if a mask is
            given for an image without compressed buffer.
    """
    maskbuf = mupdf.FzBuffer()
    page = self._pdf_page()
    # This will create an empty PdfDocument with a call to
    # pdf_new_document() then assign page.doc()'s return value to it (which
    # drop the original empty pdf_document).
    pdf = page.doc()
    w = width
    h = height
    img_xref = xref
    rc_digest = 0  # becomes 1 once a new image has been recorded in `digests`

    # stage flags; all stages run unless an earlier stage clears them
    do_process_pixmap = 1
    do_process_stream = 1
    do_have_imask = 1
    do_have_image = 1
    do_have_xref = 1

    if xref > 0:
        # re-use an existing image: only the final stage is needed
        ref = mupdf.pdf_new_indirect(pdf, xref, 0)
        w = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Width'), PDF_NAME('W')))
        h = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Height'), PDF_NAME('H')))
        if w + h == 0:
            raise ValueError( MSG_IS_NO_IMAGE)
        #goto have_xref()
        do_process_pixmap = 0
        do_process_stream = 0
        do_have_imask = 0
        do_have_image = 0

    else:
        if stream:
            imgbuf = JM_BufferFromBytes(stream)
            do_process_pixmap = 0
        else:
            if filename:
                imgbuf = mupdf.fz_read_file(filename)
                #goto have_stream()
                do_process_pixmap = 0

    if do_process_pixmap:
        #log( 'do_process_pixmap')
        # process pixmap ---------------------------------
        arg_pix = pixmap.this
        w = arg_pix.w()
        h = arg_pix.h()
        digest = mupdf.fz_md5_pixmap2(arg_pix)
        md5_py = digest
        temp = digests.get(md5_py, None)
        if temp is not None:
            # same pixmap was inserted before: re-use its xref
            img_xref = temp
            ref = mupdf.pdf_new_indirect(page.doc(), img_xref, 0)
            #goto have_xref()
            do_process_stream = 0
            do_have_imask = 0
            do_have_image = 0
        else:
            if arg_pix.alpha() == 0:
                image = mupdf.fz_new_image_from_pixmap(arg_pix, mupdf.FzImage())
            else:
                # pixmap has alpha: build a separate mask image from a
                # converted copy and attach it to the image
                pm = mupdf.fz_convert_pixmap(
                        arg_pix,
                        mupdf.FzColorspace(),
                        mupdf.FzColorspace(),
                        mupdf.FzDefaultColorspaces(None),
                        mupdf.FzColorParams(),
                        1,
                        )
                pm.alpha = 0
                pm.colorspace = None
                mask = mupdf.fz_new_image_from_pixmap(pm, mupdf.FzImage())
                image = mupdf.fz_new_image_from_pixmap(arg_pix, mask)
            #goto have_image()
            do_process_stream = 0
            do_have_imask = 0

    if do_process_stream:
        #log( 'do_process_stream')
        # process stream ---------------------------------
        # the digest covers the image data and, if present, the mask data
        state = mupdf.FzMd5()
        if mupdf_cppyy:
            mupdf.fz_md5_update_buffer( state, imgbuf)
        else:
            mupdf.fz_md5_update(state, imgbuf.m_internal.data, imgbuf.m_internal.len)
        if imask:
            maskbuf = JM_BufferFromBytes(imask)
            if mupdf_cppyy:
                mupdf.fz_md5_update_buffer( state, maskbuf)
            else:
                mupdf.fz_md5_update(state, maskbuf.m_internal.data, maskbuf.m_internal.len)
        digest = mupdf.fz_md5_final2(state)
        md5_py = bytes(digest)
        temp = digests.get(md5_py, None)
        if temp is not None:
            # same data was inserted before: re-use its xref
            img_xref = temp
            ref = mupdf.pdf_new_indirect(page.doc(), img_xref, 0)
            w = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Width'), PDF_NAME('W')))
            h = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Height'), PDF_NAME('H')))
            #goto have_xref()
            do_have_imask = 0
            do_have_image = 0
        else:
            image = mupdf.fz_new_image_from_buffer(imgbuf)
            w = image.w()
            h = image.h()
            if not imask:
                #goto have_image()
                do_have_imask = 0

    if do_have_imask:
        # `fz_compressed_buffer` is reference counted and
        # `mupdf.fz_new_image_from_compressed_buffer2()`
        # is provided as a Swig-friendly wrapper for
        # `fz_new_image_from_compressed_buffer()`, so we can do things
        # straightforwardly.
        #
        cbuf1 = mupdf.fz_compressed_image_buffer( image)
        if not cbuf1.m_internal:
            raise ValueError( "uncompressed image cannot have mask")
        bpc = image.bpc()
        colorspace = image.colorspace()
        xres, yres = mupdf.fz_image_resolution(image)
        mask = mupdf.fz_new_image_from_buffer(maskbuf)
        # rebuild the image from its compressed buffer, now with the mask
        image = mupdf.fz_new_image_from_compressed_buffer2(
                w,
                h,
                bpc,
                colorspace,
                xres,
                yres,
                1, # interpolate
                0, # imagemask,
                list(), # decode
                list(), # colorkey
                cbuf1,
                mask,
                )

    if do_have_image:
        #log( 'do_have_image')
        # add the new image to the PDF and remember its digest
        ref = mupdf.pdf_add_image(pdf, image)
        if oc:
            JM_add_oc_object(pdf, ref, oc)
        img_xref = mupdf.pdf_to_num(ref)
        digests[md5_py] = img_xref
        rc_digest = 1

    if do_have_xref:
        #log( 'do_have_xref')
        # register the image under /Resources/XObject (creating the
        # dictionaries if missing) and append the drawing commands
        resources = mupdf.pdf_dict_get_inheritable(page.obj(), PDF_NAME('Resources'))
        if not resources.m_internal:
            resources = mupdf.pdf_dict_put_dict(page.obj(), PDF_NAME('Resources'), 2)
        xobject = mupdf.pdf_dict_get(resources, PDF_NAME('XObject'))
        if not xobject.m_internal:
            xobject = mupdf.pdf_dict_put_dict(resources, PDF_NAME('XObject'), 2)
        mat = calc_image_matrix(w, h, clip, rotate, keep_proportion)
        mupdf.pdf_dict_puts(xobject, _imgname, ref)
        nres = mupdf.fz_new_buffer(50)
        s = f"\nq\n{_format_g((mat.a, mat.b, mat.c, mat.d, mat.e, mat.f))} cm\n/{_imgname} Do\nQ\n"
        #s = s.replace('\n', '\r\n')
        mupdf.fz_append_string(nres, s)
        JM_insert_contents(pdf, page.obj(), nres, overlay)

    if rc_digest:
        return img_xref, digests
    else:
        return img_xref, None
  9073. def _insertFont(self, fontname, bfname, fontfile, fontbuffer, set_simple, idx, wmode, serif, encoding, ordering):
  9074. page = self._pdf_page()
  9075. pdf = page.doc()
  9076. value = JM_insert_font(pdf, bfname, fontfile,fontbuffer, set_simple, idx, wmode, serif, encoding, ordering)
  9077. # get the objects /Resources, /Resources/Font
  9078. resources = mupdf.pdf_dict_get_inheritable(page.obj(), PDF_NAME('Resources'))
  9079. if not resources.pdf_is_dict():
  9080. resources = mupdf.pdf_dict_put_dict(page.obj(), PDF_NAME("Resources"), 5)
  9081. fonts = mupdf.pdf_dict_get(resources, PDF_NAME('Font'))
  9082. if not fonts.m_internal: # page has no fonts yet
  9083. fonts = mupdf.pdf_new_dict(pdf, 5)
  9084. mupdf.pdf_dict_putl(page.obj(), fonts, PDF_NAME('Resources'), PDF_NAME('Font'))
  9085. # store font in resources and fonts objects will contain named reference to font
  9086. _, xref = JM_INT_ITEM(value, 0)
  9087. if not xref:
  9088. raise RuntimeError( "cannot insert font")
  9089. font_obj = mupdf.pdf_new_indirect(pdf, xref, 0)
  9090. mupdf.pdf_dict_puts(fonts, fontname, font_obj)
  9091. return value
  9092. def _load_annot(self, name, xref):
  9093. page = self._pdf_page()
  9094. if xref == 0:
  9095. annot = JM_get_annot_by_name(page, name)
  9096. else:
  9097. annot = JM_get_annot_by_xref(page, xref)
  9098. if annot.m_internal:
  9099. return Annot(annot)
  9100. def _makePixmap(self, doc, ctm, cs, alpha=0, annots=1, clip=None):
  9101. pix = JM_pixmap_from_page(doc, self.this, ctm, cs, alpha, annots, clip)
  9102. return Pixmap(pix)
  9103. def _other_box(self, boxtype):
  9104. rect = mupdf.FzRect( mupdf.FzRect.Fixed_INFINITE)
  9105. page = _as_pdf_page(self.this, required=False)
  9106. if page.m_internal:
  9107. obj = mupdf.pdf_dict_gets( page.obj(), boxtype)
  9108. if mupdf.pdf_is_array(obj):
  9109. rect = mupdf.pdf_to_rect(obj)
  9110. if mupdf.fz_is_infinite_rect( rect):
  9111. return
  9112. return JM_py_from_rect(rect)
  9113. def _pdf_page(self, required=True):
  9114. return _as_pdf_page(self.this, required=required)
  9115. def _reset_annot_refs(self):
  9116. """Invalidate / delete all annots of this page."""
  9117. self._annot_refs.clear()
  9118. def _set_opacity(self, gstate=None, CA=1, ca=1, blendmode=None):
  9119. if CA >= 1 and ca >= 1 and blendmode is None:
  9120. return
  9121. tCA = int(round(max(CA , 0) * 100))
  9122. if tCA >= 100:
  9123. tCA = 99
  9124. tca = int(round(max(ca, 0) * 100))
  9125. if tca >= 100:
  9126. tca = 99
  9127. gstate = "fitzca%02i%02i" % (tCA, tca)
  9128. if not gstate:
  9129. return
  9130. page = _as_pdf_page(self.this)
  9131. resources = mupdf.pdf_dict_get(page.obj(), PDF_NAME('Resources'))
  9132. if not resources.m_internal:
  9133. resources = mupdf.pdf_dict_put_dict(page.obj(), PDF_NAME('Resources'), 2)
  9134. extg = mupdf.pdf_dict_get(resources, PDF_NAME('ExtGState'))
  9135. if not extg.m_internal:
  9136. extg = mupdf.pdf_dict_put_dict(resources, PDF_NAME('ExtGState'), 2)
  9137. n = mupdf.pdf_dict_len(extg)
  9138. for i in range(n):
  9139. o1 = mupdf.pdf_dict_get_key(extg, i)
  9140. name = mupdf.pdf_to_name(o1)
  9141. if name == gstate:
  9142. return gstate
  9143. opa = mupdf.pdf_new_dict(page.doc(), 3)
  9144. mupdf.pdf_dict_put_real(opa, PDF_NAME('CA'), CA)
  9145. mupdf.pdf_dict_put_real(opa, PDF_NAME('ca'), ca)
  9146. mupdf.pdf_dict_puts(extg, gstate, opa)
  9147. return gstate
  9148. def _set_pagebox(self, boxtype, rect):
  9149. doc = self.parent
  9150. if doc is None:
  9151. raise ValueError("orphaned object: parent is None")
  9152. if not doc.is_pdf:
  9153. raise ValueError("is no PDF")
  9154. valid_boxes = ("CropBox", "BleedBox", "TrimBox", "ArtBox")
  9155. if boxtype not in valid_boxes:
  9156. raise ValueError("bad boxtype")
  9157. rect = Rect(rect)
  9158. mb = self.mediabox
  9159. rect = Rect(rect[0], mb.y1 - rect[3], rect[2], mb.y1 - rect[1])
  9160. if not (mb.x0 <= rect.x0 < rect.x1 <= mb.x1 and mb.y0 <= rect.y0 < rect.y1 <= mb.y1):
  9161. raise ValueError(f"{boxtype} not in MediaBox")
  9162. doc.xref_set_key(self.xref, boxtype, f"[{_format_g(tuple(rect))}]")
  9163. def _set_resource_property(self, name, xref):
  9164. page = self._pdf_page()
  9165. JM_set_resource_property(page.obj(), name, xref)
  9166. def _show_pdf_page(self, fz_srcpage, overlay=1, matrix=None, xref=0, oc=0, clip=None, graftmap=None, _imgname=None):
  9167. cropbox = JM_rect_from_py(clip)
  9168. mat = JM_matrix_from_py(matrix)
  9169. rc_xref = xref
  9170. tpage = _as_pdf_page(self.this)
  9171. tpageref = tpage.obj()
  9172. pdfout = tpage.doc() # target PDF
  9173. ENSURE_OPERATION(pdfout)
  9174. #-------------------------------------------------------------
  9175. # convert the source page to a Form XObject
  9176. #-------------------------------------------------------------
  9177. xobj1 = JM_xobject_from_page(pdfout, fz_srcpage, xref, graftmap.this)
  9178. if not rc_xref:
  9179. rc_xref = mupdf.pdf_to_num(xobj1)
  9180. #-------------------------------------------------------------
  9181. # create referencing XObject (controls display on target page)
  9182. #-------------------------------------------------------------
  9183. # fill reference to xobj1 into the /Resources
  9184. #-------------------------------------------------------------
  9185. subres1 = mupdf.pdf_new_dict(pdfout, 5)
  9186. mupdf.pdf_dict_puts(subres1, "fullpage", xobj1)
  9187. subres = mupdf.pdf_new_dict(pdfout, 5)
  9188. mupdf.pdf_dict_put(subres, PDF_NAME('XObject'), subres1)
  9189. res = mupdf.fz_new_buffer(20)
  9190. mupdf.fz_append_string(res, "/fullpage Do")
  9191. xobj2 = mupdf.pdf_new_xobject(pdfout, cropbox, mat, subres, res)
  9192. if oc > 0:
  9193. JM_add_oc_object(pdfout, mupdf.pdf_resolve_indirect(xobj2), oc)
  9194. #-------------------------------------------------------------
  9195. # update target page with xobj2:
  9196. #-------------------------------------------------------------
  9197. # 1. insert Xobject in Resources
  9198. #-------------------------------------------------------------
  9199. resources = mupdf.pdf_dict_get_inheritable(tpageref, PDF_NAME('Resources'))
  9200. if not resources.m_internal:
  9201. resources = mupdf.pdf_dict_put_dict(tpageref,PDF_NAME('Resources'), 5)
  9202. subres = mupdf.pdf_dict_get(resources, PDF_NAME('XObject'))
  9203. if not subres.m_internal:
  9204. subres = mupdf.pdf_dict_put_dict(resources, PDF_NAME('XObject'), 5)
  9205. mupdf.pdf_dict_puts(subres, _imgname, xobj2)
  9206. #-------------------------------------------------------------
  9207. # 2. make and insert new Contents object
  9208. #-------------------------------------------------------------
  9209. nres = mupdf.fz_new_buffer(50) # buffer for Do-command
  9210. mupdf.fz_append_string(nres, " q /") # Do-command
  9211. mupdf.fz_append_string(nres, _imgname)
  9212. mupdf.fz_append_string(nres, " Do Q ")
  9213. JM_insert_contents(pdfout, tpageref, nres, overlay)
  9214. return rc_xref
  9215. def add_caret_annot(self, point: point_like) -> Annot:
  9216. """Add a 'Caret' annotation."""
  9217. old_rotation = annot_preprocess(self)
  9218. try:
  9219. annot = self._add_caret_annot(point)
  9220. finally:
  9221. if old_rotation != 0:
  9222. self.set_rotation(old_rotation)
  9223. annot = Annot( annot)
  9224. annot_postprocess(self, annot)
  9225. assert hasattr( annot, 'parent')
  9226. return annot
  9227. def add_circle_annot(self, rect: rect_like) -> Annot:
  9228. """Add a 'Circle' (ellipse, oval) annotation."""
  9229. old_rotation = annot_preprocess(self)
  9230. try:
  9231. annot = self._add_square_or_circle(rect, mupdf.PDF_ANNOT_CIRCLE)
  9232. finally:
  9233. if old_rotation != 0:
  9234. self.set_rotation(old_rotation)
  9235. annot_postprocess(self, annot)
  9236. return annot
  9237. def add_file_annot(
  9238. self,
  9239. point: point_like,
  9240. buffer_: ByteString,
  9241. filename: str,
  9242. ufilename: OptStr =None,
  9243. desc: OptStr =None,
  9244. icon: OptStr =None
  9245. ) -> Annot:
  9246. """Add a 'FileAttachment' annotation."""
  9247. old_rotation = annot_preprocess(self)
  9248. try:
  9249. annot = self._add_file_annot(point,
  9250. buffer_,
  9251. filename,
  9252. ufilename=ufilename,
  9253. desc=desc,
  9254. icon=icon,
  9255. )
  9256. finally:
  9257. if old_rotation != 0:
  9258. self.set_rotation(old_rotation)
  9259. annot_postprocess(self, annot)
  9260. return annot
  9261. def add_freetext_annot(
  9262. self,
  9263. rect: rect_like,
  9264. text: str,
  9265. *,
  9266. fontsize: float =11,
  9267. fontname: OptStr =None,
  9268. text_color: OptSeq =None,
  9269. fill_color: OptSeq =None,
  9270. border_color: OptSeq =None,
  9271. border_width: float =0,
  9272. dashes: OptSeq =None,
  9273. callout: OptSeq =None,
  9274. line_end: int=mupdf.PDF_ANNOT_LE_OPEN_ARROW,
  9275. opacity: float =1,
  9276. align: int =0,
  9277. rotate: int =0,
  9278. richtext=False,
  9279. style=None,
  9280. ) -> Annot:
  9281. """Add a 'FreeText' annotation."""
  9282. old_rotation = annot_preprocess(self)
  9283. try:
  9284. annot = self._add_freetext_annot(
  9285. rect,
  9286. text,
  9287. fontsize=fontsize,
  9288. fontname=fontname,
  9289. text_color=text_color,
  9290. fill_color=fill_color,
  9291. border_color=border_color,
  9292. border_width=border_width,
  9293. dashes=dashes,
  9294. callout=callout,
  9295. line_end=line_end,
  9296. opacity=opacity,
  9297. align=align,
  9298. rotate=rotate,
  9299. richtext=richtext,
  9300. style=style,
  9301. )
  9302. finally:
  9303. if old_rotation != 0:
  9304. self.set_rotation(old_rotation)
  9305. annot_postprocess(self, annot)
  9306. return annot
  9307. def add_highlight_annot(self, quads=None, start=None,
  9308. stop=None, clip=None) -> Annot:
  9309. """Add a 'Highlight' annotation."""
  9310. if quads is None:
  9311. q = get_highlight_selection(self, start=start, stop=stop, clip=clip)
  9312. else:
  9313. q = CheckMarkerArg(quads)
  9314. ret = self._add_text_marker(q, mupdf.PDF_ANNOT_HIGHLIGHT)
  9315. return ret
  9316. def add_ink_annot(self, handwriting: list) -> Annot:
  9317. """Add a 'Ink' ('handwriting') annotation.
  9318. The argument must be a list of lists of point_likes.
  9319. """
  9320. old_rotation = annot_preprocess(self)
  9321. try:
  9322. annot = self._add_ink_annot(handwriting)
  9323. finally:
  9324. if old_rotation != 0:
  9325. self.set_rotation(old_rotation)
  9326. annot_postprocess(self, annot)
  9327. return annot
  9328. def add_line_annot(self, p1: point_like, p2: point_like) -> Annot:
  9329. """Add a 'Line' annotation."""
  9330. old_rotation = annot_preprocess(self)
  9331. try:
  9332. annot = self._add_line_annot(p1, p2)
  9333. finally:
  9334. if old_rotation != 0:
  9335. self.set_rotation(old_rotation)
  9336. annot_postprocess(self, annot)
  9337. return annot
  9338. def add_polygon_annot(self, points: list) -> Annot:
  9339. """Add a 'Polygon' annotation."""
  9340. old_rotation = annot_preprocess(self)
  9341. try:
  9342. annot = self._add_multiline(points, mupdf.PDF_ANNOT_POLYGON)
  9343. finally:
  9344. if old_rotation != 0:
  9345. self.set_rotation(old_rotation)
  9346. annot_postprocess(self, annot)
  9347. return annot
  9348. def add_polyline_annot(self, points: list) -> Annot:
  9349. """Add a 'PolyLine' annotation."""
  9350. old_rotation = annot_preprocess(self)
  9351. try:
  9352. annot = self._add_multiline(points, mupdf.PDF_ANNOT_POLY_LINE)
  9353. finally:
  9354. if old_rotation != 0:
  9355. self.set_rotation(old_rotation)
  9356. annot_postprocess(self, annot)
  9357. return annot
  9358. def add_rect_annot(self, rect: rect_like) -> Annot:
  9359. """Add a 'Square' (rectangle) annotation."""
  9360. old_rotation = annot_preprocess(self)
  9361. try:
  9362. annot = self._add_square_or_circle(rect, mupdf.PDF_ANNOT_SQUARE)
  9363. finally:
  9364. if old_rotation != 0:
  9365. self.set_rotation(old_rotation)
  9366. annot_postprocess(self, annot)
  9367. return annot
  def add_redact_annot(
          self,
          quad,
          text: OptStr =None,
          fontname: OptStr =None,
          fontsize: float =11,
          align: int =0,
          fill: OptSeq =None,
          text_color: OptSeq =None,
          cross_out: bool =True,
          ) -> Annot:
      """Add a 'Redact' annotation.

      Args:
          quad: area to redact, handed unchanged to `_add_redact_annot`.
          text: optional replacement text. Empty or whitespace-only text is
              treated like no text at all.
          fontname: font for the replacement text ("Helv" when falsy).
          fontsize: font size for the replacement text (11 when falsy).
          align: text alignment, handed unchanged to `_add_redact_annot`.
          fill: fill color. Only normalized when replacement text is given
              (white when None; a single number is repeated three times;
              longer sequences are cut to three components).
          text_color: text color, normalized like `fill` (black when falsy).
          cross_out: rewrite the appearance stream so that the rectangle is
              shown crossed out.

      Returns:
          The new annotation.
      """
      da_str = None
      if not text or set(string.whitespace).issuperset(text):
          # Nothing printable to write into the redacted area.
          text = None
      else:
          CheckColor(fill)
          CheckColor(text_color)
          fontname = fontname or "Helv"
          fontsize = fontsize or 11
          text_color = text_color or (0, 0, 0)
          if hasattr(text_color, "__float__"):
              text_color = (text_color,) * 3
          if len(text_color) > 3:
              text_color = text_color[:3]
          # Build the string passed on as 'da_str': color, font and size.
          da_str = "{:g} {:g} {:g} rg /{f:s} {s:g} Tf".format(
              *text_color, f=fontname, s=fontsize
          )
          if fill is None:
              fill = (1, 1, 1)
          if fill:
              if hasattr(fill, "__float__"):
                  fill = (fill,) * 3
              if len(fill) > 3:
                  fill = fill[:3]

      saved_rotation = annot_preprocess(self)
      try:
          annot = self._add_redact_annot(
              quad, text=text, da_str=da_str, align=align, fill=fill
          )
      finally:
          # Reinstate the rotation even if creating the annotation failed.
          if saved_rotation != 0:
              self.set_rotation(saved_rotation)
      annot_postprocess(self, annot)

      if cross_out:
          # Change the appearance to show a crossed-out rectangle: keep all
          # lines but the last one (the original comment calls these "the
          # 4 commands"), then re-use the corner commands for the diagonals.
          ap_tab = annot._getAP().splitlines()[:-1]
          _, LL, LR, UR, UL = ap_tab
          ap_tab.extend((LR, LL, UR, LL, UL, b"S"))
          annot._setAP(b"\n".join(ap_tab), 0)
      return annot
  def add_squiggly_annot(
          self,
          quads=None,
          start=None,
          stop=None,
          clip=None,
          ) -> Annot:
      """Add a 'Squiggly' annotation.

      Args:
          quads: areas to mark. When None, the areas are computed by
              `get_highlight_selection` from `start`, `stop` and `clip`.
          start: forwarded to `get_highlight_selection`.
          stop: forwarded to `get_highlight_selection`.
          clip: forwarded to `get_highlight_selection`.

      Returns:
          The annotation created by `_add_text_marker`.
      """
      marked = (
          get_highlight_selection(self, start=start, stop=stop, clip=clip)
          if quads is None
          else CheckMarkerArg(quads)
      )
      return self._add_text_marker(marked, mupdf.PDF_ANNOT_SQUIGGLY)
  def add_stamp_annot(self, rect: rect_like, stamp=0) -> Annot:
      """Add a ('rubber') 'Stamp' annotation.

      Args:
          rect: handed unchanged to `_add_stamp_annot`.
          stamp: handed unchanged to `_add_stamp_annot`.

      Returns:
          The new annotation, after `annot_postprocess` has been applied.
      """
      # The value from annot_preprocess() is the rotation to reinstate later.
      saved_rotation = annot_preprocess(self)
      try:
          new_annot = self._add_stamp_annot(rect, stamp)
      finally:
          # Reinstate the rotation even if creating the annotation failed.
          if saved_rotation != 0:
              self.set_rotation(saved_rotation)
      annot_postprocess(self, new_annot)
      return new_annot
  def add_strikeout_annot(self, quads=None, start=None, stop=None, clip=None) -> Annot:
      """Add a 'StrikeOut' annotation.

      Args:
          quads: areas to mark. When None, the areas are computed by
              `get_highlight_selection` from `start`, `stop` and `clip`.
          start: forwarded to `get_highlight_selection`.
          stop: forwarded to `get_highlight_selection`.
          clip: forwarded to `get_highlight_selection`.

      Returns:
          The annotation created by `_add_text_marker`.
      """
      marked = (
          get_highlight_selection(self, start=start, stop=stop, clip=clip)
          if quads is None
          else CheckMarkerArg(quads)
      )
      return self._add_text_marker(marked, mupdf.PDF_ANNOT_STRIKE_OUT)
  def add_text_annot(self, point: point_like, text: str, icon: str ="Note") -> Annot:
      """Add a 'Text' (sticky note) annotation.

      Args:
          point: handed unchanged to `_add_text_annot`.
          text: handed unchanged to `_add_text_annot`.
          icon: forwarded as the `icon` keyword of `_add_text_annot`.

      Returns:
          The new annotation, after `annot_postprocess` has been applied.
      """
      # The value from annot_preprocess() is the rotation to reinstate later.
      saved_rotation = annot_preprocess(self)
      try:
          new_annot = self._add_text_annot(point, text, icon=icon)
      finally:
          # Reinstate the rotation even if creating the annotation failed.
          if saved_rotation != 0:
              self.set_rotation(saved_rotation)
      annot_postprocess(self, new_annot)
      return new_annot
  def add_underline_annot(self, quads=None, start=None, stop=None, clip=None) -> Annot:
      """Add an 'Underline' annotation.

      Args:
          quads: areas to mark. When None, the areas are computed by
              `get_highlight_selection` from `start`, `stop` and `clip`.
          start: forwarded to `get_highlight_selection`.
          stop: forwarded to `get_highlight_selection`.
          clip: forwarded to `get_highlight_selection`.

      Returns:
          The annotation created by `_add_text_marker`.
      """
      marked = (
          get_highlight_selection(self, start=start, stop=stop, clip=clip)
          if quads is None
          else CheckMarkerArg(quads)
      )
      return self._add_text_marker(marked, mupdf.PDF_ANNOT_UNDERLINE)
  def add_widget(self, widget: Widget) -> Annot:
      """Add a 'Widget' (form field).

      Args:
          widget: the field description. It is validated first and is bound
              to the created annotation before `widget.update()` is called.

      Returns:
          The created annotation, or None if `_addWidget` returned a falsy
          value.

      Raises:
          ValueError: if the owning document is not a PDF.
      """
      CheckParent(self)
      if not self.parent.is_pdf:
          raise ValueError("is no PDF")
      widget._validate()
      new_annot = self._addWidget(widget.field_type, widget.field_name)
      if not new_annot:
          return None
      new_annot.thisown = True
      new_annot.parent = weakref.proxy(self)  # owning page object
      self._annot_refs[id(new_annot)] = new_annot
      # Link the widget to its annotation and owning page before updating.
      widget.parent = new_annot.parent
      widget._annot = new_annot
      widget.update()
      return new_annot
  def annot_names(self):
      """List of names of annotations, fields and links.

      Returns:
          The result of `JM_get_annot_id_list` for the underlying PDF page,
          or an empty list when the page has no PDF page object.
      """
      CheckParent(self)
      page = self._pdf_page(required=False)
      if not page.m_internal:
          # No PDF page behind this object: there is nothing to list.
          return []
      return JM_get_annot_id_list(page)
  def annot_xrefs(self):
      """List of xref numbers of annotations, fields and links.

      Returns:
          Whatever `JM_get_annot_xref_list2` returns for this page.
      """
      xref_items = JM_get_annot_xref_list2(self)
      return xref_items
  def annots(self, types=None):
      """Generator over the annotations of a page.

      Links, popups and widgets are never yielded.

      Args:
          types: (list) annotation types to subselect from. If it is not
              indexable (e.g. None), all annotations are returned.
              E.g. types=[PDF_ANNOT_LINE] will only yield line annotations.

      Yields:
          Annotations loaded via `load_annot`, each flagged with
          `_yielded = True`.
      """
      excluded = (mupdf.PDF_ANNOT_LINK, mupdf.PDF_ANNOT_POPUP, mupdf.PDF_ANNOT_WIDGET)
      subselect = hasattr(types, "__getitem__")
      # Each item of annot_xrefs() is indexed as (xref, type, ...).
      wanted_xrefs = [
          item[0]
          for item in self.annot_xrefs()
          if (not subselect or item[1] in types) and item[1] not in excluded
      ]
      for xref in wanted_xrefs:
          loaded = self.load_annot(xref)
          loaded._yielded = True
          yield loaded
  def apply_redactions(
          page: 'Page',
          images: int = 2,
          graphics: int = 1,
          text: int = 0,
          ) -> bool:
      """Apply the redaction annotations of the page.

      Args:
          page: the PDF page.
          images:
              0 - ignore images
              1 - remove all overlapping images
              2 - blank out overlapping image parts
              3 - remove image unless invisible
          graphics:
              0 - ignore graphics
              1 - remove graphics if contained in rectangle
              2 - remove all overlapping graphics
          text:
              0 - remove text
              1 - ignore text

      Returns:
          False if the page has no redaction annotations, True otherwise.

      Raises:
          ValueError: if the document is closed, encrypted or no PDF, or if
              `_apply_redactions` reports failure.
      """

      def center_rect(annot_rect, new_text, font, fsize):
          """Calculate minimal sub-rectangle for the overlay text.

          Notes:
              Because 'insert_textbox' supports no vertical text centering,
              an approximate number of lines is computed here and the top
              of the rectangle is moved down accordingly.
              The passed-in rectangle itself is modified and returned.

          Args:
              annot_rect: the annotation rectangle
              new_text: the text to insert.
              font: the fontname. If `get_text_length` rejects it, the
                  rectangle is returned unchanged.
              fsize: the fontsize

          Returns:
              A rectangle to use instead of the annot rectangle.
          """
          if not new_text or annot_rect.width <= EPSILON:
              return annot_rect
          try:
              text_width = get_text_length(new_text, font, fsize)
          except (ValueError, mupdf.FzErrorBase):  # unsupported font
              if g_exceptions_verbose:
                  exception_info()
              return annot_rect
          # Estimate the height needed: number of lines times line height.
          needed_height = math.ceil(text_width / annot_rect.width) * (fsize * 1.2)
          if needed_height >= annot_rect.height:
              return annot_rect
          annot_rect.y0 = (annot_rect.tl.y + annot_rect.bl.y - needed_height) * 0.5
          return annot_rect

      CheckParent(page)
      doc = page.parent
      if doc.is_encrypted or doc.is_closed:
          raise ValueError("document closed or encrypted")
      if not doc.is_pdf:
          raise ValueError("is no PDF")

      # Save the values of all redactions before they are applied.
      saved_redactions = [
          annot._get_redact_values()
          for annot in page.annots(
              types=(mupdf.PDF_ANNOT_REDACT,)  # pylint: disable=no-member
          )
      ]
      if not saved_redactions:  # no redactions on this page
          return False

      if not page._apply_redactions(text, images, graphics):  # call MuPDF
          # should not happen really
          raise ValueError("Error applying redactions.")

      # Now write fill color and replacement text into the old rectangles.
      shape = page.new_shape()
      for redact in saved_redactions:
          annot_rect = redact["rect"]
          fill = redact["fill"]
          if fill:
              shape.draw_rect(annot_rect)  # colorize the rect background
              shape.finish(fill=fill, color=fill)
          if "text" not in redact:
              continue
          new_text = redact["text"]
          align = redact.get("align", 0)
          fname = redact["fontname"]
          fsize = redact["fontsize"]
          color = redact["text_color"]
          # try finding a vertically centered sub-rect
          trect = center_rect(annot_rect, new_text, fname, fsize)
          outcome = -1
          # Retry with a font size reduced by 0.5 while the result is
          # negative (original comment: "not enough room") and size >= 4.
          while outcome < 0 and fsize >= 4:
              outcome = shape.insert_textbox(
                  trect,
                  new_text,
                  fontname=fname,
                  fontsize=fsize,
                  color=color,
                  align=align,
              )
              fsize -= 0.5
      shape.commit()  # append new contents object
      return True
  def recolor(self, components=1):
      """Convert colorspaces of objects on the page.

      Args:
          components: number of color components; must be 1, 3 or 4.

      Raises:
          ValueError: if `components` is not one of 1, 3, 4.
      """
      if components not in (1, 3, 4):
          raise ValueError("components must be one of 1, 3, 4")
      pdf_document = _as_pdf_document(self.parent)
      raw_options = mupdf.pdf_recolor_options()
      raw_options.num_comp = components
      mupdf.pdf_recolor_page(
          pdf_document,
          self.number,
          mupdf.PdfRecolorOptions(raw_options),
      )
  def clip_to_rect(self, rect):
      """Clip away page content outside the rectangle.

      Args:
          rect: rect-like; converted with `Rect(rect)`.

      Raises:
          ValueError: if the rectangle is infinite or its intersection with
              the page rectangle is empty.
      """
      clip = Rect(rect)
      if clip.is_infinite or (clip & self.rect).is_empty:
          raise ValueError("rect must not be infinite or empty")
      # Apply the page's transformation matrix before handing over to MuPDF.
      clip *= self.transformation_matrix
      mupdf.pdf_clip_page(_as_pdf_page(self), JM_rect_from_py(clip))
  def get_layout(self):
      """Try to access layout information.

      Does nothing if `layout_information` is already set or if no
      `_get_layout` provider is available; otherwise stores the provider's
      result in `self.layout_information`.
      """
      if self.layout_information is not None or not _get_layout:
          # Already present, or no layout information available.
          return
      self.layout_information = _get_layout(self)
  @property
  def artbox(self):
      """The ArtBox.

      Falls back to the CropBox when `_other_box("ArtBox")` returns None.
      Otherwise the y coordinates are re-expressed as distances from the
      MediaBox's `y1`.
      """
      box = self._other_box("ArtBox")
      if box is None:
          return self.cropbox
      top = self.mediabox.y1
      return Rect(box[0], top - box[3], box[2], top - box[1])
  @property
  def bleedbox(self):
      """The BleedBox.

      Falls back to the CropBox when `_other_box("BleedBox")` returns None.
      Otherwise the y coordinates are re-expressed as distances from the
      MediaBox's `y1`.
      """
      box = self._other_box("BleedBox")
      if box is None:
          return self.cropbox
      top = self.mediabox.y1
      return Rect(box[0], top - box[3], box[2], top - box[1])
  def bound(self):
      """Get page rectangle.

      Returns:
          `Rect` of the value reported by `mupdf.fz_bound_page`. If that
          rectangle is infinite and the document is a PDF, a rectangle of
          the CropBox's size anchored at (0, 0) is returned instead, with
          width and height swapped when the rotation is not 0 or 180.
      """
      CheckParent(self)
      page = _as_fz_page(self.this)
      val = Rect(mupdf.fz_bound_page(page))
      if val.is_infinite and self.parent.is_pdf:
          cb = self.cropbox
          w, h = cb.width, cb.height
          if self.rotation not in (0, 180):
              w, h = h, w
          val = Rect(0, 0, w, h)
          # Report the most recent MuPDF warning, if any. Guard against an
          # empty warning buffer, where indexing [-1] would raise IndexError.
          warning_lines = TOOLS.mupdf_warnings(reset=False).splitlines()
          if warning_lines:
              message(warning_lines[-1])
      return val
  def clean_contents(self, sanitize=1):
      """Run MuPDF's content filter over the page's contents.

      Args:
          sanitize: forwarded to `_make_PdfFilterOptions`. When falsy and the
              page is not yet wrapped, `wrap_contents()` is called first.

      Does nothing further if the page has no PDF page object.
      """
      if not (sanitize or self.is_wrapped):
          self.wrap_contents()
      pdf_page = _as_pdf_page(self.this, required=False)
      if not pdf_page.m_internal:
          return
      options = _make_PdfFilterOptions(recurse=1, sanitize=sanitize)
      mupdf.pdf_filter_page_contents(pdf_page.doc(), pdf_page, options)
  @property
  def cropbox(self):
      """The CropBox.

      For pages with a PDF page object this is `JM_cropbox` of the page
      object; otherwise the value of `mupdf.fz_bound_page` is used.
      """
      CheckParent(self)
      pdf_page = self._pdf_page(required=False)
      if pdf_page.m_internal:
          box = JM_cropbox(pdf_page.obj())
      else:
          box = mupdf.fz_bound_page(self.this)
      return Rect(box)
  @property
  def cropbox_position(self):
      """The `tl` (top-left) attribute of the CropBox."""
      box = self.cropbox
      return box.tl
  def delete_annot(self, annot):
      """Delete annot and return next one.

      Annotations found by `JM_find_annot_irt` for `annot` (the original
      comment calls them "/IRT annots") are deleted first.

      Args:
          annot: the annotation to delete; it is erased afterwards.

      Returns:
          An `Annot` wrapping the annotation that followed the deleted one.
      """
      CheckParent(self)
      CheckParent(annot)
      pdf_page = self._pdf_page()
      while True:
          # Keep removing /IRT annotations until none is left.
          related = JM_find_annot_irt(annot.this)
          if not related:
              break
          mupdf.pdf_delete_annot(pdf_page, related.this)
      # Remember the successor before the annotation itself disappears.
      successor = mupdf.pdf_next_annot(annot.this)
      mupdf.pdf_delete_annot(pdf_page, annot.this)
      val = Annot(successor)
      if val:
          val.thisown = True
          val.parent = weakref.proxy(self)  # owning page object
          val.parent._annot_refs[id(val)] = val
      annot._erase()
      return val
  def delete_image(page: 'Page', xref: int):
      """Delete the image referred to by xref.

      The image is actually replaced by a small, fully transparent Pixmap
      using method Page.replace_image.

      Args:
          xref: xref of the image to delete.
      """
      # A 1x1 gray pixmap with alpha; its dimension is arbitrary.
      blank = Pixmap(csGRAY, (0, 0, 1, 1), 1)
      blank.clear_with()  # clear all sample bytes to 0x00
      page.replace_image(xref, pixmap=blank)
  def delete_link(self, linkdict):
      """Delete a Link.

      Args:
          linkdict: dictionary describing the link. Anything that is not a
              dict is silently ignored.

      The link's xref is removed from the page's /Annots array and the
      object is deleted. On every exit path after the type check, the
      Python-side link object registered under `linkdict["id"]` is erased
      (best effort) unless `linkdict["xref"]` is 0.
      """
      CheckParent(self)
      if not isinstance(linkdict, dict):
          return  # have no dictionary

      def finished():
          """Erase the Python link object; errors are deliberately ignored."""
          if linkdict["xref"] == 0:
              return
          try:
              self._annot_refs[linkdict["id"]]._erase()
          except Exception:
              # Don't print this exception, to match classic. Issue #2841.
              if g_exceptions_verbose > 1:
                  exception_info()

      pdf_page = _as_pdf_page(self.this, required=False)
      if not pdf_page.m_internal:
          return finished()  # have no PDF
      xref = linkdict[dictkey_xref]
      if xref < 1:
          return finished()  # invalid xref
      annots = mupdf.pdf_dict_get(pdf_page.obj(), PDF_NAME('Annots'))
      if not annots.m_internal:
          return finished()  # have no annotations
      count = mupdf.pdf_array_len(annots)
      if count == 0:
          return finished()
      # Look for the array entry that refers to the link's xref.
      for idx in range(count):
          if mupdf.pdf_to_num(mupdf.pdf_array_get(annots, idx)) == xref:
              break
      else:
          return finished()  # xref not in annotations
      mupdf.pdf_array_delete(annots, idx)  # delete entry in annotations
      mupdf.pdf_delete_object(pdf_page.doc(), xref)  # delete link object
      mupdf.pdf_dict_put(pdf_page.obj(), PDF_NAME('Annots'), annots)
      JM_refresh_links(pdf_page)
      return finished()
  def delete_widget(page: 'Page', widget: Widget) -> Widget:
      """Delete widget from page and return the next one.

      Args:
          page: the page owning the widget.
          widget: the widget to delete. All its instance attributes are
              removed afterwards.

      Returns:
          The value of `widget.next`, read before the deletion.

      Raises:
          ValueError: if `widget` has no `_annot` attribute or it is None.
      """
      CheckParent(page)
      annot = getattr(widget, "_annot", None)
      if annot is None:
          raise ValueError("bad type: widget")
      successor = widget.next
      page.delete_annot(annot)
      widget._annot.parent = None
      # Strip the widget object of every instance attribute.
      vars(widget).clear()
      return successor
  @property
  def derotation_matrix(self) -> Matrix:
      """Reflects page de-rotation.

      Uses the `extra` implementation when `g_use_extra` is set. Otherwise
      the result of `JM_derotate_page_matrix` is used for pages with a PDF
      page object, and a Matrix built from `mupdf.FzRect.UNIT` for others.
      """
      if g_use_extra:
          return Matrix(extra.Page_derotate_matrix(self.this))
      pdf_page = self._pdf_page(required=False)
      if pdf_page.m_internal:
          return Matrix(JM_derotate_page_matrix(pdf_page))
      return Matrix(mupdf.FzRect(mupdf.FzRect.UNIT))
  def draw_bezier(
          page: 'Page',
          p1: point_like,
          p2: point_like,
          p3: point_like,
          p4: point_like,
          color: OptSeq = (0,),
          fill: OptSeq = None,
          dashes: OptStr = None,
          width: float = 1,
          morph: OptStr = None,
          closePath: bool = False,
          lineCap: int = 0,
          lineJoin: int = 0,
          overlay: bool = True,
          stroke_opacity: float = 1,
          fill_opacity: float = 1,
          oc: int = 0,
          ) -> Point:
      """Draw a general cubic Bezier curve from p1 to p4 using control points p2 and p3.

      The points are converted with `Point(...)`. All styling arguments are
      forwarded to the shape's `finish()`, and `overlay` to its `commit()`.

      Returns:
          The value returned by the shape's `draw_bezier()`.
      """
      shape = page.new_shape()
      result = shape.draw_bezier(Point(p1), Point(p2), Point(p3), Point(p4))
      finish_args = dict(
          color=color,
          fill=fill,
          dashes=dashes,
          width=width,
          lineCap=lineCap,
          lineJoin=lineJoin,
          morph=morph,
          closePath=closePath,
          stroke_opacity=stroke_opacity,
          fill_opacity=fill_opacity,
          oc=oc,
      )
      shape.finish(**finish_args)
      shape.commit(overlay)
      return result
  def draw_circle(
          page: 'Page',
          center: point_like,
          radius: float,
          color: OptSeq = (0,),
          fill: OptSeq = None,
          morph: OptSeq = None,
          dashes: OptStr = None,
          width: float = 1,
          lineCap: int = 0,
          lineJoin: int = 0,
          overlay: bool = True,
          stroke_opacity: float = 1,
          fill_opacity: float = 1,
          oc: int = 0,
          ) -> Point:
      """Draw a circle given its center and radius.

      `center` is converted with `Point(...)`. All styling arguments are
      forwarded to the shape's `finish()`, and `overlay` to its `commit()`.

      Returns:
          The value returned by the shape's `draw_circle()`.
      """
      shape = page.new_shape()
      result = shape.draw_circle(Point(center), radius)
      finish_args = dict(
          color=color,
          fill=fill,
          dashes=dashes,
          width=width,
          lineCap=lineCap,
          lineJoin=lineJoin,
          morph=morph,
          stroke_opacity=stroke_opacity,
          fill_opacity=fill_opacity,
          oc=oc,
      )
      shape.finish(**finish_args)
      shape.commit(overlay)
      return result
  def draw_curve(
          page: 'Page',
          p1: point_like,
          p2: point_like,
          p3: point_like,
          color: OptSeq = (0,),
          fill: OptSeq = None,
          dashes: OptStr = None,
          width: float = 1,
          morph: OptSeq = None,
          closePath: bool = False,
          lineCap: int = 0,
          lineJoin: int = 0,
          overlay: bool = True,
          stroke_opacity: float = 1,
          fill_opacity: float = 1,
          oc: int = 0,
          ) -> Point:
      """Draw a special Bezier curve from p1 to p3, generating control points on lines p1 to p2 and p2 to p3.

      The points are converted with `Point(...)`. All styling arguments are
      forwarded to the shape's `finish()`, and `overlay` to its `commit()`.

      Returns:
          The value returned by the shape's `draw_curve()`.
      """
      shape = page.new_shape()
      result = shape.draw_curve(Point(p1), Point(p2), Point(p3))
      finish_args = dict(
          color=color,
          fill=fill,
          dashes=dashes,
          width=width,
          lineCap=lineCap,
          lineJoin=lineJoin,
          morph=morph,
          closePath=closePath,
          stroke_opacity=stroke_opacity,
          fill_opacity=fill_opacity,
          oc=oc,
      )
      shape.finish(**finish_args)
      shape.commit(overlay)
      return result
  def draw_line(
          page: 'Page',
          p1: point_like,
          p2: point_like,
          color: OptSeq = (0,),
          dashes: OptStr = None,
          width: float = 1,
          lineCap: int = 0,
          lineJoin: int = 0,
          overlay: bool = True,
          morph: OptSeq = None,
          stroke_opacity: float = 1,
          fill_opacity: float = 1,
          oc=0,
          ) -> Point:
      """Draw a line from point p1 to point p2.

      The points are converted with `Point(...)`. All styling arguments are
      forwarded to the shape's `finish()` (always with `closePath=False`),
      and `overlay` to its `commit()`.

      Returns:
          The value returned by the shape's `draw_line()`.
      """
      shape = page.new_shape()
      result = shape.draw_line(Point(p1), Point(p2))
      finish_args = dict(
          color=color,
          dashes=dashes,
          width=width,
          closePath=False,
          lineCap=lineCap,
          lineJoin=lineJoin,
          morph=morph,
          stroke_opacity=stroke_opacity,
          fill_opacity=fill_opacity,
          oc=oc,
      )
      shape.finish(**finish_args)
      shape.commit(overlay)
      return result
  def draw_oval(
          page: 'Page',
          rect: typing.Union[rect_like, quad_like],
          color: OptSeq = (0,),
          fill: OptSeq = None,
          dashes: OptStr = None,
          morph: OptSeq = None,
          width: float = 1,
          lineCap: int = 0,
          lineJoin: int = 0,
          overlay: bool = True,
          stroke_opacity: float = 1,
          fill_opacity: float = 1,
          oc: int = 0,
          ) -> Point:
      """Draw an oval given its containing rectangle or quad.

      `rect` is handed to the shape unconverted. All styling arguments are
      forwarded to the shape's `finish()`, and `overlay` to its `commit()`.

      Returns:
          The value returned by the shape's `draw_oval()`.
      """
      shape = page.new_shape()
      result = shape.draw_oval(rect)
      finish_args = dict(
          color=color,
          fill=fill,
          dashes=dashes,
          width=width,
          lineCap=lineCap,
          lineJoin=lineJoin,
          morph=morph,
          stroke_opacity=stroke_opacity,
          fill_opacity=fill_opacity,
          oc=oc,
      )
      shape.finish(**finish_args)
      shape.commit(overlay)
      return result
  def draw_polyline(
          page: 'Page',
          points: list,
          color: OptSeq = (0,),
          fill: OptSeq = None,
          dashes: OptStr = None,
          width: float = 1,
          morph: OptSeq = None,
          lineCap: int = 0,
          lineJoin: int = 0,
          overlay: bool = True,
          closePath: bool = False,
          stroke_opacity: float = 1,
          fill_opacity: float = 1,
          oc: int = 0,
          ) -> Point:
      """Draw multiple connected line segments.

      `points` is handed to the shape unconverted. All styling arguments are
      forwarded to the shape's `finish()`, and `overlay` to its `commit()`.

      Returns:
          The value returned by the shape's `draw_polyline()`.
      """
      shape = page.new_shape()
      result = shape.draw_polyline(points)
      finish_args = dict(
          color=color,
          fill=fill,
          dashes=dashes,
          width=width,
          lineCap=lineCap,
          lineJoin=lineJoin,
          morph=morph,
          closePath=closePath,
          stroke_opacity=stroke_opacity,
          fill_opacity=fill_opacity,
          oc=oc,
      )
      shape.finish(**finish_args)
      shape.commit(overlay)
      return result
  def draw_quad(
          page: 'Page',
          quad: quad_like,
          color: OptSeq = (0,),
          fill: OptSeq = None,
          dashes: OptStr = None,
          width: float = 1,
          lineCap: int = 0,
          lineJoin: int = 0,
          morph: OptSeq = None,
          overlay: bool = True,
          stroke_opacity: float = 1,
          fill_opacity: float = 1,
          oc: int = 0,
          ) -> Point:
      """Draw a quadrilateral.

      `quad` is converted with `Quad(...)`. All styling arguments are
      forwarded to the shape's `finish()`, and `overlay` to its `commit()`.

      Returns:
          The value returned by the shape's `draw_quad()`.
      """
      shape = page.new_shape()
      result = shape.draw_quad(Quad(quad))
      finish_args = dict(
          color=color,
          fill=fill,
          dashes=dashes,
          width=width,
          lineCap=lineCap,
          lineJoin=lineJoin,
          morph=morph,
          stroke_opacity=stroke_opacity,
          fill_opacity=fill_opacity,
          oc=oc,
      )
      shape.finish(**finish_args)
      shape.commit(overlay)
      return result
  def draw_rect(
          page: 'Page',
          rect: rect_like,
          color: OptSeq = (0,),
          fill: OptSeq = None,
          dashes: OptStr = None,
          width: float = 1,
          lineCap: int = 0,
          lineJoin: int = 0,
          morph: OptSeq = None,
          overlay: bool = True,
          stroke_opacity: float = 1,
          fill_opacity: float = 1,
          oc: int = 0,
          radius=None,
          ) -> Point:
      """Draw a rectangle. See Shape class method for details.

      `rect` is converted with `Rect(...)` and `radius` is passed through to
      the shape's `draw_rect()`. All styling arguments are forwarded to the
      shape's `finish()`, and `overlay` to its `commit()`.

      Returns:
          The value returned by the shape's `draw_rect()`.
      """
      shape = page.new_shape()
      result = shape.draw_rect(Rect(rect), radius=radius)
      finish_args = dict(
          color=color,
          fill=fill,
          dashes=dashes,
          width=width,
          lineCap=lineCap,
          lineJoin=lineJoin,
          morph=morph,
          stroke_opacity=stroke_opacity,
          fill_opacity=fill_opacity,
          oc=oc,
      )
      shape.finish(**finish_args)
      shape.commit(overlay)
      return result
  def draw_sector(
          page: 'Page',
          center: point_like,
          point: point_like,
          beta: float,
          color: OptSeq = (0,),
          fill: OptSeq = None,
          dashes: OptStr = None,
          fullSector: bool = True,
          morph: OptSeq = None,
          width: float = 1,
          closePath: bool = False,
          lineCap: int = 0,
          lineJoin: int = 0,
          overlay: bool = True,
          stroke_opacity: float = 1,
          fill_opacity: float = 1,
          oc: int = 0,
          ) -> Point:
      """Draw a circle sector given circle center, one arc end point and the angle of the arc.

      Parameters:
          center -- center of circle
          point -- arc end point
          beta -- angle of arc (degrees)
          fullSector -- connect arc ends with center

      All styling arguments are forwarded to the shape's `finish()`, and
      `overlay` to its `commit()`.

      Returns:
          The value returned by the shape's `draw_sector()`.
      """
      shape = page.new_shape()
      result = shape.draw_sector(
          Point(center), Point(point), beta, fullSector=fullSector
      )
      finish_args = dict(
          color=color,
          fill=fill,
          dashes=dashes,
          width=width,
          lineCap=lineCap,
          lineJoin=lineJoin,
          morph=morph,
          closePath=closePath,
          stroke_opacity=stroke_opacity,
          fill_opacity=fill_opacity,
          oc=oc,
      )
      shape.finish(**finish_args)
      shape.commit(overlay)
      return result
  def draw_squiggle(
          page: 'Page',
          p1: point_like,
          p2: point_like,
          breadth: float = 2,
          color: OptSeq = (0,),
          dashes: OptStr = None,
          width: float = 1,
          lineCap: int = 0,
          lineJoin: int = 0,
          overlay: bool = True,
          morph: OptSeq = None,
          stroke_opacity: float = 1,
          fill_opacity: float = 1,
          oc: int = 0,
          ) -> Point:
      """Draw a squiggly line from point p1 to point p2.

      The points are converted with `Point(...)` and `breadth` is passed to
      the shape's `draw_squiggle()`. All styling arguments are forwarded to
      the shape's `finish()` (always with `closePath=False`), and `overlay`
      to its `commit()`.

      Returns:
          The value returned by the shape's `draw_squiggle()`.
      """
      shape = page.new_shape()
      result = shape.draw_squiggle(Point(p1), Point(p2), breadth=breadth)
      finish_args = dict(
          color=color,
          dashes=dashes,
          width=width,
          closePath=False,
          lineCap=lineCap,
          lineJoin=lineJoin,
          morph=morph,
          stroke_opacity=stroke_opacity,
          fill_opacity=fill_opacity,
          oc=oc,
      )
      shape.finish(**finish_args)
      shape.commit(overlay)
      return result
  def draw_zigzag(
          page: 'Page',
          p1: point_like,
          p2: point_like,
          breadth: float = 2,
          color: OptSeq = (0,),
          dashes: OptStr = None,
          width: float = 1,
          lineCap: int = 0,
          lineJoin: int = 0,
          overlay: bool = True,
          morph: OptSeq = None,
          stroke_opacity: float = 1,
          fill_opacity: float = 1,
          oc: int = 0,
          ) -> Point:
      """Draw a zigzag line from point p1 to point p2.

      The points are converted with `Point(...)` and `breadth` is passed to
      the shape's `draw_zigzag()`. All styling arguments are forwarded to
      the shape's `finish()` (always with `closePath=False`), and `overlay`
      to its `commit()`.

      Returns:
          The value returned by the shape's `draw_zigzag()`.
      """
      shape = page.new_shape()
      result = shape.draw_zigzag(Point(p1), Point(p2), breadth=breadth)
      finish_args = dict(
          color=color,
          dashes=dashes,
          width=width,
          closePath=False,
          lineCap=lineCap,
          lineJoin=lineJoin,
          morph=morph,
          stroke_opacity=stroke_opacity,
          fill_opacity=fill_opacity,
          oc=oc,
      )
      shape.finish(**finish_args)
      shape.commit(overlay)
      return result
  def extend_textpage(self, tpage, flags=0, matrix=None):
      """Run this page through a text device that writes into `tpage`.

      Args:
          tpage: object whose `this` attribute is a `mupdf.FzStextPage`.
          flags: value stored in the text options' `flags`.
          matrix: converted with `JM_matrix_from_py` and used as the
              transformation when running the page.
      """
      stext_page = tpage.this
      assert isinstance(stext_page, mupdf.FzStextPage)
      stext_options = mupdf.FzStextOptions()
      stext_options.flags = flags
      ctm = JM_matrix_from_py(matrix)
      device = mupdf.FzDevice(stext_page, stext_options)
      mupdf.fz_run_page(self.this, device, ctm, mupdf.FzCookie())
      mupdf.fz_close_device(device)
  @property
  def first_annot(self):
      """First annotation.

      Returns None if the page has no PDF page object or MuPDF reports no
      first annotation; otherwise an `Annot` registered in `_annot_refs`.
      """
      CheckParent(self)
      pdf_page = self._pdf_page(required=False)
      if not pdf_page.m_internal:
          return None
      raw_annot = mupdf.pdf_first_annot(pdf_page)
      if not raw_annot.m_internal:
          return None
      first = Annot(raw_annot)
      first.thisown = True
      first.parent = weakref.proxy(self)  # owning page object
      self._annot_refs[id(first)] = first
      return first
  @property
  def first_link(self):
      """First link on page, i.e. the result of `load_links()`."""
      link = self.load_links()
      return link
  @property
  def first_widget(self):
      """First widget/field.

      Returns None if the page has no PDF page object or MuPDF reports no
      first widget; otherwise a `Widget` filled from the widget annotation.
      """
      CheckParent(self)
      pdf_page = self._pdf_page(required=False)
      if not pdf_page.m_internal:
          return None
      raw_widget = mupdf.pdf_first_widget(pdf_page)
      if not raw_widget.m_internal:
          return None
      annot = Annot(raw_widget)
      annot.thisown = True
      annot.parent = weakref.proxy(self)  # owning page object
      self._annot_refs[id(annot)] = annot
      # Hand out a Widget filled from the annotation, not the annotation.
      widget = Widget()
      TOOLS._fill_widget(annot, widget)
      return widget
  def get_bboxlog(self, layers=None):
      """Run the page through a bbox device and return what it collected.

      The page rotation is temporarily set to 0 and is restored afterwards,
      also when running the page raises.

      Args:
          layers: when truthy, the device is created with layer
              information enabled.

      Returns:
          The list filled by the device from `JM_new_bbox_device`.
      """
      CheckParent(self)
      old_rotation = self.rotation
      if old_rotation != 0:
          self.set_rotation(0)
      try:
          page = self.this
          rc = []
          inc_layers = True if layers else False
          dev = JM_new_bbox_device(rc, inc_layers)
          mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
          mupdf.fz_close_device(dev)
      finally:
          # Never leave the page de-rotated, not even on errors.
          if old_rotation != 0:
              self.set_rotation(old_rotation)
      return rc
  def get_cdrawings(self, extended=None, callback=None, method=None):
      """Extract vector graphics ("line art") from the page.

      The page rotation is temporarily set to 0 and is restored afterwards,
      also when the extraction raises.

      Args:
          extended: forwarded to the extraction; truthy also enables clips
              in the pure-Python device path.
          callback: forwarded to the extraction.
          method: forwarded to the extraction.

      Returns:
          None if `callback` is callable or `method` is not None, otherwise
          the extraction result.
      """
      CheckParent(self)
      old_rotation = self.rotation
      if old_rotation != 0:
          self.set_rotation(0)
      try:
          page = self.this
          if isinstance(page, mupdf.PdfPage):
              # Downcast pdf_page to fz_page.
              page = mupdf.FzPage(page)
          assert isinstance(page, mupdf.FzPage), f'{self.this=}'
          clips = True if extended else False
          prect = mupdf.fz_bound_page(page)
          # NOTE: '1 or ...' forces the 'extra' implementation; the else
          # branch is currently unreachable and kept unchanged.
          if 1 or g_use_extra:
              rc = extra.get_cdrawings(page, extended, callback, method)
          else:
              rc = list()
              if callable(callback) or method is not None:
                  dev = JM_new_lineart_device_Device(callback, clips, method)
              else:
                  dev = JM_new_lineart_device_Device(rc, clips, method)
              dev.ptm = mupdf.FzMatrix(1, 0, 0, -1, 0, prect.y1)
              mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
              mupdf.fz_close_device(dev)
      finally:
          # Never leave the page de-rotated, not even on errors.
          if old_rotation != 0:
              self.set_rotation(old_rotation)
      if callable(callback) or method is not None:
          return
      return rc
  def get_contents(self):
      """Get xrefs of /Contents objects.

      Returns:
          One number per entry if /Contents is an array, a one-element list
          if it is a single object, otherwise an empty list.
      """
      CheckParent(self)
      pdf_page = _as_pdf_page(self.this)
      contents = mupdf.pdf_dict_get(pdf_page.obj(), mupdf.PDF_ENUM_NAME_Contents)
      if mupdf.pdf_is_array(contents):
          return [
              mupdf.pdf_to_num(mupdf.pdf_array_get(contents, idx))
              for idx in range(mupdf.pdf_array_len(contents))
          ]
      if contents.m_internal:
          return [mupdf.pdf_to_num(contents)]
      return []
  def get_displaylist(self, annots=1):
      """Make a DisplayList from the page for Pixmap generation.

      Args:
          annots: include (default) or exclude annotations. When falsy,
              the list is built from the page contents only.

      Returns:
          A `DisplayList` wrapping the MuPDF display list.
      """
      CheckParent(self)
      if annots:
          make_list = mupdf.fz_new_display_list_from_page
      else:
          make_list = mupdf.fz_new_display_list_from_page_contents
      return DisplayList(make_list(self.this))
  def get_drawings(self, extended: bool=False) -> list:
      """Retrieve vector graphics. The extended version includes clips.

      Note:
          For greater comfort, this method converts the point-likes,
          rect-likes and quad-likes delivered by `get_cdrawings` to
          Point / Rect / Quad objects.
          For paths of type 'f' or 's' it also adds the keys listed in
          `allkeys` with value None where they are missing.

      Args:
          extended: forwarded to `get_cdrawings`.

      Returns:
          The list from `get_cdrawings`, with its dictionaries updated in
          place.
      """
      allkeys = (
          'closePath',
          'fill',
          'color',
          'width',
          'lineCap',
          'lineJoin',
          'dashes',
          'stroke_opacity',
          'fill_opacity',
          'even_odd',
      )
      paths = self.get_cdrawings(extended=extended)
      for idx, npath in enumerate(paths):
          ptype = npath["type"]
          # Clip paths carry a "scissor", all others a "rect".
          if ptype.startswith("clip"):
              npath["scissor"] = Rect(npath["scissor"])
          else:
              npath["rect"] = Rect(npath["rect"])
          if ptype != "group":
              converted = []
              for item in npath["items"]:
                  cmd, rest = item[0], item[1:]
                  if cmd == "re":
                      converted.append(("re", Rect(rest[0]).normalize(), rest[1]))
                  elif cmd == "qu":
                      converted.append(("qu", Quad(rest[0])))
                  else:
                      # Every remaining operand is treated as a point-like.
                      converted.append(tuple([cmd] + [Point(p) for p in rest]))
              npath["items"] = converted
          if ptype in ('f', 's'):
              for key in allkeys:
                  npath[key] = npath.get(key)
          paths[idx] = npath
      return paths
  class Drawpath(object):
      """Reflects a path dictionary from get_cdrawings().

      Every keyword argument becomes an instance attribute of that name.
      """

      def __init__(self, **args):
          vars(self).update(args)
  class Drawpathlist(object):
      """List of Path objects representing get_cdrawings() output.

      Paths are stored in insertion order in ``paths``. append() also
      maintains ``path_count`` and one counter per path type ("clip",
      "group", "f", "s", "fs").
      """

      # maps a path type to the name of the counter attribute it increments
      _COUNTER_BY_TYPE = {
          "clip": "clip_count",
          "group": "group_count",
          "f": "fill_count",
          "s": "stroke_count",
          "fs": "fillstroke_count",
      }

      def __init__(self):
          self.paths = []
          self.path_count = 0
          self.group_count = 0
          self.clip_count = 0
          self.fill_count = 0
          self.stroke_count = 0
          self.fillstroke_count = 0

      def __getitem__(self, item):
          return self.paths[item]

      def __len__(self):
          return len(self.paths)

      def append(self, path):
          """Append a path and update the counters for its type."""
          self.paths.append(path)
          self.path_count += 1
          counter = self._COUNTER_BY_TYPE.get(path.type)
          if counter is not None:  # unknown types only count as a path
              setattr(self, counter, getattr(self, counter) + 1)

      def _parents(self, i, kind):
          """Return the enclosing paths of type `kind` for path number i.

          Walks backwards from path i and collects paths of the given type
          with strictly decreasing level, nearest parent first.
          """
          count = self.path_count
          # An empty list has no valid index. Without this check a negative
          # index would never become non-negative in the wrap-around below.
          if count == 0 or i >= count:
              raise IndexError("bad path index")
          if i < 0:
              i %= count  # wrap negative indices into 0..count-1
          limit = self.paths[i].level
          parents = []
          for p in reversed(self.paths[:i]):
              if p.type != kind or p.level >= limit:
                  continue
              if parents and p.level >= parents[-1].level:
                  continue  # only accept smaller levels
              parents.append(p)
          return parents

      def clip_parents(self, i):
          """Return list of parent clip paths.

          Args:
              i: (int) return parents of this path.
          Returns:
              List of the clip parents.
          Raises:
              IndexError: if the list is empty or i >= path_count.
          """
          return self._parents(i, "clip")

      def group_parents(self, i):
          """Return list of parent group paths.

          Args:
              i: (int) return parents of this path.
          Returns:
              List of the group parents.
          Raises:
              IndexError: if the list is empty or i >= path_count.
          """
          return self._parents(i, "group")
  def get_lineart(self) -> object:
      """Get page drawings paths.

      Note:
          For greater comfort, this method converts point-like, rect-like,
          quad-like tuples of the C version to respective Point / Rect /
          Quad objects. Fill paths ("f") additionally receive the
          stroke-related attributes with value None.
          In contrast to get_drawings(), this output is an object.

      Returns:
          A Drawpathlist containing one Drawpath per path returned by
          get_cdrawings(extended=True).
      """
      val = self.get_cdrawings(extended=True)
      paths = self.Drawpathlist()
      for path in val:
          npath = self.Drawpath(**path)
          # clip paths carry a "scissor", all other paths a "rect"
          if npath.type != "clip":
              npath.rect = Rect(path["rect"])
          else:
              npath.scissor = Rect(path["scissor"])
          if npath.type != "group":
              newitems = []
              for item in path["items"]:
                  cmd = item[0]
                  rest = item[1:]
                  if cmd == "re":
                      item = ("re", Rect(rest[0]).normalize(), rest[1])
                  elif cmd == "qu":
                      item = ("qu", Quad(rest[0]))
                  else:
                      item = tuple([cmd] + [Point(i) for i in rest])
                  newitems.append(item)
              npath.items = newitems
          if npath.type == "f":
              # defaults for the stroke properties a fill path does not have
              npath.stroke_opacity = None
              npath.dashes = None
              # get_drawings() names these keys "lineJoin" / "lineCap";
              # default them under the same names so fill and stroke paths
              # expose the same attributes.
              npath.lineJoin = None
              npath.lineCap = None
              # previously used attribute names, kept for compatibility
              npath.line_join = None
              npath.line_cap = None
              npath.color = None
              npath.width = None
          paths.append(npath)
      val = None
      return paths
  def get_image_info(
      page: 'Page',
      hashes: bool = False,
      xrefs: bool = False
  ) -> list:
      """Return the image information of a page, taken from a TextPage.

      A result computed with hashes is stored in page._image_info and
      re-used by later calls.

      Args:
          hashes: (bool) include MD5 hash for each image.
          xrefs: (bool) try to find the xref for each image. Sets hashes
              to true. Ignored for non-PDF documents.

      Returns:
          The list of image information dictionaries. With xrefs, each
          entry gets an "xref" item, which is 0 if no image of the page
          has the same digest.
      """
      doc = page.parent
      is_pdf = doc.is_pdf
      if not is_pdf:
          xrefs = False
      elif xrefs:
          hashes = True  # matching images to xrefs is done via digests

      cached = getattr(page, "_image_info", None)
      if cached and not xrefs:
          return cached

      if cached:
          imginfo = cached
      else:
          tp = page.get_textpage(flags=TEXT_PRESERVE_IMAGES)
          imginfo = tp.extractIMGINFO(hashes=hashes)
          del tp
          if hashes:
              page._image_info = imginfo

      if not xrefs:
          return imginfo

      # map the digest of every image of the page to its xref
      xref_by_digest = {}
      for img in page.get_images():
          img_xref = img[0]
          pix = Pixmap(doc, img_xref)
          xref_by_digest[pix.digest] = img_xref
          del pix

      for info in imginfo:
          info["xref"] = xref_by_digest.get(info["digest"], 0)
      return imginfo
  def get_image_rects(page: 'Page', name, transform=False) -> list:
      """Return list of image positions on a page.

      Args:
          name: (str, list, int) image identification. May be reference
              name, an item of the page's image list or an xref.
          transform: (bool) whether to also return the transformation matrix.

      Returns:
          A list of pymupdf.Rect objects or tuples of
          (pymupdf.Rect, pymupdf.Matrix) for all image locations on the page.

      Raises:
          ValueError: if a reference name matches no image, or more than
              one image, of the page.
      """
      if type(name) in (list, tuple):
          xref = name[0]
      elif type(name) is int:
          xref = name
      else:
          matches = [img for img in page.get_images() if img[7] == name]
          if not matches:
              raise ValueError("bad image name")
          if len(matches) != 1:
              raise ValueError("multiple image names found")
          xref = matches[0][0]

      # make pixmap of the image to compute MD5
      pix = Pixmap(page.parent, xref)
      digest = pix.digest
      del pix

      hits = [
          im for im in page.get_image_info(hashes=True)
          if im["digest"] == digest
      ]
      if transform:
          return [(Rect(im["bbox"]), Matrix(im["transform"])) for im in hits]
      return [Rect(im["bbox"]) for im in hits]
  def get_label(page):
      """Return the label for this PDF page.

      Args:
          page: page object.
      Returns:
          The label (str) of the page. An empty string if the document
          has no page labels.
      """
      # Jorj McKie, 2021-01-06
      labels = page.parent._get_page_labels()
      if labels:
          labels.sort()
          return utils.get_label_pno(page.number, labels)
      return ""
  def get_links(page: 'Page') -> list:
      """Create a list of all links contained in a PDF page.

      Notes:
          see PyMuPDF documentation for details.

      Returns:
          A list with one link dictionary per link of the page. For PDF
          documents, "xref" and "id" are added to every dictionary if the
          number of link annotations equals the number of links.
      """
      CheckParent(page)
      links = []
      link = page.first_link
      while link:
          links.append(utils.getLinkDict(link, page.parent))
          link = link.next

      if links and page.parent.is_pdf:
          link_annots = [
              x for x in JM_get_annot_xref_list2(page)
              if x[1] == mupdf.PDF_ANNOT_LINK  # pylint: disable=no-member
          ]
          # only attach xref / id if both lists pair up one-to-one
          if len(link_annots) == len(links):
              for entry, annot in zip(links, link_annots):
                  entry["xref"] = annot[0]
                  entry["id"] = annot[2]
      return links
  def get_pixmap(
      page: 'Page',
      *,
      matrix: matrix_like=Identity,
      dpi=None,
      colorspace: Colorspace=None,
      clip: rect_like=None,
      alpha: bool=False,
      annots: bool=True,
  ) -> 'Pixmap':
      """Create pixmap of page.

      Keyword args:
          matrix: Matrix for transformation (default: Identity).
          dpi: desired dots per inch. If given, matrix is ignored.
          colorspace: (str/Colorspace) cmyk, rgb, gray - case ignored,
              default csRGB.
          clip: (irect-like) restrict rendering to this area.
          alpha: (bool) whether to include alpha channel
          annots: (bool) whether to also render annotations

      Raises:
          ValueError: if the colorspace does not have 1, 3 or 4 components.
      """
      if colorspace is None:
          colorspace = csRGB
      if dpi:
          # dpi overrides any given matrix: zoom relative to 72 dpi
          zoom = dpi / 72
          matrix = Matrix(zoom, zoom)

      if type(colorspace) is str:
          # any string other than GRAY / CMYK selects RGB
          by_name = {"GRAY": csGRAY, "CMYK": csCMYK}
          colorspace = by_name.get(colorspace.upper(), csRGB)
      if colorspace.n not in (1, 3, 4):
          raise ValueError("unsupported colorspace")

      display_list = page.get_displaylist(annots=annots)
      pix = display_list.get_pixmap(
          matrix=matrix, colorspace=colorspace, alpha=alpha, clip=clip
      )
      display_list = None
      if dpi:
          pix.set_dpi(dpi, dpi)
      return pix
  def remove_rotation(self):
      """Set page rotation to 0 while maintaining visual appearance.

      A matrix command is prepended to the page contents, the mediabox
      coordinates are swapped for rotations 90 and 270, and the rectangles
      of annotations, links and widgets are recomputed.

      Returns:
          Identity if the page is not rotated, else the inverse of the
          generated derotation matrix.
      """
      angle = self.rotation  # normalized rotation value
      if angle == 0:
          return Identity  # nothing to do

      # need to derotate the page's content
      box = self.mediabox  # current mediabox
      if angle == 90:
          # before derotation, shift content horizontally
          shift = Matrix(1, 0, 0, 1, box.y1 - box.x1 - box.x0 - box.y0, 0)
      elif angle == 270:
          # before derotation, shift content vertically
          shift = Matrix(1, 0, 0, 1, 0, box.x1 - box.y1 - box.y0 - box.x0)
      else:  # angle = 180
          shift = Matrix(1, 0, 0, 1, -2 * box.x0, -2 * box.y0)

      # prefix with derotation matrix
      derotate = shift * self.derotation_matrix
      command = (_format_g(tuple(derotate)) + ' cm ').encode('utf8')
      _ = TOOLS._insert_contents(self, command, False)  # prepend to page contents

      # swap x- and y-coordinates
      if angle in (90, 270):
          x0, y0, x1, y1 = box
          box.x0, box.y0, box.x1, box.y1 = y0, x0, y1, x1
          self.set_mediabox(box)

      self.set_rotation(0)
      inverse = ~derotate  # inverse of the derotation matrix

      for annot in self.annots():  # modify rectangles of annotations
          # TODO: only try to set rectangle for applicable annot types
          annot.set_rect(annot.rect * inverse)

      for link in self.get_links():  # modify 'from' rectangles of links
          moved = link["from"] * inverse
          self.delete_link(link)
          link["from"] = moved
          try:  # invalid links remain deleted
              self.insert_link(link)
          except Exception:
              pass

      for widget in self.widgets():  # modify field rectangles
          widget.rect = widget.rect * inverse
          widget.update()

      return inverse  # the inverse of the generated derotation matrix
  def cluster_drawings(
      self, clip=None, drawings=None, x_tolerance: float = 3, y_tolerance: float = 3,
      final_filter: bool = True,
  ) -> list:
      """Join rectangles of neighboring vector graphic items.

      Args:
          clip: optional rect-like to restrict the page area to consider.
          drawings: (optional) output of a previous "get_drawings()".
          x_tolerance: horizontal neighborhood threshold.
          y_tolerance: vertical neighborhood threshold.
          final_filter: if false, all joined rectangles are returned, not
              only the "significant" ones.

      Notes:
          Vector graphics (also called line-art or drawings) usually consist
          of independent items like rectangles, lines or curves to jointly
          form table grid lines or bar, line, pie charts and similar.
          This method identifies rectangles wrapping these disparate items.

      Returns:
          A list of Rect items, each wrapping line-art items that are close
          enough to be considered forming a common vector graphic.
          Unless final_filter is false, only "significant" rectangles will
          be returned, i.e. having both, width and height larger than the
          tolerance values.
      """
      CheckParent(self)
      area = self.rect  # the default clipping area
      if clip is not None:
          area = Rect(clip)
      if drawings is None:  # if we cannot re-use a previous output
          drawings = self.get_drawings()

      def _ordered(a, b):
          """Return the two coordinates in ascending order."""
          return (a, b) if b > a else (b, a)

      def are_neighbors(r1, r2):
          """Detect whether r1, r2 are "neighbors".

          Both parameters must be (potentially invalid) rectangles. They
          are neighbors unless they are further apart than the tolerance
          in the horizontal or the vertical direction.
          """
          ax0, ax1 = _ordered(r1.x0, r1.x1)
          ay0, ay1 = _ordered(r1.y0, r1.y1)
          bx0, bx1 = _ordered(r2.x0, r2.x1)
          by0, by1 = _ordered(r2.y0, r2.y1)
          too_far = (
              ax1 < bx0 - x_tolerance
              or ax0 > bx1 + x_tolerance
              or ay1 < by0 - y_tolerance
              or ay0 > by1 + y_tolerance
          )
          return not too_far

      def _inside_area(rect):
          """True if rect does not extend beyond the clip area."""
          return (
              rect.x0 >= area.x0
              and rect.x1 <= area.x1
              and rect.y0 >= area.y0
              and rect.y1 <= area.y1
          )

      def sort_key(rect):
          return (rect.y1, rect.x0)

      # rectangles of all vector graphics contained in the clip
      pending = sorted(
          [p["rect"] for p in drawings if _inside_area(p["rect"])],
          key=sort_key,
      )
      joined = []  # the final list of the joined rectangles

      # ---------------------------------------------------------------------
      # The strategy is to identify and join all rects that are neighbors
      # ---------------------------------------------------------------------
      while pending:  # the algorithm will empty this list
          current = +pending[0]  # copy of first rectangle
          grew = True
          while grew:  # repeat until no more neighbor is found
              grew = False
              for idx in range(len(pending) - 1, 0, -1):  # from back to front
                  candidate = pending[idx]
                  if are_neighbors(candidate, current):
                      current |= candidate.tl  # include in first rect
                      current |= candidate.br  # include in first rect
                      del pending[idx]  # delete this rect
                      grew = True
          joined.append(current)
          del pending[0]
          pending = sorted(set(pending), key=sort_key)

      joined = sorted(set(joined), key=sort_key)
      if not final_filter:
          return joined
      return [
          r for r in joined
          if r.width > x_tolerance and r.height > y_tolerance
      ]
  def get_fonts(self, full=False):
      """List of fonts defined in the page object.

      Delegates to the parent document's get_page_fonts() for this page
      number, passing `full` through.
      """
      CheckParent(self)
      document = self.parent
      return document.get_page_fonts(self.number, full=full)
  def get_image_bbox(self, name, transform=0):
      """Get rectangle occupied by image 'name'.

      Args:
          name: either an item of the image list, or the referencing
              name string - elem[7] of the resp. item.
          transform: if true, also return the image transformation matrix.

      Returns:
          The image rectangle, or the tuple (rectangle, matrix) if
          'transform' is true. If the image position cannot be determined,
          the rectangle is Rect(1, 1, -1, -1) and the matrix is Matrix().

      Raises:
          ValueError: if the document is closed or encrypted, if 'name' is
              a list / tuple whose last item is not an int, or if a name
              string matches no image or more than one image.
      """
      CheckParent(self)
      doc = self.parent
      if doc.is_closed or doc.is_encrypted:
          raise ValueError('document closed or encrypted')

      inf_rect = Rect(1, 1, -1, -1)
      null_mat = Matrix()
      # the "nothing found" result, shaped like a successful result
      if transform:
          rc = (inf_rect, null_mat)
      else:
          rc = inf_rect

      if type(name) in (list, tuple):
          if not type(name[-1]) is int:
              raise ValueError('need item of full page image list')
          item = name
      else:
          imglist = [i for i in doc.get_page_images(self.number, True) if name == i[7]]
          if len(imglist) == 1:
              item = imglist[0]
          elif imglist == []:
              raise ValueError('bad image name')
          else:
              raise ValueError("found multiple images named '%s'." % name)

      xref = item[-1]
      if xref != 0 or transform:
          try:
              return self.get_image_rects(item, transform=transform)[0]
          except Exception:
              exception_info()
              # Return the failure value in the requested shape: with
              # 'transform', callers unpack a (rect, matrix) tuple.
              return rc

      pdf_page = self._pdf_page()
      val = JM_image_reporter(pdf_page)
      if not bool(val):
          return rc

      for v in val:
          if v[0] != item[-3]:
              continue
          q = Quad(v[1])
          bbox = q.rect
          if transform == 0:
              rc = bbox
              break
          # derive the image matrix from the quad
          hm = Matrix(util_hor_matrix(q.ll, q.lr))
          h = abs(q.ll - q.ul)
          w = abs(q.ur - q.ul)
          m0 = Matrix(1 / w, 0, 0, 1 / h, 0, 0)
          m = ~(hm * m0)
          rc = (bbox, m)
          break
      return rc
  def get_images(self, full=False):
      """List of images defined in the page object.

      Delegates to the parent document's get_page_images() for this page
      number, passing `full` through.
      """
      CheckParent(self)
      document = self.parent
      return document.get_page_images(self.number, full=full)
  def get_oc_items(self) -> list:
      """Get OCGs and OCMDs used in the page's contents.

      Returns:
          List of items (name, xref, type), where type is one of
          "ocg" / "ocmd", and name is the property name. Properties whose
          object is neither an OCG nor an OCMD are left out.
      """
      # text that identifies the object type, checked in this order
      markers = (("/Type/OCG", "ocg"), ("/Type/OCMD", "ocmd"))
      found = []
      for pname, xref in self._get_resource_properties():
          text = self.parent.xref_object(xref, compressed=True)
          octype = next(
              (label for marker, label in markers if marker in text),
              None,
          )
          if octype is not None:
              found.append((pname, xref, octype))
      return found
  def get_svg_image(self, matrix=None, text_as_path=1):
      """Make SVG image from page.

      Args:
          matrix: converted via JM_matrix_from_py() and used to transform
              the page bounds and to run the page.
          text_as_path: if 1 (default), use FZ_SVG_TEXT_AS_PATH, otherwise
              FZ_SVG_TEXT_AS_TEXT.

      Returns:
          The content written by the SVG device, converted by
          JM_EscapeStrFromBuffer().
      """
      CheckParent(self)
      page_bounds = mupdf.fz_bound_page(self.this)
      ctm = JM_matrix_from_py(matrix)
      if text_as_path == 1:
          text_option = mupdf.FZ_SVG_TEXT_AS_PATH
      else:
          text_option = mupdf.FZ_SVG_TEXT_AS_TEXT
      bounds = mupdf.fz_transform_rect(page_bounds, ctm)
      width = bounds.x1 - bounds.x0
      height = bounds.y1 - bounds.y0

      buffer = mupdf.fz_new_buffer(1024)
      out = mupdf.FzOutput(buffer)
      dev = mupdf.fz_new_svg_device(out, width, height, text_option, 1)
      mupdf.fz_run_page(self.this, dev, ctm, mupdf.FzCookie())
      mupdf.fz_close_device(dev)
      out.fz_close_output()
      return JM_EscapeStrFromBuffer(buffer)
  def get_textbox(
      page: Page,
      rect: rect_like,
      textpage=None,  #: TextPage = None,
  ) -> str:
      """Return the result of extractTextbox() for a rectangle of the page.

      Args:
          rect: rectangle passed to the text page's extractTextbox().
          textpage: text page to use. If None, a temporary text page is
              created via page.get_textpage().

      Raises:
          ValueError: if the given text page's parent is not this page.
      """
      if textpage is not None:
          if textpage.parent != page:
              raise ValueError("not a textpage of this page")
          return textpage.extractTextbox(rect)

      # no text page given: use a temporary one
      temp_textpage = page.get_textpage()
      text = temp_textpage.extractTextbox(rect)
      del temp_textpage
      return text
  def get_text(self, *args, **kwargs):
      """Call utils.get_text() with this page and the given arguments."""
      result = utils.get_text(self, *args, **kwargs)
      return result
  def get_text_blocks(self, *args, **kwargs):
      """Call utils.get_text_blocks() with this page and the given arguments."""
      result = utils.get_text_blocks(self, *args, **kwargs)
      return result
  def get_text_selection(self, *args, **kwargs):
      """Call utils.get_text_selection() with this page and the given arguments."""
      result = utils.get_text_selection(self, *args, **kwargs)
      return result
  def get_text_words(self, *args, **kwargs):
      """Call utils.get_text_words() with this page and the given arguments."""
      result = utils.get_text_words(self, *args, **kwargs)
      return result
  def get_textpage_ocr(self, *args, **kwargs):
      """Call utils.get_textpage_ocr() with this page and the given arguments."""
      result = utils.get_textpage_ocr(self, *args, **kwargs)
      return result
  def get_textpage(self, clip: rect_like = None, flags: int = 0, matrix=None) -> "TextPage":
      """Create a TextPage for this page.

      The text page is made while the page rotation is temporarily set to
      0; the previous rotation is restored afterwards, also on errors.

      Args:
          clip: passed on to _get_textpage().
          flags: passed on to _get_textpage().
          matrix: passed on to _get_textpage(); Matrix(1, 1) if None.

      Returns:
          A TextPage whose parent is a weak reference proxy of this page.
      """
      CheckParent(self)
      if matrix is None:
          matrix = Matrix(1, 1)

      saved_rotation = self.rotation
      was_rotated = saved_rotation != 0
      if was_rotated:
          self.set_rotation(0)
      try:
          raw_textpage = self._get_textpage(clip, flags=flags, matrix=matrix)
      finally:
          if was_rotated:
              self.set_rotation(saved_rotation)

      textpage = TextPage(raw_textpage)
      textpage.parent = weakref.proxy(self)
      return textpage
  def get_texttrace(self):
      """Run the page through a text trace device and return its output.

      The page is processed while its rotation is temporarily set to 0.
      The previous rotation is restored afterwards, also when processing
      the page raises an exception.

      Returns:
          The list that was handed to the text trace device.
      """
      CheckParent(self)
      old_rotation = self.rotation
      if old_rotation != 0:
          self.set_rotation(0)
      try:
          page = self.this
          rc = []
          # The condition is always true, so the 'extra' implementation of
          # the device is always used.
          if 1 or g_use_extra:
              dev = extra.JM_new_texttrace_device(rc)
          else:
              dev = JM_new_texttrace_device(rc)
          prect = mupdf.fz_bound_page(page)
          # matrix that flips the y-axis within the page height
          dev.ptm = mupdf.FzMatrix(1, 0, 0, -1, 0, prect.y1)
          mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
          mupdf.fz_close_device(dev)
      finally:
          # never leave the page derotated, same as in get_textpage()
          if old_rotation != 0:
              self.set_rotation(old_rotation)
      return rc
  def get_xobjects(self):
      """List of xobjects defined in the page object.

      Delegates to the parent document's get_page_xobjects() for this
      page number.
      """
      CheckParent(self)
      document = self.parent
      return document.get_page_xobjects(self.number)
  def insert_font(self, fontname="helv", fontfile=None, fontbuffer=None,
                  set_simple=False, wmode=0, encoding=0):
      """Make a font available for this page and return its xref.

      Args:
          fontname: reference name of the font. A leading "/" is removed.
          fontfile: font file given as a str, as an object having an
              "absolute" attribute (converted with str()), or as an object
              having a "name" attribute.
          fontbuffer: font data passed on to _insertFont(). Replaced by
              pymupdf_fonts.myfont(fontname) if the lower-case fontname
              is a key of fitz_fontdescriptors.
          set_simple, wmode, encoding: passed on to _insertFont().

      Returns:
          The xref of the font. If _insertFont() returns a false value,
          that value is returned instead.

      Raises:
          ValueError: if the page has no parent, if fontname contains
              invalid characters, or if fontfile has none of the
              supported forms.
      """
      doc = self.parent
      if doc is None:
          raise ValueError("orphaned object: parent is None")
      idx = 0

      if fontname.startswith("/"):
          fontname = fontname[1:]
      inv_chars = INVALID_NAME_CHARS.intersection(fontname)
      if inv_chars:
          raise ValueError(f"bad fontname chars {inv_chars}")

      known_font = CheckFont(self, fontname)
      if known_font is not None:  # font already in font list of page
          xref = known_font[0]  # this is the xref
          if not CheckFontInfo(doc, xref):  # not yet in our document font list
              # need to build the doc FontInfo entry - done via get_char_widths
              doc.get_char_widths(xref)
          return xref

      # ----------------------------------------------------------------------
      # the font is not present for this page
      # ----------------------------------------------------------------------
      bfname = Base14_fontdict.get(fontname.lower(), None)  # BaseFont if Base-14 font
      serif = 0
      CJK_number = -1
      CJK_list_n = ["china-t", "china-s", "japan", "korea"]
      CJK_list_s = ["china-ts", "china-ss", "japan-s", "korea-s"]
      # look for the name in the non-serif list first, then in the serif list
      for serif_flag, cjk_names in ((0, CJK_list_n), (1, CJK_list_s)):
          try:
              CJK_number = cjk_names.index(fontname)
              serif = serif_flag
              break
          except Exception:
              # Verbose in PyMuPDF/tests.
              if g_exceptions_verbose > 1: exception_info()

      if fontname.lower() in fitz_fontdescriptors:
          import pymupdf_fonts
          fontbuffer = pymupdf_fonts.myfont(fontname)  # make a copy
          del pymupdf_fonts

      # install the font for the page
      if fontfile is None:
          fontfile_str = None
      elif type(fontfile) is str:
          fontfile_str = fontfile
      elif hasattr(fontfile, "absolute"):
          fontfile_str = str(fontfile)
      elif hasattr(fontfile, "name"):
          fontfile_str = fontfile.name
      else:
          raise ValueError("bad fontfile")

      val = self._insertFont(
          fontname, bfname, fontfile_str, fontbuffer, set_simple, idx,
          wmode, serif, encoding, CJK_number,
      )
      if not val:  # did not work, error return
          return val

      xref, fontdict = val[0], val[1]  # xref of installed font, font info
      if not CheckFontInfo(doc, xref):  # document does not have this font yet
          # need to create document font info
          doc.get_char_widths(xref, fontdict=fontdict)
      return xref
  def insert_htmlbox(
      page,
      rect,
      text,
      *,
      css=None,
      scale_low=0,
      archive=None,
      rotate=0,
      oc=0,
      opacity=1,
      overlay=True,
      _scale_word_width=True,
      _verbose=False,
  ) -> tuple:
      """Insert text with optional HTML tags and stylings into a rectangle.

      Args:
          rect: (rect-like) rectangle into which the text should be placed.
          text: (str) text with optional HTML tags and stylings. A Story
              object is accepted as well.
          css: (str) CSS styling commands.
          scale_low: (float) force-fit content by scaling it down. Must be in
              range [0, 1]. If 1, no scaling will take place. If 0, arbitrary
              down-scaling is acceptable. A value of 0.1 would mean that content
              may be scaled down by at most 90%.
          archive: Archive object pointing to locations of used fonts or images
          rotate: (int) rotate the text in the box by a multiple of 90 degrees.
          oc: (int) the xref of an OCG / OCMD (Optional Content).
          opacity: (float) set opacity of inserted content.
          overlay: (bool) put text on top of page content.
          _scale_word_width: internal, for testing only.
          _verbose: internal, for testing only.

      Returns:
          A tuple of floats (spare_height, scale).
          spare_height:
              The height of the remaining space in <rect> below the
              text, or -1 if we failed to fit.
          scale:
              The scaling required; `0 < scale <= 1`.
              Will be less than `scale_low` if we failed to fit.

      Raises:
          ValueError: if rotate is no multiple of 90, if scale_low is not
              in [0, 1], or if text is neither a string nor a Story.
      """
      # validate the rotation angle and normalize it to 0 <= rotate < 360
      if rotate % 90 != 0:
          raise ValueError("bad rotation angle")
      rotate %= 360

      if not 0 <= scale_low <= 1:
          raise ValueError("'scale_low' must be in [0, 1]")

      if css is None:
          css = ""

      rect = Rect(rect)
      # the story is laid out unrotated: swap the sides for 90 / 270 degrees
      if rotate in (90, 270):
          temp_rect = Rect(0, 0, rect.height, rect.width)
      else:
          temp_rect = Rect(0, 0, rect.width, rect.height)

      # use a small border by default
      mycss = "body {margin:1px;}" + css  # append user CSS

      # either make a story, or accept a given one
      if isinstance(text, str):  # if a string, convert to a Story
          story = Story(html=text, user_css=mycss, archive=archive)
      elif isinstance(text, Story):
          story = text
      else:
          raise ValueError("'text' must be a string or a Story")

      # ----------------------------------------------------------------
      # Find a scaling factor that lets our story fit in. Instead of scaling
      # the text smaller, we instead look at how much bigger the rect needs
      # to be to fit the text, then reverse the scaling to get how much we
      # need to scale down the text.
      # ----------------------------------------------------------------
      if scale_low == 0:
          rect_scale_max = None
      else:
          rect_scale_max = 1 / scale_low
      if _scale_word_width:
          fit_flags = mupdf.FZ_PLACE_STORY_FLAG_NO_OVERFLOW
      else:
          fit_flags = 0

      fit = story.fit_scale(
          temp_rect,
          scale_min=1,
          scale_max=rect_scale_max,
          flags=fit_flags,
          verbose=_verbose,
      )

      if not fit.big_enough:  # there was no fit
          scale = 1 / fit.parameter
          return (-1, scale)

      # fit.filled is a tuple; we convert it in place to a Rect for
      # convenience. (fit.rect is already a Rect.)
      fit.filled = Rect(fit.filled)
      assert (fit.rect.x0, fit.rect.y0) == (0, 0)
      assert (fit.filled.x0, fit.filled.y0) == (0, 0)

      scale = 1 / fit.parameter
      assert scale >= scale_low, f'{scale_low=} {scale=}'

      unused_height = (fit.rect.y1 - fit.filled.y1) * scale
      spare_height = max(unused_height, 0)

      def rect_function(*args):
          # always place on the fitted rectangle
          return fit.rect, fit.rect, None

      # draw story on temp PDF page
      doc = story.write_with_links(rect_function)

      # Insert opacity if requested.
      # For this, we prepend a command to the /Contents.
      if 0 <= opacity < 1:
          tpage = doc[0]  # load page
          # generate /ExtGstate for the page
          alp0 = tpage._set_opacity(CA=opacity, ca=opacity)
          s = f"/{alp0} gs\n"  # generate graphic state command
          TOOLS._insert_contents(tpage, s.encode(), 0)

      # put result in target page
      page.show_pdf_page(rect, doc, 0, rotate=rotate, oc=oc, overlay=overlay)

      # ---------------------------------------------------------------------
      # re-insert links in target rect (show_pdf_page cannot copy annotations)
      # ---------------------------------------------------------------------
      # scaled center point of fit.rect
      mp1 = (fit.rect.tl + fit.rect.br) / 2 * scale
      # center point of target rect
      mp2 = (rect.tl + rect.br) / 2

      # compute link positioning matrix:
      # - move center of scaled-down fit.rect to (0,0)
      # - rotate
      # - move (0,0) to center of target rect
      to_origin = Matrix(scale, 0, 0, scale, -mp1.x, -mp1.y)
      to_target = Matrix(1, 0, 0, 1, mp2.x, mp2.y)
      mat = to_origin * Matrix(-rotate) * to_target

      # copy over links
      for link in doc[0].get_links():
          link["from"] *= mat
          page.insert_link(link)

      return spare_height, scale
  11110. def insert_image(
  11111. page,
  11112. rect,
  11113. *,
  11114. alpha=-1,
  11115. filename=None,
  11116. height=0,
  11117. keep_proportion=True,
  11118. mask=None,
  11119. oc=0,
  11120. overlay=True,
  11121. pixmap=None,
  11122. rotate=0,
  11123. stream=None,
  11124. width=0,
  11125. xref=0,
  11126. ):
  11127. """Insert an image for display in a rectangle.
  11128. Args:
  11129. rect: (rect_like) position of image on the page.
  11130. alpha: (int, optional) set to 0 if image has no transparency.
  11131. filename: (str, Path, file object) image filename.
  11132. height: (int)
  11133. keep_proportion: (bool) keep width / height ratio (default).
  11134. mask: (bytes, optional) image consisting of alpha values to use.
  11135. oc: (int) xref of OCG or OCMD to declare as Optional Content.
  11136. overlay: (bool) put in foreground (default) or background.
  11137. pixmap: (pymupdf.Pixmap) use this as image.
  11138. rotate: (int) rotate by 0, 90, 180 or 270 degrees.
  11139. stream: (bytes) use this as image.
  11140. width: (int)
  11141. xref: (int) use this as image.
  11142. 'page' and 'rect' are positional, all other parameters are keywords.
  11143. If 'xref' is given, that image is used. Other input options are ignored.
  11144. Else, exactly one of pixmap, stream or filename must be given.
  11145. 'alpha=0' for non-transparent images improves performance significantly.
  11146. Affects stream and filename only.
  11147. Optimum transparent insertions are possible by using filename / stream in
  11148. conjunction with a 'mask' image of alpha values.
  11149. Returns:
  11150. xref (int) of inserted image. Re-use as argument for multiple insertions.
  11151. """
  11152. CheckParent(page)
  11153. doc = page.parent
  11154. if not doc.is_pdf:
  11155. raise ValueError("is no PDF")
  11156. if xref == 0 and (bool(filename) + bool(stream) + bool(pixmap) != 1):
  11157. raise ValueError("xref=0 needs exactly one of filename, pixmap, stream")
  11158. if filename:
  11159. if type(filename) is str:
  11160. pass
  11161. elif hasattr(filename, "absolute"):
  11162. filename = str(filename)
  11163. elif hasattr(filename, "name"):
  11164. filename = filename.name
  11165. else:
  11166. raise ValueError("bad filename")
  11167. if filename and not os.path.exists(filename):
  11168. raise FileNotFoundError("No such file: '%s'" % filename)
  11169. elif stream and type(stream) not in (bytes, bytearray, io.BytesIO):
  11170. raise ValueError("stream must be bytes-like / BytesIO")
  11171. elif pixmap and type(pixmap) is not Pixmap:
  11172. raise ValueError("pixmap must be a Pixmap")
  11173. if mask and not (stream or filename):
  11174. raise ValueError("mask requires stream or filename")
  11175. if mask and type(mask) not in (bytes, bytearray, io.BytesIO):
  11176. raise ValueError("mask must be bytes-like / BytesIO")
  11177. while rotate < 0:
  11178. rotate += 360
  11179. while rotate >= 360:
  11180. rotate -= 360
  11181. if rotate not in (0, 90, 180, 270):
  11182. raise ValueError("bad rotate value")
  11183. r = Rect(rect)
  11184. if r.is_empty or r.is_infinite:
  11185. raise ValueError("rect must be finite and not empty")
  11186. clip = r * ~page.transformation_matrix
  11187. # Create a unique image reference name.
  11188. ilst = [i[7] for i in doc.get_page_images(page.number)]
  11189. ilst += [i[1] for i in doc.get_page_xobjects(page.number)]
  11190. ilst += [i[4] for i in doc.get_page_fonts(page.number)]
  11191. n = "fzImg" # 'pymupdf image'
  11192. i = 0
  11193. _imgname = n + "0" # first name candidate
  11194. while _imgname in ilst:
  11195. i += 1
  11196. _imgname = n + str(i) # try new name
  11197. if overlay:
  11198. page.wrap_contents() # ensure a balanced graphics state
  11199. digests = doc.InsertedImages
  11200. xref, digests = page._insert_image(
  11201. filename=filename,
  11202. pixmap=pixmap,
  11203. stream=stream,
  11204. imask=mask,
  11205. clip=clip,
  11206. overlay=overlay,
  11207. oc=oc,
  11208. xref=xref,
  11209. rotate=rotate,
  11210. keep_proportion=keep_proportion,
  11211. width=width,
  11212. height=height,
  11213. alpha=alpha,
  11214. _imgname=_imgname,
  11215. digests=digests,
  11216. )
  11217. if digests is not None:
  11218. doc.InsertedImages = digests
  11219. return xref
  def insert_link(page: 'Page', lnk: dict, mark: bool = True) -> None:
      """Add the link described by 'lnk' to the page.

      Args:
          lnk: (dict) link description, converted to annotation source text
              by utils.getLinkText().
          mark: (bool) accepted for compatibility; not used by this method.
      Raises:
          ValueError: if no annotation text could be generated for the link.
      """
      CheckParent(page)
      link_source = utils.getLinkText(page, lnk)
      if link_source == "":
          # an empty string signals that the link kind cannot be expressed
          raise ValueError("link kind not supported")
      annot_sources = (link_source,)
      page._addAnnot_FromString(annot_sources)
  def insert_text(
      page: 'Page',
      point: point_like,
      text: typing.Union[str, list],
      *,
      fontsize: float = 11,
      lineheight: OptFloat = None,
      fontname: str = "helv",
      fontfile: OptStr = None,
      set_simple: int = 0,
      encoding: int = 0,
      color: OptSeq = None,
      fill: OptSeq = None,
      border_width: float = 0.05,
      miter_limit: float = 1,
      render_mode: int = 0,
      rotate: int = 0,
      morph: OptSeq = None,
      overlay: bool = True,
      stroke_opacity: float = 1,
      fill_opacity: float = 1,
      oc: int = 0,
  ):
      """Insert text starting at a given point.

      Notes:
          Creates a Shape object, uses its same-named method and commits it.
      Args:
          point: (point-like) passed as first argument to Shape.insert_text.
          text: (str, list) the text, passed as second argument.
          overlay: (bool) passed to Shape.commit.
          All other keyword arguments are forwarded unchanged to
          Shape.insert_text.
      Returns:
          The result of Shape.insert_text. The shape is only committed if
          this value is not negative.
      """
      shape = page.new_shape()
      # every option except 'overlay' goes straight through to the Shape method
      options = {
          "fontsize": fontsize,
          "lineheight": lineheight,
          "fontname": fontname,
          "fontfile": fontfile,
          "set_simple": set_simple,
          "encoding": encoding,
          "color": color,
          "fill": fill,
          "border_width": border_width,
          "render_mode": render_mode,
          "miter_limit": miter_limit,
          "rotate": rotate,
          "morph": morph,
          "stroke_opacity": stroke_opacity,
          "fill_opacity": fill_opacity,
          "oc": oc,
      }
      result = shape.insert_text(point, text, **options)
      if result >= 0:
          shape.commit(overlay)
      return result
  def insert_textbox(
      page: 'Page',
      rect: rect_like,
      buffer: typing.Union[str, list],
      *,
      fontname: str = "helv",
      fontfile: OptStr = None,
      set_simple: int = 0,
      encoding: int = 0,
      fontsize: float = 11,
      lineheight: OptFloat = None,
      color: OptSeq = None,
      fill: OptSeq = None,
      expandtabs: int = 1,
      align: int = 0,
      rotate: int = 0,
      render_mode: int = 0,
      miter_limit: float = 1,
      border_width: float = 0.05,
      morph: OptSeq = None,
      overlay: bool = True,
      stroke_opacity: float = 1,
      fill_opacity: float = 1,
      oc: int = 0,
  ) -> float:
      """Insert text into a given rectangle.

      Notes:
          Creates a Shape object, uses its same-named method and commits it.
          The shape is only committed if the method's result is not negative.
      Parameters:
          rect: (rect-like) area to use for text.
          buffer: text to be inserted
          fontname: a Base-14 font, font name or '/name'
          fontfile: name of a font file
          fontsize: font size
          lineheight: overwrite the font property
          color: RGB color triple
          expandtabs: handles tabulators with string function
          align: left, center, right, justified
          rotate: 0, 90, 180, or 270 degrees
          morph: morph box with a matrix and a fixpoint
          overlay: put text in foreground or background
          Remaining keyword arguments are forwarded unchanged to
          Shape.insert_textbox.
      Returns:
          unused or deficit rectangle area (float)
      """
      shape = page.new_shape()
      # every option except 'overlay' goes straight through to the Shape method
      options = {
          "fontsize": fontsize,
          "lineheight": lineheight,
          "fontname": fontname,
          "fontfile": fontfile,
          "set_simple": set_simple,
          "encoding": encoding,
          "color": color,
          "fill": fill,
          "expandtabs": expandtabs,
          "render_mode": render_mode,
          "miter_limit": miter_limit,
          "border_width": border_width,
          "align": align,
          "rotate": rotate,
          "morph": morph,
          "stroke_opacity": stroke_opacity,
          "fill_opacity": fill_opacity,
          "oc": oc,
      }
      result = shape.insert_textbox(rect, buffer, **options)
      if result >= 0:
          shape.commit(overlay)
      return result
  @property
  def is_wrapped(self):
      """Tell whether /Contents is in a balanced graphics state.

      True exactly when _count_q_balance() reports (0, 0).
      """
      balance = self._count_q_balance()
      return balance == (0, 0)
  @property
  def language(self):
      """Page language.

      Returns:
          The string value of the (inheritable) 'Lang' entry, or None if the
          page has no underlying PDF page object or no such entry exists.
      """
      pdfpage = _as_pdf_page(self.this, required=False)
      if pdfpage.m_internal:
          lang = mupdf.pdf_dict_get_inheritable(pdfpage.obj(), PDF_NAME('Lang'))
          if lang.m_internal:
              return mupdf.pdf_to_str_buf(lang)
      return None
  def links(self, kinds=None):
      """Generator over the links of a page.

      Args:
          kinds: (list) link kinds to subselect from. If None,
                 all links are returned. E.g. kinds=[LINK_URI]
                 will only yield URI links.
      """
      for lnk in self.get_links():
          if kinds is not None and lnk["kind"] not in kinds:
              continue  # caller did not ask for this kind
          yield lnk
  def load_annot(self, ident: typing.Union[str, int]) -> Annot:
      """Load an annot by name (/NM key) or xref.

      Args:
          ident: identifier, either name (str) or xref (int).
      Returns:
          The loaded annotation, registered with this page. A falsy result
          of the internal loader is returned unchanged.
      Raises:
          ValueError: if 'ident' is neither exactly str nor exactly int.
      """
      CheckParent(self)
      ident_type = type(ident)
      if ident_type is str:
          name, xref = ident, 0
      elif ident_type is int:
          name, xref = None, ident
      else:
          raise ValueError("identifier must be a string or integer")
      annot = self._load_annot(name, xref)
      if annot:
          annot.thisown = True
          annot.parent = weakref.proxy(self)  # avoid a strong back-reference
          self._annot_refs[id(annot)] = annot
      return annot
  def load_links(self):
      """Get first Link.

      Returns:
          The first Link of the page, or None if there are no links. For PDF
          documents, 'xref' and 'id' are taken from the first link-type entry
          of annot_xrefs(); otherwise they stay 0 and "".
      """
      CheckParent(self)
      first = mupdf.fz_load_links(self.this)
      if not first.m_internal:
          return None
      link = Link(first)
      link.thisown = True
      link.parent = weakref.proxy(self)  # owning page object
      self._annot_refs[id(link)] = link
      # defaults, used for non-PDF documents or if no link entry is found
      link.xref = 0
      link.id = ""
      if self.parent.is_pdf:
          link_entries = [
              entry for entry in self.annot_xrefs()
              if entry[1] == mupdf.PDF_ANNOT_LINK
          ]
          if link_entries:
              head = link_entries[0]
              link.xref = head[0]
              link.id = head[2]
      return link
  11413. #----------------------------------------------------------------
  11414. # page load widget by xref
  11415. #----------------------------------------------------------------
  def load_widget(self, xref):
      """Load a widget by its xref.

      Returns:
          A Widget filled from the annotation found for 'xref'. If the lookup
          result is falsy, it is returned unchanged.
      """
      CheckParent(self)
      pdf_page = _as_pdf_page(self.this)
      annot = JM_get_widget_by_xref(pdf_page, xref)
      if not annot:
          return annot
      annot.thisown = True
      annot.parent = weakref.proxy(self)
      self._annot_refs[id(annot)] = annot
      widget = Widget()
      TOOLS._fill_widget(annot, widget)
      return widget
  @property
  def mediabox(self):
      """The MediaBox.

      Falls back to the bound of the underlying page if there is no PDF
      page object.
      """
      CheckParent(self)
      pdf_page = self._pdf_page(required=False)
      if pdf_page.m_internal:
          box = JM_mediabox(pdf_page.obj())
      else:
          box = mupdf.fz_bound_page(self.this)
      return Rect(box)
  @property
  def mediabox_size(self):
      """Point made of the MediaBox's x1 and y1 coordinates."""
      box = self.mediabox
      return Point(box.x1, box.y1)
  def new_shape(self):
      """Create a new Shape object for this page."""
      shape = Shape(self)
      return shape
  11447. #@property
  11448. #def parent( self):
  11449. # assert self._parent
  11450. # if self._parent:
  11451. # return self._parent
  11452. # return Document( self.this.document())
  def read_contents(self):
      """Return all /Contents streams concatenated to one bytes object."""
      contents = TOOLS._get_all_contents(self)
      return contents
  def refresh(self):
      """Refresh page after link/annot/widget updates.

      Asks the owning document to reload this page and stores the result
      as the new underlying page object.
      """
      CheckParent(self)
      reloaded = self.parent.reload_page(self)
      # fixme this looks wrong.
      self.this = reloaded
  def replace_image(
      page: 'Page',
      xref: int,
      *,
      filename=None,
      pixmap=None,
      stream=None,
  ):
      """Replace the image referred to by xref.

      The object definition stored under xref is overwritten with the new
      image. The page's appearance instructions stay intact, so the new image
      is displayed with the same bbox, rotation etc.

      Providing a small, fully transparent image gives the effect of a
      deleted image. Another typical use is replacing large images by smaller
      versions, e.g. lower resolution or graylevel instead of colored.

      Args:
          xref: the xref of the image to replace.
          filename, pixmap, stream: exactly one of these must be provided. The
              meaning being the same as in Page.insert_image.
      Raises:
          ValueError: if xref is no image, or if not exactly one image source
              was given.
      """
      doc = page.parent  # the owning document
      if not doc.xref_is_image(xref):
          raise ValueError("xref not an image")
      sources_given = sum(bool(src) for src in (filename, stream, pixmap))
      if sources_given != 1:
          raise ValueError("Exactly one of filename/stream/pixmap must be given")
      # insert the new image anywhere in the page to obtain an image object
      inserted_xref = page.insert_image(
          page.rect,
          filename=filename,
          stream=stream,
          pixmap=pixmap,
      )
      doc.xref_copy(inserted_xref, xref)  # copy over new to old
      # the insertion has created a new /Contents source as the last entry,
      # which is blanked out again here
      newest_contents = page.get_contents()[-1]
      doc.update_stream(newest_contents, b" ")
      page._image_info = None  # clear cache of extracted image information
  @property
  def rotation(self):
      """Page rotation (0 if there is no underlying PDF page object)."""
      CheckParent(self)
      pdf_page = _as_pdf_page(self.this, required=0)
      if pdf_page.m_internal:
          return JM_page_rotation(pdf_page)
      return 0
  @property
  def rotation_matrix(self) -> Matrix:
      """Matrix reflecting the page rotation."""
      raw = TOOLS._rotate_matrix(self)
      return Matrix(raw)
  def run(self, dw, m):
      """Run page through a device.

      Args:
          dw: DeviceWrapper; its 'device' attribute receives the page.
          m: matrix, converted with JM_matrix_from_py.
      """
      CheckParent(self)
      fz_page = self.this
      device = dw.device
      ctm = JM_matrix_from_py(m)
      cookie = mupdf.FzCookie()
      mupdf.fz_run_page(fz_page, device, ctm, cookie)
  def search_for(
      page,
      text,
      *,
      clip=None,
      quads=False,
      flags=None,
      textpage=None,
  ) -> list:
      """Search for a string on a page.

      Args:
          text: string to be searched for
          clip: restrict search to this rectangle
          quads: (bool) return quads instead of rectangles
          flags: bit switches, default: join hyphened words
          textpage: a pre-created pymupdf.TextPage
      Returns:
          a list of rectangles or quads, each containing one occurrence.
      Raises:
          ValueError: if 'textpage' was not created from this page.
      """
      if flags is None:
          flags = (
              0
              | TEXT_DEHYPHENATE
              | TEXT_PRESERVE_WHITESPACE
              | TEXT_PRESERVE_LIGATURES
              | TEXT_MEDIABOX_CLIP
          )
      if clip is not None:
          clip = Rect(clip)
      CheckParent(page)
      make_temporary = textpage is None
      if make_temporary:
          # create a pymupdf.TextPage just for this search
          tp = page.get_textpage(clip=clip, flags=flags)
      else:
          tp = textpage
          if getattr(tp, "parent") != page:
              raise ValueError("not a textpage of this page")
      hits = tp.search(text, quads=quads)
      if make_temporary:
          del tp  # drop our reference to the temporary textpage
      return hits
  def set_artbox(self, rect):
      """Set the ArtBox to 'rect' and return the result of _set_pagebox."""
      outcome = self._set_pagebox("ArtBox", rect)
      return outcome
  def set_bleedbox(self, rect):
      """Set the BleedBox to 'rect' and return the result of _set_pagebox."""
      outcome = self._set_pagebox("BleedBox", rect)
      return outcome
  def set_contents(self, xref):
      """Set object at 'xref' as the page's /Contents.

      Raises:
          ValueError: if the document is closed or not a PDF, if xref is
              outside 1 .. xref_length() - 1, or if it is not a stream.
      """
      CheckParent(self)
      doc = self.parent
      # checks run in this order; the first failing one determines the message
      if doc.is_closed:
          problem = "document closed"
      elif not doc.is_pdf:
          problem = "is no PDF"
      elif xref not in range(1, doc.xref_length()):
          problem = "bad xref"
      elif not doc.xref_is_stream(xref):
          problem = "xref is no stream"
      else:
          problem = None
      if problem is not None:
          raise ValueError(problem)
      reference = "%i 0 R" % xref  # indirect reference to the stream object
      doc.xref_set_key(self.xref, "Contents", reference)
  def set_cropbox(self, rect):
      """Set the CropBox to 'rect'. Will also change Page.rect."""
      outcome = self._set_pagebox("CropBox", rect)
      return outcome
  def set_language(self, language=None):
      """Set PDF page default language.

      Args:
          language: (str) language string, converted with
              fz_text_language_from_string. If empty or None, the page's
              'Lang' entry is deleted instead.
      """
      CheckParent(self)
      pdfpage = _as_pdf_page(self.this)
      if not language:
          mupdf.pdf_dict_del(pdfpage.obj(), PDF_NAME('Lang'))
      else:
          lang = mupdf.fz_text_language_from_string(language)
          assert hasattr(mupdf, 'fz_string_from_text_language2')
          mupdf.pdf_dict_put_text_string(
              # obj() must be called: the dictionary object is needed here,
              # not the bound method
              pdfpage.obj(),
              PDF_NAME('Lang'),
              mupdf.fz_string_from_text_language2(lang),
          )
  def set_mediabox(self, rect):
      """Set the MediaBox.

      The page's CropBox, ArtBox, BleedBox and TrimBox entries are deleted.

      Raises:
          ValueError: if 'rect' is empty or infinite.
      """
      CheckParent(self)
      pdf_page = self._pdf_page()
      mediabox = JM_rect_from_py(rect)
      unusable = (
          mupdf.fz_is_empty_rect(mediabox)
          or mupdf.fz_is_infinite_rect(mediabox)
      )
      if unusable:
          raise ValueError(MSG_BAD_RECT)
      mupdf.pdf_dict_put_rect(pdf_page.obj(), PDF_NAME('MediaBox'), mediabox)
      # remove the other page boxes
      for box_name in ('CropBox', 'ArtBox', 'BleedBox', 'TrimBox'):
          mupdf.pdf_dict_del(pdf_page.obj(), PDF_NAME(box_name))
  def set_rotation(self, rotation):
      """Set page rotation.

      The value is passed through JM_norm_rotation before being stored
      under the page's 'Rotate' key.
      """
      CheckParent(self)
      pdf_page = _as_pdf_page(self.this)
      normalized = JM_norm_rotation(rotation)
      mupdf.pdf_dict_put_int(pdf_page.obj(), PDF_NAME('Rotate'), normalized)
  def set_trimbox(self, rect):
      """Set the TrimBox to 'rect' and return the result of _set_pagebox."""
      outcome = self._set_pagebox("TrimBox", rect)
      return outcome
  def show_pdf_page(
      page,
      rect,
      docsrc,
      pno=0,
      keep_proportion=True,
      overlay=True,
      oc=0,
      rotate=0,
      clip=None,
  ) -> int:
      """Show page number 'pno' of PDF 'docsrc' in rectangle 'rect'.

      Args:
          rect: (rect-like) where to place the source image
          docsrc: (document) source PDF
          pno: (int) source page number, negative values count from the end
          keep_proportion: (bool) do not change width-height-ratio
          overlay: (bool) put in foreground
          oc: (xref) make visibility dependent on this OCG / OCMD (which must be defined in the target PDF)
          rotate: (int) degrees (multiple of 90)
          clip: (rect-like) part of source page rectangle
      Returns:
          xref of inserted object (for reuse)
      Raises:
          ValueError: if source or target is no PDF, if 'rect' or the
              resulting source rectangle is empty or infinite, if a negative
              'pno' is given for a source without pages, or if source and
              target are the same document.
      """

      def calc_matrix(sr, tr, keep=True, rotate=0):
          """Calculate transformation matrix from source to target rect.

          Notes:
              The product of four matrices in this sequence: (1) translate
              the source rect's center to the origin, (2) rotate, (3) scale,
              (4) translate to the target rect's center.
          Args:
              sr: source rect in PDF (!) coordinate system
              tr: target rect in PDF coordinate system
              keep: whether to keep source ratio of width to height
              rotate: rotation angle in degrees
          Returns:
              Transformation matrix.
          """
          # calc center point of source rect
          smp = (sr.tl + sr.br) / 2.0
          # calc center point of target rect
          tmp = (tr.tl + tr.br) / 2.0
          # m moves to (0, 0), then rotates
          m = Matrix(1, 0, 0, 1, -smp.x, -smp.y) * Matrix(rotate)
          sr1 = sr * m  # resulting source rect to calculate scale factors
          fw = tr.width / sr1.width  # scale the width
          fh = tr.height / sr1.height  # scale the height
          if keep:
              fw = fh = min(fw, fh)  # take min if keeping aspect ratio
          m *= Matrix(fw, fh)  # concat scale matrix
          m *= Matrix(1, 0, 0, 1, tmp.x, tmp.y)  # concat move to target center
          return JM_TUPLE(m)

      CheckParent(page)
      doc = page.parent
      if not doc.is_pdf or not docsrc.is_pdf:
          raise ValueError("is no PDF")
      # 'rect' is documented as rect-like: convert so that plain sequences
      # work too (same approach as insert_image)
      rect = Rect(rect)
      if rect.is_empty or rect.is_infinite:
          raise ValueError("rect must be finite and not empty")
      if pno < 0:  # support negative page numbers
          page_count = docsrc.page_count
          if page_count < 1:
              # adding page_count repeatedly would never reach a valid number
              raise ValueError("source document has no pages")
          pno %= page_count
      src_page = docsrc[pno]  # load source page
      tar_rect = rect * ~page.transformation_matrix  # target rect in PDF coordinates
      src_rect = src_page.rect if not clip else src_page.rect & clip  # source rect
      if src_rect.is_empty or src_rect.is_infinite:
          raise ValueError("clip must be finite and not empty")
      src_rect = src_rect * ~src_page.transformation_matrix  # ... in PDF coord
      matrix = calc_matrix(src_rect, tar_rect, keep=keep_proportion, rotate=rotate)

      # names already used by /Form /XObjects, images and fonts of the page
      used_names = [i[1] for i in doc.get_page_xobjects(page.number)]
      used_names += [i[7] for i in doc.get_page_images(page.number)]
      used_names += [i[4] for i in doc.get_page_fonts(page.number)]
      # create a name not in that list
      n = "fzFrm"
      i = 0
      _imgname = n + "0"
      while _imgname in used_names:
          i += 1
          _imgname = n + str(i)

      isrc = docsrc._graft_id  # used as key for graftmaps
      if doc._graft_id == isrc:
          raise ValueError("source document must not equal target")
      # retrieve / make Graftmap for source PDF
      gmap = doc.Graftmaps.get(isrc, None)
      if gmap is None:
          gmap = Graftmap(doc)
          doc.Graftmaps[isrc] = gmap
      # take note of generated xref for automatic reuse
      pno_id = (isrc, pno)  # id of docsrc[pno]
      xref = doc.ShownPages.get(pno_id, 0)
      if overlay:
          page.wrap_contents()  # ensure a balanced graphics state
      xref = page._show_pdf_page(
          src_page,
          overlay=overlay,
          matrix=matrix,
          xref=xref,
          oc=oc,
          clip=src_rect,
          graftmap=gmap,
          _imgname=_imgname,
      )
      doc.ShownPages[pno_id] = xref
      return xref
  @property
  def transformation_matrix(self):
      """Page transformation matrix.

      Without an underlying PDF page object, the converted default FzMatrix
      is returned. For a page rotation that is a multiple of 360, the matrix
      computed by pdf_page_transform is returned, otherwise a matrix flipping
      the y-axis over the CropBox height.
      """
      CheckParent(self)
      ctm = mupdf.FzMatrix()
      pdf_page = self._pdf_page(required=False)
      if not pdf_page.m_internal:
          return JM_py_from_matrix(ctm)
      # fixme: original code passed mediabox=NULL.
      mediabox = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
      mupdf.pdf_page_transform(pdf_page, mediabox, ctm)
      computed = JM_py_from_matrix(ctm)
      if self.rotation % 360 == 0:
          return Matrix(computed)
      return Matrix(1, 0, 0, -1, 0, self.cropbox.height)
  @property
  def trimbox(self):
      """The TrimBox.

      Falls back to the CropBox if no TrimBox is stored. Otherwise the stored
      y-coordinates are flipped against the MediaBox's y1.
      """
      stored = self._other_box("TrimBox")
      if stored is None:
          return self.cropbox
      top = self.mediabox.y1
      left, low, right, high = stored[0], stored[1], stored[2], stored[3]
      return Rect(left, top - high, right, top - low)
  def update_link(page: 'Page', lnk: dict) -> None:
      """Update a link on the current page.

      Args:
          lnk: (dict) link description; its "xref" item names the object
              that is overwritten.
      Raises:
          ValueError: if no annotation text could be generated for the link.
      """
      CheckParent(page)
      link_source = utils.getLinkText(page, lnk)
      if link_source == "":
          raise ValueError("link kind not supported")
      doc = page.parent
      doc.update_object(lnk["xref"], link_source, page=page)
  def widgets(self, types=None):
      """Generator over the widgets of a page.

      Args:
          types: (list) field types to subselect from. If None,
                  all fields are returned. E.g. types=[PDF_WIDGET_TYPE_TEXT]
                  will only yield text fields.
      """
      # collect all widget xrefs first, then load them one by one
      widget_xrefs = [
          entry[0]
          for entry in self.annot_xrefs()
          if entry[1] == mupdf.PDF_ANNOT_WIDGET
      ]
      for xref in widget_xrefs:
          widget = self.load_widget(xref)
          if types is not None and widget.field_type not in types:
              continue  # caller did not ask for this field type
          yield widget
  def wrap_contents(self):
      """Ensure page is in a balanced graphics state.

      Inserts as many "q" commands at the beginning and "Q" commands at the
      end of the contents as _count_q_balance() reports missing.
      """
      missing_push, missing_pop = self._count_q_balance()
      if missing_push > 0:
          # prepend required push commands
          TOOLS._insert_contents(self, b"q\n" * missing_push, False)
      if missing_pop > 0:
          # append required pop commands
          TOOLS._insert_contents(self, b"\nQ" * missing_pop + b"\n", True)
  def write_text(
      page: 'Page',
      rect=None,
      writers=None,
      overlay=True,
      color=None,
      opacity=None,
      keep_proportion=True,
      rotate=0,
      oc=0,
  ) -> None:
      """Write the text of one or more pymupdf.TextWriter objects.

      Args:
          rect: target rectangle. If None, the union of the text writers is used.
          writers: one or more pymupdf.TextWriter objects.
          overlay: put in foreground or background.
          color: passed to each writer's write_text.
          opacity: passed to each writer's write_text.
          keep_proportion: maintain aspect ratio of rectangle sides.
          rotate: arbitrary rotation angle.
          oc: the xref of an optional content object
      Raises:
          ValueError: if no writer was given.
      """
      assert isinstance(page, Page)
      if not writers:
          raise ValueError("need at least one pymupdf.TextWriter")
      if type(writers) is TextWriter:
          if rotate == 0 and rect is None:
              # simple case: the single writer can write directly to the page.
              # 'oc' must be forwarded here as well, otherwise the optional
              # content setting is lost on this path only.
              writers.write_text(
                  page,
                  opacity=opacity,
                  color=color,
                  overlay=overlay,
                  oc=oc,
              )
              return None
          writers = (writers,)
      # general case: write all text to a temporary page of the same size,
      # then show the relevant part of that page in the target rectangle
      clip = writers[0].text_rect
      textdoc = Document()
      tpage = textdoc.new_page(width=page.rect.width, height=page.rect.height)
      for writer in writers:
          clip |= writer.text_rect
          writer.write_text(tpage, opacity=opacity, color=color)
      if rect is None:
          rect = clip
      page.show_pdf_page(
          rect,
          textdoc,
          0,
          overlay=overlay,
          keep_proportion=keep_proportion,
          rotate=rotate,
          clip=clip,
          oc=oc,
      )
      # drop the references to the temporary document and page
      textdoc = None
      tpage = None
  @property
  def xref(self):
      """PDF xref number of page, as reported by the owning document."""
      CheckParent(self)
      doc = self.parent
      return doc.page_xref(self.number)
  # 'rect' is a read-only property whose getter is 'bound'
  rect = property(bound, doc="page rectangle")
  # any result of layout analysis is stored here
  layout_information = None
  11828. class Pixmap:
  11829. def __init__(self, *args):
  11830. """
  11831. Pixmap(colorspace, irect, alpha) - empty pixmap.
  11832. Pixmap(colorspace, src) - copy changing colorspace.
  11833. Pixmap(src, width, height,[clip]) - scaled copy, float dimensions.
  11834. Pixmap(src, alpha=1) - copy and add or drop alpha channel.
  11835. Pixmap(filename) - from an image in a file.
  11836. Pixmap(image) - from an image in memory (bytes).
  11837. Pixmap(colorspace, width, height, samples, alpha) - from samples data.
  11838. Pixmap(PDFdoc, xref) - from an image at xref in a PDF document.
  11839. """
  11840. # Cache for property `self.samples_mv`. Set here so __del_() sees it if
  11841. # we raise.
  11842. #
  11843. self._samples_mv = None
  11844. # 2024-01-16: Experimental support for a memory-view of the underlying
  11845. # data. Doesn't seem to make much difference to Pixmap.set_pixel() so
  11846. # not currently used.
  11847. self._memory_view = None
  11848. if 0:
  11849. pass
  11850. elif args_match(args,
  11851. (Colorspace, mupdf.FzColorspace),
  11852. (mupdf.FzRect, mupdf.FzIrect, IRect, Rect, tuple)
  11853. ):
  11854. # create empty pixmap with colorspace and IRect
  11855. cs, rect = args
  11856. alpha = 0
  11857. pm = mupdf.fz_new_pixmap_with_bbox(cs, JM_irect_from_py(rect), mupdf.FzSeparations(0), alpha)
  11858. self.this = pm
  11859. elif args_match(args,
  11860. (Colorspace, mupdf.FzColorspace),
  11861. (mupdf.FzRect, mupdf.FzIrect, IRect, Rect, tuple),
  11862. (int, bool)
  11863. ):
  11864. # create empty pixmap with colorspace and IRect
  11865. cs, rect, alpha = args
  11866. pm = mupdf.fz_new_pixmap_with_bbox(cs, JM_irect_from_py(rect), mupdf.FzSeparations(0), alpha)
  11867. self.this = pm
  11868. elif args_match(args, (Colorspace, mupdf.FzColorspace, type(None)), (Pixmap, mupdf.FzPixmap)):
  11869. # copy pixmap, converting colorspace
  11870. cs, spix = args
  11871. if isinstance(cs, Colorspace):
  11872. cs = cs.this
  11873. elif cs is None:
  11874. cs = mupdf.FzColorspace(None)
  11875. if isinstance(spix, Pixmap):
  11876. spix = spix.this
  11877. if not mupdf.fz_pixmap_colorspace(spix).m_internal:
  11878. raise ValueError( "source colorspace must not be None")
  11879. if cs.m_internal:
  11880. self.this = mupdf.fz_convert_pixmap(
  11881. spix,
  11882. cs,
  11883. mupdf.FzColorspace(),
  11884. mupdf.FzDefaultColorspaces(None),
  11885. mupdf.FzColorParams(),
  11886. 1
  11887. )
  11888. else:
  11889. self.this = mupdf.fz_new_pixmap_from_alpha_channel( spix)
  11890. if not self.this.m_internal:
  11891. raise RuntimeError( MSG_PIX_NOALPHA)
  11892. elif args_match(args, (Pixmap, mupdf.FzPixmap), (Pixmap, mupdf.FzPixmap)):
  11893. # add mask to a pixmap w/o alpha channel
  11894. spix, mpix = args
  11895. if isinstance(spix, Pixmap):
  11896. spix = spix.this
  11897. if isinstance(mpix, Pixmap):
  11898. mpix = mpix.this
  11899. spm = spix
  11900. mpm = mpix
  11901. if not spix.m_internal: # intercept NULL for spix: make alpha only pix
  11902. dst = mupdf.fz_new_pixmap_from_alpha_channel(mpm)
  11903. if not dst.m_internal:
  11904. raise RuntimeError( MSG_PIX_NOALPHA)
  11905. else:
  11906. dst = mupdf.fz_new_pixmap_from_color_and_mask(spm, mpm)
  11907. self.this = dst
  11908. elif (args_match(args, (Pixmap, mupdf.FzPixmap), (float, int), (float, int), None) or
  11909. args_match(args, (Pixmap, mupdf.FzPixmap), (float, int), (float, int))):
  11910. # create pixmap as scaled copy of another one
  11911. if len(args) == 3:
  11912. spix, w, h = args
  11913. bbox = mupdf.FzIrect(mupdf.fz_infinite_irect)
  11914. else:
  11915. spix, w, h, clip = args
  11916. bbox = JM_irect_from_py(clip)
  11917. src_pix = spix.this if isinstance(spix, Pixmap) else spix
  11918. if not mupdf.fz_is_infinite_irect(bbox):
  11919. pm = mupdf.fz_scale_pixmap(src_pix, src_pix.x(), src_pix.y(), w, h, bbox)
  11920. else:
  11921. pm = mupdf.fz_scale_pixmap(src_pix, src_pix.x(), src_pix.y(), w, h, mupdf.FzIrect(mupdf.fz_infinite_irect))
  11922. self.this = pm
  11923. elif args_match(args, str, (Pixmap, mupdf.FzPixmap)) and args[0] == 'raw':
  11924. # Special raw construction where we set .this directly.
  11925. _, pm = args
  11926. if isinstance(pm, Pixmap):
  11927. pm = pm.this
  11928. self.this = pm
  11929. elif args_match(args, (Pixmap, mupdf.FzPixmap), (int, None)):
  11930. # Pixmap(struct Pixmap *spix, int alpha=1)
  11931. # copy pixmap & add / drop the alpha channel
  11932. spix = args[0]
  11933. alpha = args[1] if len(args) == 2 else 1
  11934. src_pix = spix.this if isinstance(spix, Pixmap) else spix
  11935. if not _INRANGE(alpha, 0, 1):
  11936. raise ValueError( "bad alpha value")
  11937. cs = mupdf.fz_pixmap_colorspace(src_pix)
  11938. if not cs.m_internal and not alpha:
  11939. raise ValueError( "cannot drop alpha for 'NULL' colorspace")
  11940. seps = mupdf.FzSeparations()
  11941. n = mupdf.fz_pixmap_colorants(src_pix)
  11942. w = mupdf.fz_pixmap_width(src_pix)
  11943. h = mupdf.fz_pixmap_height(src_pix)
  11944. pm = mupdf.fz_new_pixmap(cs, w, h, seps, alpha)
  11945. pm.m_internal.x = src_pix.m_internal.x
  11946. pm.m_internal.y = src_pix.m_internal.y
  11947. pm.m_internal.xres = src_pix.m_internal.xres
  11948. pm.m_internal.yres = src_pix.m_internal.yres
  11949. # copy samples data ------------------------------------------
  11950. if 1:
  11951. # We use our pixmap_copy() to get best performance.
  11952. # test_pixmap.py:test_setalpha(): 3.9s t=0.0062
  11953. extra.pixmap_copy( pm.m_internal, src_pix.m_internal, n)
  11954. elif 1:
  11955. # Use memoryview.
  11956. # test_pixmap.py:test_setalpha(): 4.6 t=0.51
  11957. src_view = mupdf.fz_pixmap_samples_memoryview( src_pix)
  11958. pm_view = mupdf.fz_pixmap_samples_memoryview( pm)
  11959. if src_pix.alpha() == pm.alpha(): # identical samples
  11960. #memcpy(tptr, sptr, w * h * (n + alpha));
  11961. size = w * h * (n + alpha)
  11962. pm_view[ 0 : size] = src_view[ 0 : size]
  11963. else:
  11964. tptr = 0
  11965. sptr = 0
  11966. # This is a little faster than calling
  11967. # pm.fz_samples_set(), but still quite slow. E.g. reduces
  11968. # test_pixmap.py:test_setalpha() from 6.7s to 4.5s.
  11969. #
  11970. # t=0.53
  11971. pm_stride = pm.stride()
  11972. pm_n = pm.n()
  11973. pm_alpha = pm.alpha()
  11974. src_stride = src_pix.stride()
  11975. src_n = src_pix.n()
  11976. #log( '{=pm_stride pm_n src_stride src_n}')
  11977. for y in range( h):
  11978. for x in range( w):
  11979. pm_i = pm_stride * y + pm_n * x
  11980. src_i = src_stride * y + src_n * x
  11981. pm_view[ pm_i : pm_i + n] = src_view[ src_i : src_i + n]
  11982. if pm_alpha:
  11983. pm_view[ pm_i + n] = 255
  11984. else:
  11985. # Copy individual bytes from Python. Very slow.
  11986. # test_pixmap.py:test_setalpha(): 6.89 t=2.601
  11987. if src_pix.alpha() == pm.alpha(): # identical samples
  11988. #memcpy(tptr, sptr, w * h * (n + alpha));
  11989. for i in range(w * h * (n + alpha)):
  11990. mupdf.fz_samples_set(pm, i, mupdf.fz_samples_get(src_pix, i))
  11991. else:
  11992. # t=2.56
  11993. tptr = 0
  11994. sptr = 0
  11995. src_pix_alpha = src_pix.alpha()
  11996. for i in range(w * h):
  11997. #memcpy(tptr, sptr, n);
  11998. for j in range(n):
  11999. mupdf.fz_samples_set(pm, tptr + j, mupdf.fz_samples_get(src_pix, sptr + j))
  12000. tptr += n
  12001. if pm.alpha():
  12002. mupdf.fz_samples_set(pm, tptr, 255)
  12003. tptr += 1
  12004. sptr += n + src_pix_alpha
  12005. self.this = pm
  12006. elif args_match(args, (mupdf.FzColorspace, Colorspace), int, int, None, (int, bool)):
  12007. # create pixmap from samples data
  12008. cs, w, h, samples, alpha = args
  12009. if isinstance(cs, Colorspace):
  12010. cs = cs.this
  12011. assert isinstance(cs, mupdf.FzColorspace)
  12012. n = mupdf.fz_colorspace_n(cs)
  12013. stride = (n + alpha) * w
  12014. seps = mupdf.FzSeparations()
  12015. pm = mupdf.fz_new_pixmap(cs, w, h, seps, alpha)
  12016. if isinstance( samples, (bytes, bytearray)):
  12017. #log('using mupdf.python_buffer_data()')
  12018. samples2 = mupdf.python_buffer_data(samples)
  12019. size = len(samples)
  12020. else:
  12021. res = JM_BufferFromBytes(samples)
  12022. if not res.m_internal:
  12023. raise ValueError( "bad samples data")
  12024. size, c = mupdf.fz_buffer_storage(res)
  12025. samples2 = mupdf.python_buffer_data(samples) # raw swig proxy for `const unsigned char*`.
  12026. if stride * h != size:
  12027. raise ValueError( f"bad samples length {w=} {h=} {alpha=} {n=} {stride=} {size=}")
  12028. mupdf.ll_fz_pixmap_copy_raw( pm.m_internal, samples2)
  12029. self.this = pm
  12030. elif args_match(args, None):
  12031. # create pixmap from filename, file object, pathlib.Path or memory
  12032. imagedata, = args
  12033. name = 'name'
  12034. if hasattr(imagedata, "resolve"):
  12035. fname = imagedata.__str__()
  12036. if fname:
  12037. img = mupdf.fz_new_image_from_file(fname)
  12038. elif hasattr(imagedata, name):
  12039. fname = imagedata.name
  12040. if fname:
  12041. img = mupdf.fz_new_image_from_file(fname)
  12042. elif isinstance(imagedata, str):
  12043. img = mupdf.fz_new_image_from_file(imagedata)
  12044. else:
  12045. res = JM_BufferFromBytes(imagedata)
  12046. if not res.m_internal or not res.m_internal.len:
  12047. raise ValueError( "bad image data")
  12048. img = mupdf.fz_new_image_from_buffer(res)
  12049. # Original code passed null for subarea and ctm, but that's not
  12050. # possible with MuPDF's python bindings. The equivalent is an
  12051. # infinite rect and identify matrix scaled by img.w() and img.h().
  12052. pm, w, h = mupdf.fz_get_pixmap_from_image(
  12053. img,
  12054. mupdf.FzIrect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT),
  12055. mupdf.FzMatrix( img.w(), 0, 0, img.h(), 0, 0),
  12056. )
  12057. xres, yres = mupdf.fz_image_resolution(img)
  12058. pm.m_internal.xres = xres
  12059. pm.m_internal.yres = yres
  12060. self.this = pm
  12061. elif args_match(args, (Document, mupdf.FzDocument), int):
  12062. # Create pixmap from PDF image identified by XREF number
  12063. doc, xref = args
  12064. pdf = _as_pdf_document(doc)
  12065. xreflen = mupdf.pdf_xref_len(pdf)
  12066. if not _INRANGE(xref, 1, xreflen-1):
  12067. raise ValueError( MSG_BAD_XREF)
  12068. ref = mupdf.pdf_new_indirect(pdf, xref, 0)
  12069. type_ = mupdf.pdf_dict_get(ref, PDF_NAME('Subtype'))
  12070. if (not mupdf.pdf_name_eq(type_, PDF_NAME('Image'))
  12071. and not mupdf.pdf_name_eq(type_, PDF_NAME('Alpha'))
  12072. and not mupdf.pdf_name_eq(type_, PDF_NAME('Luminosity'))
  12073. ):
  12074. raise ValueError( MSG_IS_NO_IMAGE)
  12075. img = mupdf.pdf_load_image(pdf, ref)
  12076. # Original code passed null for subarea and ctm, but that's not
  12077. # possible with MuPDF's python bindings. The equivalent is an
  12078. # infinite rect and identify matrix scaled by img.w() and img.h().
  12079. pix, w, h = mupdf.fz_get_pixmap_from_image(
  12080. img,
  12081. mupdf.FzIrect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT),
  12082. mupdf.FzMatrix(img.w(), 0, 0, img.h(), 0, 0),
  12083. )
  12084. self.this = pix
  12085. else:
  12086. text = 'Unrecognised args for constructing Pixmap:\n'
  12087. for arg in args:
  12088. text += f' {type(arg)}: {arg}\n'
  12089. raise Exception( text)
  def __len__(self):
      """Make ``len(pixmap)`` return the value of the ``size`` property."""
      nbytes = self.size
      return nbytes
  def __repr__(self):
      """Return ``Pixmap(<colorspace name>, <irect>, <alpha>)``.

      The colorspace name is the literal text ``None`` when the pixmap has
      no colorspace.

      A string is always returned. Instances of subclasses are formatted
      the same way; a bare ``return`` here would hand ``None`` to
      ``repr()``, which raises ``TypeError``.
      """
      cs = self.colorspace
      cs_name = cs.this.m_internal.name if cs else 'None'
      return "Pixmap(%s, %s, %s)" % (cs_name, self.irect, self.alpha)
  def _tobytes(self, format_, jpg_quality):
      '''
      Write the pixmap into a new MuPDF buffer in the requested format and
      return JM_BinFromBuffer() of that buffer.

      format_ codes: 1=PNG, 2=PNM, 3=PAM, 5=PSD, 6=PS, 7=JPEG (the only one
      that uses jpg_quality). Any other code is written as PNG.
      '''
      pix = self.this
      # Initial buffer capacity: one byte per sample of the pixmap.
      buf = mupdf.fz_new_buffer(mupdf.fz_pixmap_stride(pix) * pix.h())
      out = mupdf.FzOutput(buf)
      if format_ == 7:
          mupdf.fz_write_pixmap_as_jpeg(out, pix, jpg_quality, 0)
      else:
          writers = (
              (1, "fz_write_pixmap_as_png"),
              (2, "fz_write_pixmap_as_pnm"),
              (3, "fz_write_pixmap_as_pam"),
              (5, "fz_write_pixmap_as_psd"),
              (6, "fz_write_pixmap_as_ps"),
          )
          # Resolve the writer lazily so only the selected function is
          # looked up on the mupdf module.
          writer_name = next(
              (name for code, name in writers if format_ == code),
              "fz_write_pixmap_as_png",
          )
          getattr(mupdf, writer_name)(out, pix)
      out.fz_close_output()
      return JM_BinFromBuffer(buf)
  def _writeIMG(self, filename, format_, jpg_quality):
      """Save the pixmap to `filename` using the writer for `format_`.

      format_ codes: 1=PNG, 2=PNM, 3=PAM, 5=PSD, 6=PS, 7=JPEG (the only one
      that uses jpg_quality). Any other code is saved as PNG.
      """
      pix = self.this
      if format_ == 7:
          mupdf.fz_save_pixmap_as_jpeg(pix, filename, jpg_quality)
          return
      savers = (
          (1, "fz_save_pixmap_as_png"),
          (2, "fz_save_pixmap_as_pnm"),
          (3, "fz_save_pixmap_as_pam"),
          (5, "fz_save_pixmap_as_psd"),
          (6, "fz_save_pixmap_as_ps"),
      )
      # Resolve the saver lazily so only the selected function is looked up.
      saver_name = next(
          (name for code, name in savers if format_ == code),
          "fz_save_pixmap_as_png",
      )
      getattr(mupdf, saver_name)(pix, filename)
  @property
  def alpha(self):
      """Alpha-channel indicator, as reported by fz_pixmap_alpha()."""
      pix = self.this
      return mupdf.fz_pixmap_alpha(pix)
  def clear_with(self, value=None, bbox=None):
      """Fill all color components with the same value.

      With no `value` the pixmap is cleared via fz_clear_pixmap(). With a
      `value` but no `bbox` the whole pixmap is filled. With both, only the
      area given by `bbox` is filled.
      """
      pix = self.this
      if value is None:
          mupdf.fz_clear_pixmap(pix)
          return
      if bbox is None:
          mupdf.fz_clear_pixmap_with_value(pix, value)
          return
      JM_clear_pixmap_rect_with_value(pix, value, JM_irect_from_py(bbox))
  def color_count(self, colors=0, clip=None):
      '''
      Count colors via JM_color_count().

      Returns the result of JM_color_count() itself when `colors` is true,
      otherwise only its length.
      '''
      histogram = JM_color_count(self.this, clip)
      return histogram if colors else len(histogram)
  def color_topusage(self, clip=None):
      """Return most frequent color and its usage ratio.

      Returns a tuple (ratio, color). The ratio is the count of the most
      frequent color divided by the total count. When nothing was counted,
      (1, bytes of 255 with length self.n) is returned. On ties the color
      seen first wins.
      """
      # A clip that contains the whole pixmap is replaced by the pixmap's
      # own irect.
      if clip is not None and self.irect in Rect(clip):
          clip = self.irect
      total = 0
      best_count = 0
      for color, hits in self.color_count(colors=True, clip=clip).items():
          total += hits
          if hits > best_count:
              best_count, best_color = hits, color
      if not total:
          return (1, bytes([255] * self.n))
      return (best_count / total, best_color)
  @property
  def colorspace(self):
      """Pixmap Colorspace, or None when the wrapped colorspace is named "None"."""
      wrapped = Colorspace(mupdf.fz_pixmap_colorspace(self.this))
      return None if wrapped.name == "None" else wrapped
  def copy(self, src, bbox):
      """Copy the area `bbox` from another Pixmap into this one.

      Raises:
          ValueError: `src` has no colorspace, or the alpha settings of
              source and target differ.
      """
      target = self.this
      source = src.this
      if not mupdf.fz_pixmap_colorspace(source):
          raise ValueError( "cannot copy pixmap with NULL colorspace")
      if target.alpha() != source.alpha():
          raise ValueError( "source and target alpha must be equal")
      area = JM_irect_from_py(bbox)
      default_cs = mupdf.FzDefaultColorspaces(None)
      mupdf.fz_copy_pixmap_rect(target, source, area, default_cs)
  @property
  def digest(self):
      """MD5 digest of the pixmap, as a bytes object built from fz_md5_pixmap2()."""
      return bytes(mupdf.fz_md5_pixmap2(self.this))
  def gamma_with(self, gamma):
      """Apply gamma correction with the given float.

      gamma=1 is a no-op. A pixmap without a colorspace is left untouched
      and a warning is issued instead.
      """
      if mupdf.fz_pixmap_colorspace( self.this):
          mupdf.fz_gamma_pixmap( self.this, gamma)
      else:
          message_warning("colorspace invalid for function")
  @property
  def h(self):
      """Pixmap height, as reported by fz_pixmap_height()."""
      pix = self.this
      return mupdf.fz_pixmap_height(pix)
  def invert_irect(self, bbox=None):
      """Invert the colors inside a bbox.

      Returns False (after a warning) for a pixmap without colorspace,
      True otherwise. If the irect built from `bbox` is infinite, the whole
      pixmap is inverted.
      """
      pix = self.this
      if not mupdf.fz_pixmap_colorspace(pix).m_internal:
          message_warning("ignored for stencil pixmap")
          return False
      area = JM_irect_from_py(bbox)
      if mupdf.fz_is_infinite_irect(area):
          mupdf.fz_invert_pixmap(pix)
      else:
          mupdf.fz_invert_pixmap_rect(pix, area)
      return True
  @property
  def irect(self):
      """Pixmap bbox, converted from fz_pixmap_bbox() by JM_py_from_irect()."""
      return JM_py_from_irect(mupdf.fz_pixmap_bbox(self.this))
  @property
  def is_monochrome(self):
      """Result of fz_is_pixmap_monochrome() for this pixmap."""
      pix = self.this
      return mupdf.fz_is_pixmap_monochrome(pix)
  @property
  def is_unicolor(self):
      '''
      Check if pixmap has only one color.

      Compares the n samples of every pixel with those of the first pixel.
      Returns False at the first mismatch, True otherwise (also when there
      are no pixels).
      '''
      pix = self.this
      n = pix.n()
      total = pix.w() * pix.h() * n

      def read_pixel(offset):
          # The n consecutive sample values starting at `offset`.
          return [mupdf.fz_samples_get(pix, offset + k) for k in range(n)]

      reference = None
      for offset in range(0, total, n):
          if offset == 0:
              reference = read_pixel(0)
          elif read_pixel(offset) != reference:
              return False
      return True
  @property
  def n(self):
      """The size of one pixel: fz_pixmap_components(), or extra.pixmap_n()
      when g_use_extra is set."""
      if g_use_extra:
          # Setting self.__class__.n gives a small reduction in overhead of
          # test_general.py:test_2093, e.g. 1.4x -> 1.3x.
          #return extra.pixmap_n(self.this)
          def n2(self):
              return extra.pixmap_n(self.this)
          # On first access, replace this property on the instance's class
          # with one that calls extra.pixmap_n() directly. The `self.n`
          # read below already goes through the replacement.
          self.__class__.n = property(n2)
          return self.n
      return mupdf.fz_pixmap_components(self.this)
  def pdfocr_save(self, filename, compress=1, language=None, tessdata=None):
      '''
      Save pixmap as an OCR-ed PDF page.

      `filename` is either a str, written via fz_save_pixmap_as_pdfocr(),
      or any other object, which is wrapped with JM_new_output_fileptr()
      and written to.
      '''
      datadir = get_tessdata(tessdata)
      options = mupdf.FzPdfocrOptions()
      options.compress = compress
      if language:
          options.language_set2( language)
      if datadir:
          options.datadir_set2( datadir)
      pix = self.this
      if isinstance(filename, str):
          mupdf.fz_save_pixmap_as_pdfocr( pix, filename, 0, options)
          return
      out = JM_new_output_fileptr( filename)
      try:
          mupdf.fz_write_pixmap_as_pdfocr( out, pix, options)
      finally:
          out.fz_close_output()   # Avoid MuPDF warning.
  def pdfocr_tobytes(self, compress=True, language="eng", tessdata=None):
      """Return the pixmap as an OCR-ed PDF page in memory.

      Args:
          compress: (bool) compress, default 1 (True).
          language: (str) language(s) occurring on page, default "eng" (English),
                  multiples like "eng+ger" for English and German.
          tessdata: (str) folder name of Tesseract's language support. If None
                  we use environment variable TESSDATA_PREFIX or search for
                  Tesseract installation.
      Notes:
          On failure, make sure Tesseract is installed and you have set
          <tessdata> or environment variable "TESSDATA_PREFIX" to the folder
          containing your Tesseract's language support data.
      """
      datadir = get_tessdata(tessdata)
      from io import BytesIO
      sink = BytesIO()
      # pdfocr_save() writes into any non-str target, so it does the work.
      self.pdfocr_save(sink, compress=compress, language=language, tessdata=datadir)
      return sink.getvalue()
  def pil_image(self):
      """Create a Pillow Image from the Pixmap.

      Mode selection: no colorspace -> "L"; 1 component -> "L" or "LA";
      3 components -> "RGB" or "RGBA" (the "A" variants when alpha is set);
      anything else -> "CMYK". An ImportError from Pillow is re-raised
      after a message is issued.
      """
      try:
          from PIL import Image
      except ImportError:
          message("PIL/Pillow not installed")
          raise

      cspace = self.colorspace
      if not cspace:
          mode = "L"
      elif cspace.n == 1:
          mode = "LA" if self.alpha else "L"
      elif cspace.n == 3:
          mode = "RGBA" if self.alpha else "RGB"
      else:
          mode = "CMYK"
      dimensions = (self.width, self.height)
      return Image.frombytes(mode, dimensions, self.samples)
  def pil_save(self, *args, **kwargs):
      """Write to image file using Pillow.

      An intermediate PIL Image is created via pil_image() and its "save"
      method receives all positional and keyword arguments unchanged. If
      the caller did not pass "dpi", (self.xres, self.yres) is supplied.
      See the Pillow documentation for the meaning of the parameters.
      Use this when other output formats are desired.
      """
      image = self.pil_image()
      if "dpi" not in kwargs:
          kwargs["dpi"] = (self.xres, self.yres)
      image.save(*args, **kwargs)
  def pil_tobytes(self, *args, **kwargs):
      """Convert to an image in memory using Pillow.

      An intermediate PIL Image is created via pil_image() and saved into
      an in-memory stream, whose content is returned as bytes. Positional
      and keyword arguments go to the image's "save" method after the
      stream. If the caller did not pass "dpi", (self.xres, self.yres) is
      supplied.
      Use this when other output formats are desired.
      """
      sink = io.BytesIO()
      image = self.pil_image()
      if "dpi" not in kwargs:
          kwargs["dpi"] = (self.xres, self.yres)
      image.save(sink, *args, **kwargs)
      return sink.getvalue()
  def pixel(self, x, y):
      """Get color tuple of pixel (x, y).

      Last item is the alpha if Pixmap.alpha is true. With g_use_extra set
      the lookup is delegated to extra.pixmap_pixel(). Otherwise the n
      samples of the pixel are read from the samples memoryview.
      """
      if g_use_extra:
          return extra.pixmap_pixel(self.this.m_internal, x, y)
      internal = self.this.m_internal
      outside = x < 0 or x >= internal.w or y < 0 or y >= internal.h
      if outside:
          RAISEPY(MSG_PIXEL_OUTSIDE, PyExc_ValueError)
      n = internal.n
      # Offset of the pixel's first sample inside the samples buffer.
      start = internal.stride * y + n * x
      return tuple( self.samples_mv[ start: start+n])
  @property
  def samples(self)->bytes:
      """A bytes copy of the samples memoryview."""
      return bytes( self.samples_mv)
  @property
  def samples_mv(self):
      '''
      Pixmap samples memoryview, created on first access and then cached.
      '''
      # The view is kept on the instance so that `__del__()` can release
      # it. Using it after this object has been destructed would otherwise
      # fail and possibly crash Python; this is #4155.
      cached = self._samples_mv
      if cached is None:
          cached = mupdf.fz_pixmap_samples_memoryview(self.this)
          self._samples_mv = cached
      return cached
  def _samples_mv_release(self):
      """Release the cached samples memoryview and forget it.

      The cache slot is reset to None so that the `samples_mv` property
      creates a fresh view on its next access instead of handing out the
      released one. The check is `is not None` rather than truthiness: an
      empty view is falsy and would never be released, and testing a
      released view raises ValueError.
      """
      mv = self._samples_mv
      if mv is not None:
          mv.release()    # Calling release() more than once is harmless.
          self._samples_mv = None
  @property
  def samples_ptr(self):
      """Value of fz_pixmap_samples_int() for this pixmap."""
      pix = self.this
      return mupdf.fz_pixmap_samples_int(pix)
  def save(self, filename, output=None, jpg_quality=95):
      """Output as image in format determined by filename extension.

      Args:
          filename: a str, an object with an "absolute" attribute (converted
              with str()), or an object with a "name" attribute (its name
              is used).
          output: (str) only use to overrule filename extension. Default is PNG.
              Others are JPEG, JPG, PNM, PGM, PPM, PBM, PAM, PSD, PS.
          jpg_quality: passed on to the writer; only used for JPEG.
      Raises:
          ValueError: unknown format, alpha present for a format that cannot
              store it, or unsupported colorspace for the format.
      """
      valid_formats = {
          "png": 1,
          "pnm": 2,
          "pgm": 2,
          "ppm": 2,
          "pbm": 2,
          "pam": 3,
          "psd": 5,
          "ps": 6,
          "jpg": 7,
          "jpeg": 7,
      }

      # Reduce path-like and file-like arguments to a plain file name.
      if type(filename) is not str:
          if hasattr(filename, "absolute"):
              filename = str(filename)
          elif hasattr(filename, "name"):
              filename = filename.name

      if output is None:
          # Extension without its leading dot.
          output = os.path.splitext(filename)[1][1:]

      idx = valid_formats.get(output.lower(), None)
      if idx is None:
          raise ValueError(f"Image format {output} not in {tuple(valid_formats.keys())}")
      if self.alpha and idx in (2, 6, 7):
          raise ValueError("'%s' cannot have alpha" % output)
      if self.colorspace and self.colorspace.n > 3 and idx in (1, 2, 4):
          raise ValueError(f"unsupported colorspace for '{output}'")
      if idx == 7:
          self.set_dpi(self.xres, self.yres)
      return self._writeIMG(filename, idx, jpg_quality)
  def set_alpha(self, alphavalues=None, premultiply=1, opaque=None, matte=None):
      """Set alpha channel to values contained in a byte array.
      If omitted, set alphas to 255.

      Args:
          alphavalues: (bytes) with length (width * height) or 'None'.
          premultiply: (bool, True) premultiply colors with alpha values.
          opaque: (tuple, length colorspace.n) this color receives opacity 0.
          matte: (tuple, length colorspace.n)) preblending background color.
      Raises:
          ValueError: the pixmap has no alpha channel, or `alphavalues`
              holds fewer than width * height items.
      """
      pix = self.this
      if pix.alpha() == 0:
          raise ValueError( MSG_PIX_NOALPHA)
      n = mupdf.fz_pixmap_colorants(pix)
      w = mupdf.fz_pixmap_width(pix)
      h = mupdf.fz_pixmap_height(pix)
      balen = w * h * (n+1)
      colors = [0, 0, 0, 0]   # make this color opaque
      bgcolor = [0, 0, 0, 0]  # preblending background color
      zero_out = 0
      bground = 0
      # `opaque` / `matte` are only honoured when they are a list or tuple
      # with exactly one entry per colorant; otherwise they are ignored.
      if opaque and isinstance(opaque, (list, tuple)) and len(opaque) == n:
          for i in range(n):
              colors[i] = opaque[i]
          zero_out = 1
      if matte and isinstance( matte, (tuple, list)) and len(matte) == n:
          for i in range(n):
              bgcolor[i] = matte[i]
          bground = 1
      data = bytes()
      data_len = 0
      if alphavalues:
          # The bytes-like object is handed to the helper directly; no
          # intermediate MuPDF buffer is created.
          if isinstance(alphavalues, (bytes, bytearray)):
              data = alphavalues
              data_len = len(alphavalues)
          else:
              assert 0, f'unexpected type for alphavalues: {type(alphavalues)}'
          if data_len < w * h:
              raise ValueError( "bad alpha values")
      # The per-sample work is done by the C helper for speed. The pure
      # Python fallback that used to follow was unreachable and is gone.
      mupdf.Pixmap_set_alpha_helper(
              balen,
              n,
              data_len,
              zero_out,
              mupdf.python_buffer_data( data),
              pix.m_internal,
              premultiply,
              bground,
              colors,
              bgcolor,
              )
  def tobytes(self, output="png", jpg_quality=95):
      '''
      Convert to binary image stream of desired type.

      Args:
          output: (str) format name, case-insensitive: png, pnm, pgm, ppm,
              pbm, pam, tga, tpic, psd, ps, jpg, jpeg.
          jpg_quality: passed on to the writer; only used for JPEG.
      Returns:
          Whatever self._tobytes() returns for the selected format code.
      Raises:
          ValueError: unknown format, alpha present for a format that cannot
              store it, or unsupported colorspace for the format.
      '''
      # NOTE(review): code 4 (tga / tpic) has no dedicated writer in
      # _tobytes() and is therefore emitted as PNG - confirm this is intended.
      valid_formats = {
              "png": 1,
              "pnm": 2,
              "pgm": 2,
              "ppm": 2,
              "pbm": 2,
              "pam": 3,
              "tga": 4,
              "tpic": 4,
              "psd": 5,
              "ps": 6,
              'jpg': 7,
              'jpeg': 7,
              }
      idx = valid_formats.get(output.lower(), None)
      if idx is None:
          raise ValueError(f"Image format {output} not in {tuple(valid_formats.keys())}")
      if self.alpha and idx in (2, 6, 7):
          # The f-prefix is required so the format name is interpolated.
          raise ValueError(f"'{output}' cannot have alpha")
      if self.colorspace and self.colorspace.n > 3 and idx in (1, 2, 4):
          raise ValueError(f"unsupported colorspace for '{output}'")
      if idx == 7:
          self.set_dpi(self.xres, self.yres)
      return self._tobytes(idx, jpg_quality)
  def set_dpi(self, xres, yres):
      """Store `xres` and `yres` as the resolution of the underlying pixmap."""
      internal = self.this.m_internal
      internal.xres = xres
      internal.yres = yres
  def set_origin(self, x, y):
      """Store `x` and `y` as the top-left coordinates of the underlying pixmap."""
      internal = self.this.m_internal
      internal.x = x
      internal.y = y
  def set_pixel(self, x, y, color):
      """Set color of pixel (x, y).

      With g_use_extra set the work is delegated to extra.set_pixel().
      Otherwise the first n items of `color` (n = components per pixel) are
      validated and then written sample by sample.

      Raises:
          ValueError: (x, y) lies outside the pixmap, or a color component
              is outside 0..255.
      """
      if g_use_extra:
          return extra.set_pixel(self.this.m_internal, x, y, color)
      pm = self.this
      if not _INRANGE(x, 0, pm.w() - 1) or not _INRANGE(y, 0, pm.h() - 1):
          raise ValueError( MSG_PIXEL_OUTSIDE)
      n = pm.n()
      # Validate every component before writing any, so that a bad color
      # leaves the pixel untouched.
      for j in range(n):
          if not _INRANGE(color[j], 0, 255):
              raise ValueError( MSG_BAD_COLOR_SEQ)
      # Offset of the pixel's first sample inside the samples buffer.
      offset = mupdf.fz_pixmap_stride( pm) * y + n * x
      for j in range(n):
          pm.fz_samples_set(offset + j, color[j])
  def set_rect(self, bbox, color):
      """Set color of all pixels in bbox.

      The first n items of `color` (n = components per pixel) must each lie
      in 0..255, otherwise ValueError is raised. Returns the truth value of
      JM_fill_pixmap_rect_with_color()'s result.
      """
      pm = self.this
      components = []
      for j in range(pm.n()):
          value = color[j]
          if not _INRANGE(value, 0, 255):
              raise ValueError( MSG_BAD_COLOR_SEQ)
          components.append(value)
      area = JM_irect_from_py(bbox)
      return bool(JM_fill_pixmap_rect_with_color(pm, components, area))
  def shrink(self, factor):
      """Divide width and height by 2**factor.
      E.g. factor=1 shrinks to 25% of original size (in place).

      A factor below 1 is ignored with a warning.
      """
      if factor < 1:
          message_warning("ignoring shrink factor < 1")
          return
      mupdf.fz_subsample_pixmap( self.this, factor)
      # Pixmap has changed so clear our memory view.
      self._memory_view = None
      self._samples_mv_release()
      # Also forget the released samples view. Otherwise the `samples_mv`
      # property would keep returning it, and any later use would raise.
      self._samples_mv = None
  @property
  def size(self):
      """Pixmap size, as reported by fz_pixmap_size()."""
      pix = self.this
      return mupdf.fz_pixmap_size(pix)
  @property
  def stride(self):
      """Length of one image line (width * n), taken from the wrapped pixmap's stride()."""
      pix = self.this
      return pix.stride()
  def tint_with(self, black, white):
      """Tint colors with modifiers for black and white.

      Only applied when the pixmap has a colorspace with at most 3
      components. Otherwise a message is issued and None is returned.
      """
      cspace_ok = self.colorspace and not self.colorspace.n > 3
      if not cspace_ok:
          message("warning: colorspace invalid for function")
          return
      return mupdf.fz_tint_pixmap( self.this, black, white)
  @property
  def w(self):
      """Pixmap width, as reported by fz_pixmap_width()."""
      pix = self.this
      return mupdf.fz_pixmap_width(pix)
  def warp(self, quad, width, height):
      """Return a new Pixmap of size width x height made from a warped quad.

      Raises:
          ValueError: `quad` is not convex.
      """
      if not quad.is_convex:
          raise ValueError("quad must be convex")
      fz_quad = JM_quad_from_py(quad)
      # Corner order handed to fz_warp_pixmap(): ul, ur, lr, ll.
      corners = [ fz_quad.ul, fz_quad.ur, fz_quad.lr, fz_quad.ll]
      warped = mupdf.fz_warp_pixmap( self.this, corners, width, height)
      return Pixmap( warped)
  @property
  def x(self):
      """x component of Pixmap origin, as reported by fz_pixmap_x()."""
      pix = self.this
      return mupdf.fz_pixmap_x(pix)
  @property
  def xres(self):
      """Resolution in x direction, taken from the wrapped pixmap's xres()."""
      pix = self.this
      return pix.xres()
  @property
  def y(self):
      """y component of Pixmap origin, as reported by fz_pixmap_y()."""
      pix = self.this
      return mupdf.fz_pixmap_y(pix)
  @property
  def yres(self):
      """Resolution in y direction, taken from the wrapped pixmap's yres()."""
      pix = self.this
      return pix.yres()
  # Long-name aliases: `width` and `height` are the very same property
  # objects as `w` and `h`.
  width = w
  height = h
  def __del__(self):
      """Release the cached samples memoryview, if there is one.

      Written defensively because a finalizer also runs for objects whose
      construction failed part-way:
      - getattr() with a default copes with `_samples_mv` never having
        been assigned.
      - `is not None` is used instead of truthiness, because an empty view
        is falsy and testing an already released view raises ValueError.
      """
      mv = getattr(self, "_samples_mv", None)
      if mv is not None:
          mv.release()    # Calling release() more than once is harmless.
  12630. del Point
  class Point:
      """Point in the plane with coordinates `x` and `y`.

      Acts like a sequence of length 2 (indexing, len(), iteration through
      indexing) and supports arithmetic with numbers, 2-item sequences and
      matrix-like objects.
      """

      def __abs__(self):
          """Euclidean length of the vector (x, y)."""
          squared = self.x * self.x + self.y * self.y
          return math.sqrt(squared)

      def __add__(self, p):
          """Add a number to both coordinates, or a 2-item sequence item by item."""
          if not hasattr(p, "__float__"):
              if len(p) != 2:
                  raise ValueError("Point: bad seq len")
              return Point(self.x + p[0], self.y + p[1])
          return Point(self.x + p, self.y + p)

      def __bool__(self):
          """False only when largest and smallest coordinate both equal 0."""
          largest = max(self)
          smallest = min(self)
          return not (largest == smallest == 0)

      def __eq__(self, p):
          """Equal to any object of length 2 whose difference to this point is falsy."""
          if hasattr(p, "__len__") and len(p) == 2:
              return not (self - p)
          return False

      def __getitem__(self, i):
          """Index into the tuple (x, y)."""
          coords = (self.x, self.y)
          return coords[i]

      def __hash__(self):
          """Hash of the tuple (x, y)."""
          return hash(tuple(self))

      def __init__(self, *args, x=None, y=None):
          '''
          Point() - all zeros
          Point(x, y)
          Point(Point) - new copy
          Point(sequence) - from 'sequence'

          A single mupdf.FzPoint / mupdf.fz_point argument is also accepted.
          Its x and y are taken over as they are.
          Explicit keyword args x, y override earlier settings if not None.
          '''
          count = len(args)
          if count == 0:
              self.x = 0.0
              self.y = 0.0
          elif count == 1:
              source = args[0]
              if isinstance(source, (mupdf.FzPoint, mupdf.fz_point)):
                  self.x = source.x
                  self.y = source.y
              else:
                  if not hasattr(source, "__getitem__"):
                      raise ValueError("Point: bad args")
                  if len(source) != 2:
                      raise ValueError("Point: bad seq len")
                  self.x = float(source[0])
                  self.y = float(source[1])
          elif count == 2:
              self.x = float(args[0])
              self.y = float(args[1])
          else:
              raise ValueError("Point: bad seq len")
          # Keyword values are stored as given (no float conversion).
          if x is not None:
              self.x = x
          if y is not None:
              self.y = y

      def __len__(self):
          """A point always has two items."""
          return 2

      def __mul__(self, m):
          """Scale by a number, form the dot product with a 2-item sequence,
          or transform a copy of this point with anything else (matrix-like)."""
          if hasattr(m, "__float__"):
              return Point(self.x * m, self.y * m)
          if hasattr(m, "__getitem__") and len(m) == 2:
              # dot product
              return self.x * m[0] + self.y * m[1]
          return Point(self).transform(m)

      def __neg__(self):
          """New point with both coordinates negated."""
          return Point(-self.x, -self.y)

      def __pos__(self):
          """New copy of this point."""
          return Point(self)

      def __repr__(self):
          """The text "Point" followed by the tuple (x, y)."""
          return f"Point{tuple(self)}"

      def __setitem__(self, i, v):
          """Set x (index 0) or y (index 1) to float(v)."""
          value = float(v)
          if i == 0:
              self.x = value
              return None
          if i == 1:
              self.y = value
              return None
          raise IndexError("index out of range")

      def __sub__(self, p):
          """Subtract a number from both coordinates, or a 2-item sequence item by item."""
          if not hasattr(p, "__float__"):
              if len(p) != 2:
                  raise ValueError("Point: bad seq len")
              return Point(self.x - p[0], self.y - p[1])
          return Point(self.x - p, self.y - p)

      def __truediv__(self, m):
          """Divide by a number, or transform a copy of this point with the
          inverse of matrix-like m. Raises ZeroDivisionError when
          util_invert_matrix() yields no usable inverse."""
          if hasattr(m, "__float__"):
              return Point(self.x * 1. / m, self.y * 1. / m)
          inverse = util_invert_matrix(m)[1]
          if not inverse:
              raise ZeroDivisionError("matrix not invertible")
          return Point(self).transform(inverse)

      @property
      def abs_unit(self):
          """Unit vector with positive coordinates."""
          squared = self.x * self.x + self.y * self.y
          if squared < EPSILON:
              return Point(0, 0)
          length = math.sqrt(squared)
          return Point(abs(self.x) / length, abs(self.y) / length)

      def distance_to(self, *args):
          """Return distance to rectangle or another point.

          args[0] is point-like (length 2) or rect-like (length 4). The
          optional args[1] is the unit of the result: "px" (default), "in",
          "cm" or "mm".
          """
          if not len(args) > 0:
              raise ValueError("at least one parameter must be given")
          target = args[0]
          size = len(target)
          if size == 2:
              target = Point(target)
          elif size == 4:
              target = Rect(target)
          else:
              raise ValueError("arg1 must be point-like or rect-like")
          unit = args[1] if len(args) > 1 else "px"
          factors = {"px": (1., 1.), "in": (1., 72.), "cm": (2.54, 72.),
                     "mm": (25.4, 72.)}
          numerator, denominator = factors[unit]
          f = numerator / denominator

          if type(target) is Point:
              return abs(self - target) * f

          # from here on, target is a rectangle
          # as a safeguard, make a finite copy of it
          r = Rect(target.top_left, target.top_left) | target.bottom_right
          if self in r:
              return 0.0

          below = self.y >= r.y1
          if r.x0 <= self.x <= r.x1:
              # Horizontally inside the rectangle: vertical distance only.
              return (self.y - r.y1) * f if below else (r.y0 - self.y) * f

          right = self.x > r.x1
          if below:
              corner = r.bottom_right if right else r.bottom_left
              return self.distance_to(corner, unit)
          if self.y <= r.y0:
              corner = r.top_right if right else r.top_left
              return self.distance_to(corner, unit)
          # Vertically inside the rectangle: horizontal distance only.
          return (self.x - r.x1) * f if right else (r.x0 - self.x) * f

      def transform(self, m):
          """Replace point by its transformation with matrix-like m."""
          if len(m) != 6:
              raise ValueError("Matrix: bad seq len")
          new_x, new_y = util_transform_point(self, m)
          self.x = new_x
          self.y = new_y
          return self

      @property
      def unit(self):
          """Unit vector of the point."""
          squared = self.x * self.x + self.y * self.y
          if squared < EPSILON:
              return Point(0, 0)
          length = math.sqrt(squared)
          return Point(self.x / length, self.y / length)

      # Aliases for legacy special-method names and for `norm`.
      __nonzero__ = __bool__
      __div__ = __truediv__
      norm = __abs__
  12790. class Quad:
  def __abs__(self):
      """Product of the lengths of the edges ul-ur and ul-ll; 0.0 for an empty quad."""
      if self.is_empty:
          return 0.0
      top_edge = abs(self.ul - self.ur)
      left_edge = abs(self.ul - self.ll)
      return top_edge * left_edge
  def __add__(self, q):
      """Add a number to every corner, or a 4-item sequence corner by corner."""
      if not hasattr(q, "__float__"):
          if len(q) != 4:
              raise ValueError("Quad: bad seq len")
          return Quad(self.ul + q[0], self.ur + q[1], self.ll + q[2], self.lr + q[3])
      return Quad(self.ul + q, self.ur + q, self.ll + q, self.lr + q)
  def __bool__(self):
      """A quad is truthy exactly when `is_empty` is false."""
      empty = self.is_empty
      return not empty
  def __contains__(self, x):
      """Containment test for point-like, rect-like and quad-like `x`.

      Objects without a usable __len__, or of a length other than 2 or 4,
      are never contained. An empty rectangle is always contained. Any
      other rectangle is tested by its two corner points, a quad by all
      four corners.
      """
      try:
          size = x.__len__()
      except Exception:
          if g_exceptions_verbose > 1:    exception_info()
          return False
      if size == 2:
          return util_point_in_quad(x, self)
      if size != 4:
          return False
      if CheckRect(x):
          if Rect(x).is_empty:
              return True
          return util_point_in_quad(x[:2], self) and util_point_in_quad(x[2:], self)
      if CheckQuad(x):
          return all(util_point_in_quad(x[i], self) for i in range(4))
      return False
  def __eq__(self, quad):
      """Equal to any object of length 4 whose items match ul, ur, ll, lr in that order."""
      if not hasattr(quad, "__len__"):
          return False
      if len(quad) != 4:
          return False
      return (
          self.ul == quad[0]
          and self.ur == quad[1]
          and self.ll == quad[2]
          and self.lr == quad[3]
      )
  def __getitem__(self, i):
      """Index into the tuple (ul, ur, ll, lr)."""
      corners = (self.ul, self.ur, self.ll, self.lr)
      return corners[i]
  def __hash__(self):
      """Hash of the tuple of the four corners."""
      corners = tuple(self)
      return hash(corners)
  def __init__(self, *args, ul=None, ur=None, ll=None, lr=None):
      '''
      Quad() - all zero points
      Quad(ul, ur, ll, lr)
      Quad(quad) - new copy
      Quad(sequence) - from 'sequence'

      A single mupdf.FzQuad argument is also accepted. It is kept as
      `self.this` and its corners are copied.
      Explicit keyword args ul, ur, ll, lr override earlier settings if not
      None.

      Raises:
          ValueError: wrong number of arguments, or a single argument that
              is not indexable or not of length 4.
      '''
      if not args:
          # Four distinct Point objects. Point is mutable, so sharing one
          # instance would make an in-place change of any corner show up
          # in all four.
          self.ul, self.ur, self.ll, self.lr = Point(), Point(), Point(), Point()
      elif len(args) > 4:
          raise ValueError("Quad: bad seq len")
      elif len(args) == 4:
          self.ul, self.ur, self.ll, self.lr = map(Point, args)
      elif len(args) == 1:
          l = args[0]
          if isinstance(l, mupdf.FzQuad):
              self.this = l
              self.ul, self.ur, self.ll, self.lr = Point(l.ul), Point(l.ur), Point(l.ll), Point(l.lr)
          elif not hasattr(l, "__getitem__"):
              raise ValueError("Quad: bad args")
          elif len(l) != 4:
              raise ValueError("Quad: bad seq len")
          else:
              self.ul, self.ur, self.ll, self.lr = map(Point, l)
      else:
          # Two or three positional arguments.
          raise ValueError("Quad: bad args")
      if ul is not None:  self.ul = Point(ul)
      if ur is not None:  self.ur = Point(ur)
      if ll is not None:  self.ll = Point(ll)
      if lr is not None:  self.lr = Point(lr)
  12868. def __len__(self):
  12869. return 4
  12870. def __mul__(self, m):
  12871. q = Quad(self)
  12872. q = q.transform(m)
  12873. return q
  12874. def __neg__(self):
  12875. return Quad(-self.ul, -self.ur, -self.ll, -self.lr)
  12876. def __nonzero__(self):
  12877. return not self.is_empty
  12878. def __pos__(self):
  12879. return Quad(self)
  12880. def __repr__(self):
  12881. return "Quad" + str(tuple(self))
  12882. def __setitem__(self, i, v):
  12883. if i == 0: self.ul = Point(v)
  12884. elif i == 1: self.ur = Point(v)
  12885. elif i == 2: self.ll = Point(v)
  12886. elif i == 3: self.lr = Point(v)
  12887. else:
  12888. raise IndexError("index out of range")
  12889. return None
  12890. def __sub__(self, q):
  12891. if hasattr(q, "__float__"):
  12892. return Quad(self.ul - q, self.ur - q, self.ll - q, self.lr - q)
  12893. if len(q) != 4:
  12894. raise ValueError("Quad: bad seq len")
  12895. return Quad(self.ul - q[0], self.ur - q[1], self.ll - q[2], self.lr - q[3])
  12896. def __truediv__(self, m):
  12897. if hasattr(m, "__float__"):
  12898. im = 1. / m
  12899. else:
  12900. im = util_invert_matrix(m)[1]
  12901. if not im:
  12902. raise ZeroDivisionError("Matrix not invertible")
  12903. q = Quad(self)
  12904. q = q.transform(im)
  12905. return q
  12906. @property
  12907. def is_convex(self):
  12908. """Check if quad is convex and not degenerate.
  12909. Notes:
  12910. Check that for the two diagonals, the other two corners are not
  12911. on the same side of the diagonal.
  12912. Returns:
  12913. True or False.
  12914. """
  12915. m = planish_line(self.ul, self.lr) # puts this diagonal on x-axis
  12916. p1 = self.ll * m # transform the
  12917. p2 = self.ur * m # other two points
  12918. if p1.y * p2.y > 0:
  12919. return False
  12920. m = planish_line(self.ll, self.ur) # puts other diagonal on x-axis
  12921. p1 = self.lr * m # transform the
  12922. p2 = self.ul * m # remaining points
  12923. if p1.y * p2.y > 0:
  12924. return False
  12925. return True
  12926. @property
  12927. def is_empty(self):
  12928. """Check whether all quad corners are on the same line.
  12929. This is the case if width or height is zero.
  12930. """
  12931. return self.width < EPSILON or self.height < EPSILON
  12932. @property
  12933. def is_infinite(self):
  12934. """Check whether this is the infinite quad."""
  12935. return self.rect.is_infinite
  12936. @property
  12937. def is_rectangular(self):
  12938. """Check if quad is rectangular.
  12939. Notes:
  12940. Some rotation matrix can thus transform it into a rectangle.
  12941. This is equivalent to three corners enclose 90 degrees.
  12942. Returns:
  12943. True or False.
  12944. """
  12945. sine = util_sine_between(self.ul, self.ur, self.lr)
  12946. if abs(sine - 1) > EPSILON: # the sine of the angle
  12947. return False
  12948. sine = util_sine_between(self.ur, self.lr, self.ll)
  12949. if abs(sine - 1) > EPSILON:
  12950. return False
  12951. sine = util_sine_between(self.lr, self.ll, self.ul)
  12952. if abs(sine - 1) > EPSILON:
  12953. return False
  12954. return True
  12955. def morph(self, p, m):
  12956. """Morph the quad with matrix-like 'm' and point-like 'p'.
  12957. Return a new quad."""
  12958. if self.is_infinite:
  12959. return INFINITE_QUAD()
  12960. delta = Matrix(1, 1).pretranslate(p.x, p.y)
  12961. q = self * ~delta * m * delta
  12962. return q
  12963. @property
  12964. def rect(self):
  12965. r = Rect()
  12966. r.x0 = min(self.ul.x, self.ur.x, self.lr.x, self.ll.x)
  12967. r.y0 = min(self.ul.y, self.ur.y, self.lr.y, self.ll.y)
  12968. r.x1 = max(self.ul.x, self.ur.x, self.lr.x, self.ll.x)
  12969. r.y1 = max(self.ul.y, self.ur.y, self.lr.y, self.ll.y)
  12970. return r
  12971. def transform(self, m):
  12972. """Replace quad by its transformation with matrix m."""
  12973. if hasattr(m, "__float__"):
  12974. pass
  12975. elif len(m) != 6:
  12976. raise ValueError("Matrix: bad seq len")
  12977. self.ul *= m
  12978. self.ur *= m
  12979. self.ll *= m
  12980. self.lr *= m
  12981. return self
  12982. __div__ = __truediv__
  12983. width = property(lambda self: max(abs(self.ul - self.ur), abs(self.ll - self.lr)))
  12984. height = property(lambda self: max(abs(self.ul - self.ll), abs(self.ur - self.lr)))
  12985. class Rect:
  12986. def __abs__(self):
  12987. if self.is_empty or self.is_infinite:
  12988. return 0.0
  12989. return (self.x1 - self.x0) * (self.y1 - self.y0)
  12990. def __add__(self, p):
  12991. if hasattr(p, "__float__"):
  12992. return Rect(self.x0 + p, self.y0 + p, self.x1 + p, self.y1 + p)
  12993. if len(p) != 4:
  12994. raise ValueError("Rect: bad seq len")
  12995. return Rect(self.x0 + p[0], self.y0 + p[1], self.x1 + p[2], self.y1 + p[3])
  12996. def __and__(self, x):
  12997. if not hasattr(x, "__len__"):
  12998. raise ValueError("bad operand 2")
  12999. r1 = Rect(x)
  13000. r = Rect(self)
  13001. return r.intersect(r1)
  13002. def __bool__(self):
  13003. return not (max(self) == min(self) == 0)
  13004. def __contains__(self, x):
  13005. if hasattr(x, "__float__"):
  13006. return x in tuple(self)
  13007. l = len(x)
  13008. if l == 2:
  13009. return util_is_point_in_rect(x, self)
  13010. if l == 4:
  13011. r = INFINITE_RECT()
  13012. try:
  13013. r = Rect(x)
  13014. except Exception:
  13015. if g_exceptions_verbose > 1: exception_info()
  13016. r = Quad(x).rect
  13017. return (self.x0 <= r.x0 <= r.x1 <= self.x1 and
  13018. self.y0 <= r.y0 <= r.y1 <= self.y1)
  13019. return False
  13020. def __eq__(self, rect):
  13021. if not hasattr(rect, "__len__"):
  13022. return False
  13023. return len(rect) == 4 and not (self - rect)
  13024. def __getitem__(self, i):
  13025. return (self.x0, self.y0, self.x1, self.y1)[i]
  13026. def __hash__(self):
  13027. return hash(tuple(self))
  13028. def __init__(self, *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
  13029. """
  13030. Rect() - all zeros
  13031. Rect(x0, y0, x1, y1)
  13032. Rect(top-left, x1, y1)
  13033. Rect(x0, y0, bottom-right)
  13034. Rect(top-left, bottom-right)
  13035. Rect(Rect or IRect) - new copy
  13036. Rect(sequence) - from 'sequence'
  13037. Explicit keyword args p0, p1, x0, y0, x1, y1 override earlier settings
  13038. if not None.
  13039. """
  13040. x0, y0, x1, y1 = util_make_rect( *args, p0=p0, p1=p1, x0=x0, y0=y0, x1=x1, y1=y1)
  13041. self.x0 = float( x0)
  13042. self.y0 = float( y0)
  13043. self.x1 = float( x1)
  13044. self.y1 = float( y1)
  13045. def __len__(self):
  13046. return 4
  13047. def __mul__(self, m):
  13048. if hasattr(m, "__float__"):
  13049. return Rect(self.x0 * m, self.y0 * m, self.x1 * m, self.y1 * m)
  13050. r = Rect(self)
  13051. r = r.transform(m)
  13052. return r
  13053. def __neg__(self):
  13054. return Rect(-self.x0, -self.y0, -self.x1, -self.y1)
  13055. def __nonzero__(self):
  13056. return not (max(self) == min(self) == 0)
  13057. def __or__(self, x):
  13058. if not hasattr(x, "__len__"):
  13059. raise ValueError("bad operand 2")
  13060. r = Rect(self)
  13061. if len(x) == 2:
  13062. return r.include_point(x)
  13063. if len(x) == 4:
  13064. return r.include_rect(x)
  13065. raise ValueError("bad operand 2")
  13066. def __pos__(self):
  13067. return Rect(self)
  13068. def __repr__(self):
  13069. return "Rect" + str(tuple(self))
  13070. def __setitem__(self, i, v):
  13071. v = float(v)
  13072. if i == 0: self.x0 = v
  13073. elif i == 1: self.y0 = v
  13074. elif i == 2: self.x1 = v
  13075. elif i == 3: self.y1 = v
  13076. else:
  13077. raise IndexError("index out of range")
  13078. return None
  13079. def __sub__(self, p):
  13080. if hasattr(p, "__float__"):
  13081. return Rect(self.x0 - p, self.y0 - p, self.x1 - p, self.y1 - p)
  13082. if len(p) != 4:
  13083. raise ValueError("Rect: bad seq len")
  13084. return Rect(self.x0 - p[0], self.y0 - p[1], self.x1 - p[2], self.y1 - p[3])
  13085. def __truediv__(self, m):
  13086. if hasattr(m, "__float__"):
  13087. return Rect(self.x0 * 1./m, self.y0 * 1./m, self.x1 * 1./m, self.y1 * 1./m)
  13088. im = util_invert_matrix(m)[1]
  13089. if not im:
  13090. raise ZeroDivisionError(f"Matrix not invertible: {m}")
  13091. r = Rect(self)
  13092. r = r.transform(im)
  13093. return r
  13094. @property
  13095. def bottom_left(self):
  13096. """Bottom-left corner."""
  13097. return Point(self.x0, self.y1)
  13098. @property
  13099. def bottom_right(self):
  13100. """Bottom-right corner."""
  13101. return Point(self.x1, self.y1)
  13102. def contains(self, x):
  13103. """Check if containing point-like or rect-like x."""
  13104. return self.__contains__(x)
  13105. @property
  13106. def height(self):
  13107. return max(0, self.y1 - self.y0)
  13108. def get_area(self, *args) -> float:
  13109. """Calculate area of rectangle.\nparameter is one of 'px' (default), 'in', 'cm', or 'mm'."""
  13110. return _rect_area(self.width, self.height, args)
  13111. def include_point(self, p):
  13112. """Extend to include point-like p."""
  13113. if len(p) != 2:
  13114. raise ValueError("Point: bad seq len")
  13115. self.x0, self.y0, self.x1, self.y1 = util_include_point_in_rect(self, p)
  13116. return self
  13117. def include_rect(self, r):
  13118. """Extend to include rect-like r."""
  13119. if len(r) != 4:
  13120. raise ValueError("Rect: bad seq len")
  13121. r = Rect(r)
  13122. if r.is_infinite or self.is_infinite:
  13123. self.x0, self.y0, self.x1, self.y1 = FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT
  13124. elif r.is_empty:
  13125. return self
  13126. elif self.is_empty:
  13127. self.x0, self.y0, self.x1, self.y1 = r.x0, r.y0, r.x1, r.y1
  13128. else:
  13129. self.x0, self.y0, self.x1, self.y1 = util_union_rect(self, r)
  13130. return self
  13131. def intersect(self, r):
  13132. """Restrict to common rect with rect-like r."""
  13133. if not len(r) == 4:
  13134. raise ValueError("Rect: bad seq len")
  13135. r = Rect(r)
  13136. if r.is_infinite:
  13137. return self
  13138. elif self.is_infinite:
  13139. self.x0, self.y0, self.x1, self.y1 = r.x0, r.y0, r.x1, r.y1
  13140. elif r.is_empty:
  13141. self.x0, self.y0, self.x1, self.y1 = r.x0, r.y0, r.x1, r.y1
  13142. elif self.is_empty:
  13143. return self
  13144. else:
  13145. self.x0, self.y0, self.x1, self.y1 = util_intersect_rect(self, r)
  13146. return self
  13147. def intersects(self, x):
  13148. """Check if intersection with rectangle x is not empty."""
  13149. rect2 = Rect(x)
  13150. return (1
  13151. and not self.is_empty
  13152. and not self.is_infinite
  13153. and not rect2.is_empty
  13154. and not rect2.is_infinite
  13155. and self.x0 < rect2.x1
  13156. and rect2.x0 < self.x1
  13157. and self.y0 < rect2.y1
  13158. and rect2.y0 < self.y1
  13159. )
  13160. @property
  13161. def is_empty(self):
  13162. """True if rectangle area is empty."""
  13163. return self.x0 >= self.x1 or self.y0 >= self.y1
  13164. @property
  13165. def is_infinite(self):
  13166. """True if this is the infinite rectangle."""
  13167. return self.x0 == self.y0 == FZ_MIN_INF_RECT and self.x1 == self.y1 == FZ_MAX_INF_RECT
  13168. @property
  13169. def is_valid(self):
  13170. """True if rectangle is valid."""
  13171. return self.x0 <= self.x1 and self.y0 <= self.y1
  13172. def morph(self, p, m):
  13173. """Morph with matrix-like m and point-like p.
  13174. Returns a new quad."""
  13175. if self.is_infinite:
  13176. return INFINITE_QUAD()
  13177. return self.quad.morph(p, m)
  13178. def norm(self):
  13179. return math.sqrt(sum([c*c for c in self]))
  13180. def normalize(self):
  13181. """Replace rectangle with its finite version."""
  13182. if self.x1 < self.x0:
  13183. self.x0, self.x1 = self.x1, self.x0
  13184. if self.y1 < self.y0:
  13185. self.y0, self.y1 = self.y1, self.y0
  13186. return self
  13187. @property
  13188. def quad(self):
  13189. """Return Quad version of rectangle."""
  13190. return Quad(self.tl, self.tr, self.bl, self.br)
  13191. def round(self):
  13192. """Return the IRect."""
  13193. return IRect(util_round_rect(self))
  13194. @property
  13195. def top_left(self):
  13196. """Top-left corner."""
  13197. return Point(self.x0, self.y0)
  13198. @property
  13199. def top_right(self):
  13200. """Top-right corner."""
  13201. return Point(self.x1, self.y0)
  13202. def torect(self, r):
  13203. """Return matrix that converts to target rect."""
  13204. r = Rect(r)
  13205. if self.is_infinite or self.is_empty or r.is_infinite or r.is_empty:
  13206. raise ValueError("rectangles must be finite and not empty")
  13207. return (
  13208. Matrix(1, 0, 0, 1, -self.x0, -self.y0)
  13209. * Matrix(r.width / self.width, r.height / self.height)
  13210. * Matrix(1, 0, 0, 1, r.x0, r.y0)
  13211. )
  13212. def transform(self, m):
  13213. """Replace with the transformation by matrix-like m."""
  13214. if not len(m) == 6:
  13215. raise ValueError("Matrix: bad seq len")
  13216. self.x0, self.y0, self.x1, self.y1 = util_transform_rect(self, m)
  13217. return self
  13218. @property
  13219. def width(self):
  13220. return max(0, self.x1 - self.x0)
  13221. __div__ = __truediv__
  13222. bl = bottom_left
  13223. br = bottom_right
  13224. irect = property(round)
  13225. tl = top_left
  13226. tr = top_right
  13227. class Shape:
  13228. """Create a new shape."""
  13229. @staticmethod
  13230. def horizontal_angle(C, P):
  13231. """Return the angle to the horizontal for the connection from C to P.
  13232. This uses the arcus sine function and resolves its inherent ambiguity by
  13233. looking up in which quadrant vector S = P - C is located.
  13234. """
  13235. S = Point(P - C).unit # unit vector 'C' -> 'P'
  13236. alfa = math.asin(abs(S.y)) # absolute angle from horizontal
  13237. if S.x < 0: # make arcsin result unique
  13238. if S.y <= 0: # bottom-left
  13239. alfa = -(math.pi - alfa)
  13240. else: # top-left
  13241. alfa = math.pi - alfa
  13242. else:
  13243. if S.y >= 0: # top-right
  13244. pass
  13245. else: # bottom-right
  13246. alfa = -alfa
  13247. return alfa
  13248. def __init__(self, page: Page):
  13249. CheckParent(page)
  13250. self.page = page
  13251. self.doc = page.parent
  13252. if not self.doc.is_pdf:
  13253. raise ValueError("is no PDF")
  13254. self.height = page.mediabox_size.y
  13255. self.width = page.mediabox_size.x
  13256. self.x = page.cropbox_position.x
  13257. self.y = page.cropbox_position.y
  13258. self.pctm = page.transformation_matrix # page transf. matrix
  13259. self.ipctm = ~self.pctm # inverted transf. matrix
  13260. self.draw_cont = ""
  13261. self.text_cont = ""
  13262. self.totalcont = ""
  13263. self.last_point = None
  13264. self.rect = None
  13265. def updateRect(self, x):
  13266. if self.rect is None:
  13267. if len(x) == 2:
  13268. self.rect = Rect(x, x)
  13269. else:
  13270. self.rect = Rect(x)
  13271. else:
  13272. if len(x) == 2:
  13273. x = Point(x)
  13274. self.rect.x0 = min(self.rect.x0, x.x)
  13275. self.rect.y0 = min(self.rect.y0, x.y)
  13276. self.rect.x1 = max(self.rect.x1, x.x)
  13277. self.rect.y1 = max(self.rect.y1, x.y)
  13278. else:
  13279. x = Rect(x)
  13280. self.rect.x0 = min(self.rect.x0, x.x0)
  13281. self.rect.y0 = min(self.rect.y0, x.y0)
  13282. self.rect.x1 = max(self.rect.x1, x.x1)
  13283. self.rect.y1 = max(self.rect.y1, x.y1)
  13284. def draw_line(self, p1: point_like, p2: point_like) -> Point:
  13285. """Draw a line between two points."""
  13286. p1 = Point(p1)
  13287. p2 = Point(p2)
  13288. if not (self.last_point == p1):
  13289. self.draw_cont += _format_g(JM_TUPLE(p1 * self.ipctm)) + " m\n"
  13290. self.last_point = p1
  13291. self.updateRect(p1)
  13292. self.draw_cont += _format_g(JM_TUPLE(p2 * self.ipctm)) + " l\n"
  13293. self.updateRect(p2)
  13294. self.last_point = p2
  13295. return self.last_point
  13296. def draw_polyline(self, points: list) -> Point:
  13297. """Draw several connected line segments."""
  13298. for i, p in enumerate(points):
  13299. if i == 0:
  13300. if not (self.last_point == Point(p)):
  13301. self.draw_cont += _format_g(JM_TUPLE(Point(p) * self.ipctm)) + " m\n"
  13302. self.last_point = Point(p)
  13303. else:
  13304. self.draw_cont += _format_g(JM_TUPLE(Point(p) * self.ipctm)) + " l\n"
  13305. self.updateRect(p)
  13306. self.last_point = Point(points[-1])
  13307. return self.last_point
  13308. def draw_bezier(
  13309. self,
  13310. p1: point_like,
  13311. p2: point_like,
  13312. p3: point_like,
  13313. p4: point_like,
  13314. ) -> Point:
  13315. """Draw a standard cubic Bezier curve."""
  13316. p1 = Point(p1)
  13317. p2 = Point(p2)
  13318. p3 = Point(p3)
  13319. p4 = Point(p4)
  13320. if not (self.last_point == p1):
  13321. self.draw_cont += _format_g(JM_TUPLE(p1 * self.ipctm)) + " m\n"
  13322. args = JM_TUPLE(list(p2 * self.ipctm) + list(p3 * self.ipctm) + list(p4 * self.ipctm))
  13323. self.draw_cont += _format_g(args) + " c\n"
  13324. self.updateRect(p1)
  13325. self.updateRect(p2)
  13326. self.updateRect(p3)
  13327. self.updateRect(p4)
  13328. self.last_point = p4
  13329. return self.last_point
  13330. def draw_oval(self, tetra: typing.Union[quad_like, rect_like]) -> Point:
  13331. """Draw an ellipse inside a tetrapod."""
  13332. if len(tetra) != 4:
  13333. raise ValueError("invalid arg length")
  13334. if hasattr(tetra[0], "__float__"):
  13335. q = Rect(tetra).quad
  13336. else:
  13337. q = Quad(tetra)
  13338. mt = q.ul + (q.ur - q.ul) * 0.5
  13339. mr = q.ur + (q.lr - q.ur) * 0.5
  13340. mb = q.ll + (q.lr - q.ll) * 0.5
  13341. ml = q.ul + (q.ll - q.ul) * 0.5
  13342. if not (self.last_point == ml):
  13343. self.draw_cont += _format_g(JM_TUPLE(ml * self.ipctm)) + " m\n"
  13344. self.last_point = ml
  13345. self.draw_curve(ml, q.ll, mb)
  13346. self.draw_curve(mb, q.lr, mr)
  13347. self.draw_curve(mr, q.ur, mt)
  13348. self.draw_curve(mt, q.ul, ml)
  13349. self.updateRect(q.rect)
  13350. self.last_point = ml
  13351. return self.last_point
  13352. def draw_circle(self, center: point_like, radius: float) -> Point:
  13353. """Draw a circle given its center and radius."""
  13354. if not radius > EPSILON:
  13355. raise ValueError("radius must be positive")
  13356. center = Point(center)
  13357. p1 = center - (radius, 0)
  13358. return self.draw_sector(center, p1, 360, fullSector=False)
  13359. def draw_curve(
  13360. self,
  13361. p1: point_like,
  13362. p2: point_like,
  13363. p3: point_like,
  13364. ) -> Point:
  13365. """Draw a curve between points using one control point."""
  13366. kappa = 0.55228474983
  13367. p1 = Point(p1)
  13368. p2 = Point(p2)
  13369. p3 = Point(p3)
  13370. k1 = p1 + (p2 - p1) * kappa
  13371. k2 = p3 + (p2 - p3) * kappa
  13372. return self.draw_bezier(p1, k1, k2, p3)
    def draw_sector(
        self,
        center: point_like,
        point: point_like,
        beta: float,
        fullSector: bool = True,
    ) -> Point:
        """Draw a circle sector.

        The arc starts at ``point``, is centered at ``center`` and is built
        from Bezier segments: as many 90 degree pieces as fit, followed by
        one piece for the remainder.

        Args:
            center: point-like, center of the circle.
            point: point-like, start point of the arc.
            beta: angle of the arc in degrees (its sign is inverted
                internally before converting to radians).
            fullSector: if true, lines connecting arc start, center and arc
                end are appended as well.

        Returns:
            The end point of the arc, which becomes the current point.

        Raises:
            ValueError: if the distance between center and point is not
                greater than EPSILON.

        Notes:
            Unlike the other draw methods visible here, this one does not
            call ``updateRect``.
        """
        center = Point(center)
        point = Point(point)
        # small formatters for the "move", "curve" and "line" commands
        l3 = lambda a, b: _format_g((a, b)) + " m\n"
        l4 = lambda a, b, c, d, e, f: _format_g((a, b, c, d, e, f)) + " c\n"
        l5 = lambda a, b: _format_g((a, b)) + " l\n"
        betar = math.radians(-beta)
        # full turn with the sign opposite to betar: adding it shrinks |betar|
        w360 = math.radians(math.copysign(360, betar)) * (-1)
        # quarter and eighth turn carrying the sign of betar
        w90 = math.radians(math.copysign(90, betar))
        w45 = w90 / 2
        while abs(betar) > 2 * math.pi:
            betar += w360  # bring angle below 360 degrees
        if not (self.last_point == point):
            self.draw_cont += l3(*JM_TUPLE(point * self.ipctm))
            self.last_point = point
        Q = Point(0, 0)  # just make sure it exists
        C = center
        P = point
        S = P - C  # vector 'center' -> 'point'
        rad = abs(S)  # circle radius

        if not rad > EPSILON:
            raise ValueError("radius must be positive")

        alfa = self.horizontal_angle(center, point)
        while abs(betar) > abs(w90):  # draw 90 degree arcs
            q1 = C.x + math.cos(alfa + w90) * rad
            q2 = C.y + math.sin(alfa + w90) * rad
            Q = Point(q1, q2)  # the arc's end point
            r1 = C.x + math.cos(alfa + w45) * rad / math.cos(w45)
            r2 = C.y + math.sin(alfa + w45) * rad / math.cos(w45)
            R = Point(r1, r2)  # crossing point of tangents
            kappah = (1 - math.cos(w45)) * 4 / 3 / abs(R - Q)
            kappa = kappah * abs(P - Q)
            cp1 = P + (R - P) * kappa  # control point 1
            cp2 = Q + (R - Q) * kappa  # control point 2
            self.draw_cont += l4(*JM_TUPLE(
                list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm)
            ))

            betar -= w90  # reduce param angle by 90 deg
            alfa += w90  # advance start angle by 90 deg
            P = Q  # advance to arc end point
        # draw (remaining) arc
        if abs(betar) > 1e-3:  # significant degrees left?
            beta2 = betar / 2
            q1 = C.x + math.cos(alfa + betar) * rad
            q2 = C.y + math.sin(alfa + betar) * rad
            Q = Point(q1, q2)  # the arc's end point
            r1 = C.x + math.cos(alfa + beta2) * rad / math.cos(beta2)
            r2 = C.y + math.sin(alfa + beta2) * rad / math.cos(beta2)
            R = Point(r1, r2)  # crossing point of tangents
            # kappa height is 4/3 of segment height
            kappah = (1 - math.cos(beta2)) * 4 / 3 / abs(R - Q)  # kappa height
            kappa = kappah * abs(P - Q) / (1 - math.cos(betar))
            cp1 = P + (R - P) * kappa  # control point 1
            cp2 = Q + (R - Q) * kappa  # control point 2
            self.draw_cont += l4(*JM_TUPLE(
                list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm)
            ))
        if fullSector:
            # go back to the arc's start, then connect start -> center -> end
            self.draw_cont += l3(*JM_TUPLE(point * self.ipctm))
            self.draw_cont += l5(*JM_TUPLE(center * self.ipctm))
            self.draw_cont += l5(*JM_TUPLE(Q * self.ipctm))
        self.last_point = Q
        return self.last_point
  13443. def draw_rect(self, rect: rect_like, *, radius=None) -> Point:
  13444. """Draw a rectangle.
  13445. Args:
  13446. radius: if not None, the rectangle will have rounded corners.
  13447. This is the radius of the curvature, given as percentage of
  13448. the rectangle width or height. Valid are values 0 < v <= 0.5.
  13449. For a sequence of two values, the corners will have different
  13450. radii. Otherwise, the percentage will be computed from the
  13451. shorter side. A value of (0.5, 0.5) will draw an ellipse.
  13452. """
  13453. r = Rect(rect)
  13454. if radius is None: # standard rectangle
  13455. self.draw_cont += _format_g(JM_TUPLE(
  13456. list(r.bl * self.ipctm) + [r.width, r.height]
  13457. )) + " re\n"
  13458. self.updateRect(r)
  13459. self.last_point = r.tl
  13460. return self.last_point
  13461. # rounded corners requested. This requires 1 or 2 values, each
  13462. # with 0 < value <= 0.5
  13463. if hasattr(radius, "__float__"):
  13464. if radius <= 0 or radius > 0.5:
  13465. raise ValueError(f"bad radius value {radius}.")
  13466. d = min(r.width, r.height) * radius
  13467. px = (d, 0)
  13468. py = (0, d)
  13469. elif hasattr(radius, "__len__") and len(radius) == 2:
  13470. rx, ry = radius
  13471. px = (rx * r.width, 0)
  13472. py = (0, ry * r.height)
  13473. if min(rx, ry) <= 0 or max(rx, ry) > 0.5:
  13474. raise ValueError(f"bad radius value {radius}.")
  13475. else:
  13476. raise ValueError(f"bad radius value {radius}.")
  13477. lp = self.draw_line(r.tl + py, r.bl - py)
  13478. lp = self.draw_curve(lp, r.bl, r.bl + px)
  13479. lp = self.draw_line(lp, r.br - px)
  13480. lp = self.draw_curve(lp, r.br, r.br - py)
  13481. lp = self.draw_line(lp, r.tr + py)
  13482. lp = self.draw_curve(lp, r.tr, r.tr - px)
  13483. lp = self.draw_line(lp, r.tl + px)
  13484. self.last_point = self.draw_curve(lp, r.tl, r.tl + py)
  13485. self.updateRect(r)
  13486. return self.last_point
  13487. def draw_quad(self, quad: quad_like) -> Point:
  13488. """Draw a Quad."""
  13489. q = Quad(quad)
  13490. return self.draw_polyline([q.ul, q.ll, q.lr, q.ur, q.ul])
  13491. def draw_zigzag(
  13492. self,
  13493. p1: point_like,
  13494. p2: point_like,
  13495. breadth: float = 2,
  13496. ) -> Point:
  13497. """Draw a zig-zagged line from p1 to p2."""
  13498. p1 = Point(p1)
  13499. p2 = Point(p2)
  13500. S = p2 - p1 # vector start - end
  13501. rad = abs(S) # distance of points
  13502. cnt = 4 * int(round(rad / (4 * breadth), 0)) # always take full phases
  13503. if cnt < 4:
  13504. raise ValueError("points too close")
  13505. mb = rad / cnt # revised breadth
  13506. matrix = Matrix(util_hor_matrix(p1, p2)) # normalize line to x-axis
  13507. i_mat = ~matrix # get original position
  13508. points = [] # stores edges
  13509. for i in range(1, cnt):
  13510. if i % 4 == 1: # point "above" connection
  13511. p = Point(i, -1) * mb
  13512. elif i % 4 == 3: # point "below" connection
  13513. p = Point(i, 1) * mb
  13514. else: # ignore others
  13515. continue
  13516. points.append(p * i_mat)
  13517. self.draw_polyline([p1] + points + [p2]) # add start and end points
  13518. return p2
  13519. def draw_squiggle(
  13520. self,
  13521. p1: point_like,
  13522. p2: point_like,
  13523. breadth=2,
  13524. ) -> Point:
  13525. """Draw a squiggly line from p1 to p2."""
  13526. p1 = Point(p1)
  13527. p2 = Point(p2)
  13528. S = p2 - p1 # vector start - end
  13529. rad = abs(S) # distance of points
  13530. cnt = 4 * int(round(rad / (4 * breadth), 0)) # always take full phases
  13531. if cnt < 4:
  13532. raise ValueError("points too close")
  13533. mb = rad / cnt # revised breadth
  13534. matrix = Matrix(util_hor_matrix(p1, p2)) # normalize line to x-axis
  13535. i_mat = ~matrix # get original position
  13536. k = 2.4142135623765633 # y of draw_curve helper point
  13537. points = [] # stores edges
  13538. for i in range(1, cnt):
  13539. if i % 4 == 1: # point "above" connection
  13540. p = Point(i, -k) * mb
  13541. elif i % 4 == 3: # point "below" connection
  13542. p = Point(i, k) * mb
  13543. else: # else on connection line
  13544. p = Point(i, 0) * mb
  13545. points.append(p * i_mat)
  13546. points = [p1] + points + [p2]
  13547. cnt = len(points)
  13548. i = 0
  13549. while i + 2 < cnt:
  13550. self.draw_curve(points[i], points[i + 1], points[i + 2])
  13551. i += 2
  13552. return p2
  13553. # ==============================================================================
  13554. # Shape.insert_text
  13555. # ==============================================================================
    def insert_text(
        self,
        point: point_like,
        buffer: typing.Union[str, list],
        *,
        fontsize: float = 11,
        lineheight: OptFloat = None,
        fontname: str = "helv",
        fontfile: OptStr = None,
        set_simple: bool = 0,
        encoding: int = 0,
        color: OptSeq = None,
        fill: OptSeq = None,
        render_mode: int = 0,
        border_width: float = 0.05,
        miter_limit: float = 1,
        rotate: int = 0,
        morph: OptSeq = None,
        stroke_opacity: float = 1,
        fill_opacity: float = 1,
        oc: int = 0,
    ) -> int:
        """Build the commands for writing text lines starting at ``point``.

        The generated commands are appended to ``self.text_cont``.

        Args:
            point: point-like, position of the first character.
            buffer: the text. A list or tuple is taken as the lines to
                write; anything else is split with ``splitlines()``.
            fontsize: font size.
            lineheight: factor multiplied with fontsize to get the line
                height. If not given, the factor is computed from the
                font's ascender and descender (1.2 if their difference is
                not greater than 1).
            fontname: font reference name; a leading "/" is removed.
            fontfile, set_simple, encoding: passed on to
                ``page.insert_font`` together with the font name.
            color: stroke color, converted by ``ColorCode(color, "c")``.
            fill: fill color, converted by ``ColorCode(fill, "f")``. If
                falsy and render_mode is 0, ``color`` is used instead.
            render_mode: if greater than 0, written as "Tr" value.
            border_width: if render_mode > 0, multiplied with fontsize and
                written as line width.
            miter_limit: if render_mode > 0 and not None, written as miter
                limit.
            rotate: rotation in degrees, must be a multiple of 90.
            morph: checked by ``CheckMorph``. When accepted, item 0 is
                used as a point and item 1 as a matrix.
            stroke_opacity, fill_opacity: passed to ``page._set_opacity``.
            oc: passed to ``page._get_optional_content``.

        Returns:
            Number of lines written. 0 if there is no text or if the
            character codes of the text cannot be determined.

        Raises:
            ValueError: if rotate is not a multiple of 90.
        """
        # ensure 'text' is a list of strings, worth dealing with
        if not bool(buffer):
            return 0

        if type(buffer) not in (list, tuple):
            text = buffer.splitlines()
        else:
            text = buffer

        if not len(text) > 0:
            return 0

        point = Point(point)
        try:
            # highest character code decides which glyph table is needed
            maxcode = max([ord(c) for c in " ".join(text)])
        except Exception:
            exception_info()
            return 0

        # ensure valid 'fontname'
        fname = fontname
        if fname.startswith("/"):
            fname = fname[1:]

        xref = self.page.insert_font(
            fontname=fname, fontfile=fontfile, encoding=encoding, set_simple=set_simple
        )
        fontinfo = CheckFontInfo(self.doc, xref)

        fontdict = fontinfo[1]
        ordering = fontdict["ordering"]
        simple = fontdict["simple"]
        bfname = fontdict["name"]
        ascender = fontdict["ascender"]
        descender = fontdict["descender"]
        if lineheight:
            lheight = fontsize * lineheight
        elif ascender - descender <= 1:
            lheight = fontsize * 1.2
        else:
            lheight = fontsize * (ascender - descender)

        if maxcode > 255:
            glyphs = self.doc.get_char_widths(xref, maxcode + 1)
        else:
            glyphs = fontdict["glyphs"]

        # convert every line to its string for the "TJ" operator
        tab = []
        for t in text:
            # simple fonts other than Symbol / ZapfDingbats get no glyph table
            if simple and bfname not in ("Symbol", "ZapfDingbats"):
                g = None
            else:
                g = glyphs
            tab.append(getTJstr(t, g, simple, ordering))
        text = tab

        color_str = ColorCode(color, "c")
        fill_str = ColorCode(fill, "f")
        if not fill and render_mode == 0:  # ensure fill color when 0 Tr
            fill = color
            fill_str = ColorCode(color, "f")

        morphing = CheckMorph(morph)
        rot = rotate
        if rot % 90 != 0:
            raise ValueError("bad rotate value")

        while rot < 0:
            rot += 360
        rot = rot % 360  # text rotate = 0, 90, 270, 180

        # templ1: opening commands up to and including the font selection
        templ1 = lambda a, b, c, d, e, f, g: f"\nq\n{a}{b}BT\n{c}1 0 0 1 {_format_g((d, e))} Tm\n/{f} {_format_g(g)} Tf "
        # templ2: ends the first line and sets the line distance
        templ2 = lambda a: f"TJ\n0 -{_format_g(a)} TD\n"
        cmp90 = "0 1 -1 0 0 0 cm\n"  # rotates 90 deg counter-clockwise
        cmm90 = "0 -1 1 0 0 0 cm\n"  # rotates 90 deg clockwise
        cm180 = "-1 0 0 -1 0 0 cm\n"  # rotates by 180 deg.
        height = self.height
        width = self.width

        # setting up for standard rotation directions
        # case rotate = 0
        if morphing:
            # translate to the morph point, apply the morph matrix there
            m1 = Matrix(1, 0, 0, 1, morph[0].x + self.x, height - morph[0].y - self.y)
            mat = ~m1 * morph[1] * m1
            cm = _format_g(JM_TUPLE(mat)) + " cm\n"
        else:
            cm = ""
        top = height - point.y - self.y  # start of 1st char
        left = point.x + self.x  # start of 1. char
        space = top  # space available
        #headroom = point.y + self.y  # distance to page border
        if rot == 90:
            left = height - point.y - self.y
            top = -point.x - self.x
            cm += cmp90
            space = width - abs(top)
            #headroom = point.x + self.x

        elif rot == 270:
            left = -height + point.y + self.y
            top = point.x + self.x
            cm += cmm90
            space = abs(top)
            #headroom = width - point.x - self.x

        elif rot == 180:
            left = -point.x - self.x
            top = -height + point.y + self.y
            cm += cm180
            space = abs(point.y + self.y)
            #headroom = height - point.y - self.y

        # optional content: wrap the text in a BDC / EMC pair if requested
        optcont = self.page._get_optional_content(oc)
        if optcont is not None:
            bdc = "/OC /%s BDC\n" % optcont
            emc = "EMC\n"
        else:
            bdc = emc = ""

        alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
        if alpha is None:
            alpha = ""
        else:
            alpha = "/%s gs\n" % alpha
        nres = templ1(bdc, alpha, cm, left, top, fname, fontsize)

        if render_mode > 0:
            nres += "%i Tr " % render_mode
            nres += _format_g(border_width * fontsize) + " w "
            if miter_limit is not None:
                nres += _format_g(miter_limit) + " M "
        if color is not None:
            nres += color_str
        if fill is not None:
            nres += fill_str

        # =========================================================================
        #   start text insertion
        # =========================================================================
        # the first line is always written
        nres += text[0]
        nlines = 1  # set output line counter
        if len(text) > 1:
            nres += templ2(lheight)  # line 1
        else:
            nres += 'TJ'
        for i in range(1, len(text)):
            if space < lheight:
                break  # no space left on page
            if i > 1:
                nres += "\nT* "
            nres += text[i] + 'TJ'
            space -= lheight
            nlines += 1

        nres += "\nET\n%sQ\n" % emc

        # =========================================================================
        #   end of text insertion
        # =========================================================================
        # update the /Contents object
        self.text_cont += nres
        return nlines
  13719. # ==============================================================================
  13720. # Shape.insert_textbox
  13721. # ==============================================================================
  def insert_textbox(
      self,
      rect: rect_like,
      buffer: typing.Union[str, list],
      *,
      fontname: OptStr = "helv",
      fontfile: OptStr = None,
      fontsize: float = 11,
      lineheight: OptFloat = None,
      set_simple: bool = 0,
      encoding: int = 0,
      color: OptSeq = None,
      fill: OptSeq = None,
      expandtabs: int = 1,
      border_width: float = 0.05,
      miter_limit: float = 1,
      align: int = 0,
      render_mode: int = 0,
      rotate: int = 0,
      morph: OptSeq = None,
      stroke_opacity: float = 1,
      fill_opacity: float = 1,
      oc: int = 0,
  ) -> float:
      """Insert text into a given rectangle.

      Args:
          rect -- the textbox to fill
          buffer -- text to be inserted (a string or a list / tuple of lines)
          fontname -- a Base-14 font, font name or '/name'
          fontfile -- name of a font file
          fontsize -- font size
          lineheight -- overwrite the font property (factor applied to fontsize)
          set_simple, encoding -- passed through to page.insert_font()
          color -- RGB stroke color triple
          fill -- RGB fill color triple (defaults to 'color' for render_mode 0)
          expandtabs -- handles tabulators with string function
          border_width -- thickness of glyph borders as a factor of fontsize
              (only written when render_mode > 0)
          miter_limit -- value of the 'M' operator (only when render_mode > 0)
          align -- 0 left, 1 center, 2 right, 3 justified
          render_mode -- text rendering control
          rotate -- 0, 90, 180, or 270 degrees
          morph -- morph box with a matrix and a fixpoint
          stroke_opacity, fill_opacity -- passed to page._set_opacity()
          oc -- passed to page._get_optional_content()
      Returns:
          A float. If negative, the text did not fit, nothing was written and
          the absolute value is the height deficit. Otherwise the text was
          written and the value is the unused height (rotation 0 / 180:
          measured against rect.height, else against rect.width).
      Raises:
          ValueError -- if the rectangle is empty / infinite or 'rotate' is
          not a multiple of 90.
      """
      rect = Rect(rect)
      if rect.is_empty or rect.is_infinite:
          raise ValueError("text box must be finite and not empty")

      color_str = ColorCode(color, "c")
      fill_str = ColorCode(fill, "f")
      if fill is None and render_mode == 0:  # ensure fill color for 0 Tr
          fill = color
          fill_str = ColorCode(color, "f")

      optcont = self.page._get_optional_content(oc)
      if optcont is not None:
          bdc = "/OC /%s BDC\n" % optcont
          emc = "EMC\n"
      else:
          bdc = emc = ""

      # determine opacity / transparency
      alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
      if alpha is None:
          alpha = ""
      else:
          alpha = "/%s gs\n" % alpha

      if rotate % 90 != 0:
          raise ValueError("rotate must be multiple of 90")

      rot = rotate
      while rot < 0:
          rot += 360
      rot = rot % 360

      # value returned when there is nothing to write: the full box extent
      unused = rect.height if rot in (0, 180) else rect.width

      # is buffer worth of dealing with?
      if not buffer:
          return unused

      # create one string from buffer; it is split into lines further down
      if isinstance(buffer, (list, tuple)):
          t0 = "\n".join(buffer)
      else:
          t0 = buffer

      # A non-empty container may still hold no characters (e.g. [""]).
      # max() below would raise on it, so treat it like an empty buffer.
      if not t0:
          return unused

      cmp90 = "0 1 -1 0 0 0 cm\n"  # rotates counter-clockwise
      cmm90 = "0 -1 1 0 0 0 cm\n"  # rotates clockwise
      cm180 = "-1 0 0 -1 0 0 cm\n"  # rotates by 180 deg.
      height = self.height

      fname = fontname
      if fname.startswith("/"):
          fname = fname[1:]

      xref = self.page.insert_font(
          fontname=fname, fontfile=fontfile, encoding=encoding, set_simple=set_simple
      )
      fontinfo = CheckFontInfo(self.doc, xref)

      fontdict = fontinfo[1]
      ordering = fontdict["ordering"]
      simple = fontdict["simple"]
      bfname = fontdict["name"]
      ascender = fontdict["ascender"]
      descender = fontdict["descender"]

      if lineheight:
          lheight_factor = lineheight
      elif ascender - descender <= 1:
          lheight_factor = 1.2
      else:
          lheight_factor = ascender - descender
      lheight = fontsize * lheight_factor

      maxcode = max(ord(c) for c in t0)
      # replace invalid char codes for simple fonts
      if simple and maxcode > 255:
          t0 = "".join(c if ord(c) < 256 else "?" for c in t0)

      t0 = t0.splitlines()

      glyphs = self.doc.get_char_widths(xref, maxcode + 1)
      if simple and bfname not in ("Symbol", "ZapfDingbats"):
          tj_glyphs = None
      else:
          tj_glyphs = glyphs

      # ----------------------------------------------------------------------
      # calculate pixel length of a string
      # ----------------------------------------------------------------------
      def pixlen(x):
          """Calculate pixel length of x."""
          if ordering < 0:
              return sum(glyphs[ord(c)][1] for c in x) * fontsize
          return len(x) * fontsize

      # ---------------------------------------------------------------------

      if ordering < 0:
          blen = glyphs[32][1] * fontsize  # pixel size of space character
      else:
          blen = fontsize

      text = ""  # output buffer

      if CheckMorph(morph):
          m1 = Matrix(
              1, 0, 0, 1, morph[0].x + self.x, self.height - morph[0].y - self.y
          )
          mat = ~m1 * morph[1] * m1
          cm = _format_g(JM_TUPLE(mat)) + " cm\n"
      else:
          cm = ""

      # ---------------------------------------------------------------------
      # adjust for text orientation / rotation
      # ---------------------------------------------------------------------
      progr = 1  # direction of line progress
      c_pnt = Point(0, fontsize * ascender)  # used for line progress
      if rot == 0:  # normal orientation
          point = rect.tl + c_pnt  # line 1 is 'lheight' below top
          maxwidth = rect.width  # pixels available in one line
          maxheight = rect.height  # available text height

      elif rot == 90:  # rotate counter clockwise
          c_pnt = Point(fontsize * ascender, 0)  # progress in x-direction
          point = rect.bl + c_pnt  # line 1 'lheight' away from left
          maxwidth = rect.height  # pixels available in one line
          maxheight = rect.width  # available text height
          cm += cmp90

      elif rot == 180:  # text upside down
          # progress upwards in y direction
          c_pnt = -Point(0, fontsize * ascender)
          point = rect.br + c_pnt  # line 1 'lheight' above bottom
          maxwidth = rect.width  # pixels available in one line
          progr = -1  # subtract lheight for next line
          maxheight = rect.height  # available text height
          cm += cm180

      else:  # rotate clockwise (270 or -90)
          # progress from right to left
          c_pnt = -Point(fontsize * ascender, 0)
          point = rect.tr + c_pnt  # line 1 'lheight' left of right
          maxwidth = rect.height  # pixels available in one line
          progr = -1  # subtract lheight for next line
          maxheight = rect.width  # available text height
          cm += cmm90

      # =====================================================================
      # line loop
      # =====================================================================
      just_tab = []  # 'justify' indicators per line

      for i, line in enumerate(t0):
          line_t = line.expandtabs(expandtabs).split(" ")  # split into words
          lbuff = ""  # init line buffer
          rest = maxwidth  # available line pixels
          # =================================================================
          # word loop
          # =================================================================
          for word in line_t:
              pl_w = pixlen(word)  # pixel len of word
              if rest >= pl_w:  # does it fit on the line?
                  lbuff += word + " "  # yes, append word
                  rest -= pl_w + blen  # update available line space
                  continue  # next word

              # word doesn't fit - output line (if not empty)
              if lbuff:
                  lbuff = lbuff.rstrip() + "\n"  # line full, append line break
                  text += lbuff  # append to total text
                  just_tab.append(True)  # can align-justify

              lbuff = ""  # re-init line buffer
              rest = maxwidth  # re-init avail. space

              if pl_w <= maxwidth:  # word shorter than 1 line?
                  lbuff = word + " "  # start the line with it
                  rest = maxwidth - pl_w - blen  # update free space
                  continue

              # long word: split across multiple lines - char by char ...
              if len(just_tab) > 0:
                  just_tab[-1] = False  # cannot align-justify
              for c in word:
                  if pixlen(lbuff) <= maxwidth - pixlen(c):
                      lbuff += c
                  else:  # line full
                      lbuff += "\n"  # close line
                      text += lbuff  # append to text
                      just_tab.append(False)  # cannot align-justify
                      lbuff = c  # start new line with this char

              lbuff += " "  # finish long word
              rest = maxwidth - pixlen(lbuff)  # long word stored

          if lbuff:  # unprocessed line content?
              text += lbuff.rstrip()  # append to text
              just_tab.append(False)  # cannot align-justify

          if i < len(t0) - 1:  # not the last line?
              text += "\n"  # insert line break

      # compute used part of the textbox
      if text.endswith("\n"):
          text = text[:-1]
      lb_count = text.count("\n") + 1  # number of lines written

      # text height = line count * line height plus one descender value
      text_height = lheight * lb_count - descender * fontsize

      more = text_height - maxheight  # difference to height limit
      if more > EPSILON:  # landed too much outside rect
          return (-1) * more  # return deficit, don't output

      more = abs(more)
      if more < EPSILON:
          more = 0  # don't bother with epsilons
      nres = "\nq\n%s%sBT\n" % (bdc, alpha) + cm  # initialize output buffer
      templ = lambda a, b, c, d: f"1 0 0 1 {_format_g((a, b))} Tm /{c} {_format_g(d)} Tf "
      # center, right, justify: output each line with its own specifics
      text_t = text.splitlines()  # split text in lines again
      if just_tab:
          just_tab[-1] = False  # never justify last line
      for i, t in enumerate(text_t):
          spacing = 0
          pl = maxwidth - pixlen(t)  # length of empty line part
          pnt = point + c_pnt * (i * lheight_factor)  # text start of line
          if align == 1:  # center: right shift by half width
              if rot in (0, 180):
                  pnt = pnt + Point(pl / 2, 0) * progr
              else:
                  pnt = pnt - Point(0, pl / 2) * progr
          elif align == 2:  # right: right shift by full width
              if rot in (0, 180):
                  pnt = pnt + Point(pl, 0) * progr
              else:
                  pnt = pnt - Point(0, pl) * progr
          elif align == 3:  # justify
              spaces = t.count(" ")  # number of spaces in line
              if spaces > 0 and just_tab[i]:  # if any, and we may justify
                  spacing = pl / spaces  # make every space this much larger
              else:
                  spacing = 0  # keep normal space length

          # translate the line start into the (possibly rotated) PDF system
          top = height - pnt.y - self.y
          left = pnt.x + self.x
          if rot == 90:
              left = height - pnt.y - self.y
              top = -pnt.x - self.x
          elif rot == 270:
              left = -height + pnt.y + self.y
              top = pnt.x + self.x
          elif rot == 180:
              left = -pnt.x - self.x
              top = -height + pnt.y + self.y

          nres += templ(left, top, fname, fontsize)

          if render_mode > 0:
              nres += "%i Tr " % render_mode
              nres += _format_g(border_width * fontsize) + " w "
              if miter_limit is not None:
                  nres += _format_g(miter_limit) + " M "
          if align == 3:
              nres += _format_g(spacing) + " Tw "

          if color is not None:
              nres += color_str
          if fill is not None:
              nres += fill_str
          nres += "%sTJ\n" % getTJstr(t, tj_glyphs, simple, ordering)

      nres += "ET\n%sQ\n" % emc

      self.text_cont += nres
      self.updateRect(rect)
      return more
  def finish(
      self,
      width: float = 1,
      color: OptSeq = (0,),
      fill: OptSeq = None,
      lineCap: int = 0,
      lineJoin: int = 0,
      dashes: OptStr = None,
      even_odd: bool = False,
      morph: OptSeq = None,
      closePath: bool = True,
      fill_opacity: float = 1,
      stroke_opacity: float = 1,
      oc: int = 0,
  ) -> None:
      """Finish the current drawing segment.

      Notes:
          Wraps the pending draw commands with colors, opacity, dashes,
          line cap / join, line width and an optional morph matrix, chooses
          the painting operator (stroke, fill, or both; optionally even-odd)
          and appends the result to the shape's total content. Optionally
          closes the path by connecting last to first point.
          Does nothing if no draw commands are pending.
      """
      if self.draw_cont == "":  # nothing drawn since last finish
          return

      # width 0 and "no color" imply each other
      if width == 0:
          color = None
      elif color is None:
          width = 0

      stroke_cmd = ColorCode(color, "c")  # stroke color command
      fill_cmd = ColorCode(fill, "f")  # fill color command

      oc_name = self.page._get_optional_content(oc)
      if oc_name is None:
          emc = ""
      else:
          self.draw_cont = "/OC /%s BDC\n" % oc_name + self.draw_cont
          emc = "EMC\n"

      gstate = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
      if gstate is not None:
          self.draw_cont = "/%s gs\n" % gstate + self.draw_cont

      if not (width == 1 or width == 0):
          self.draw_cont += _format_g(width) + " w\n"

      # line style settings are put in front of the path commands
      if lineCap != 0:
          self.draw_cont = "%i J\n" % lineCap + self.draw_cont
      if lineJoin != 0:
          self.draw_cont = "%i j\n" % lineJoin + self.draw_cont
      if dashes not in (None, "", "[] 0"):
          self.draw_cont = "%s d\n" % dashes + self.draw_cont

      if closePath:
          self.draw_cont += "h\n"
          self.last_point = None

      if color is not None:
          self.draw_cont += stroke_cmd

      if fill is None:
          self.draw_cont += "S\n"  # stroke only
      else:
          self.draw_cont += fill_cmd
          # 'B' = fill and stroke, 'f' = fill only; '*' selects even-odd
          paint_op = "B" if color is not None else "f"
          if even_odd:
              paint_op += "*"
          self.draw_cont += paint_op + "\n"

      self.draw_cont += emc

      if CheckMorph(morph):
          shift = Matrix(
              1, 0, 0, 1, morph[0].x + self.x, self.height - morph[0].y - self.y
          )
          morph_mat = ~shift * morph[1] * shift
          self.draw_cont = _format_g(JM_TUPLE(morph_mat)) + " cm\n" + self.draw_cont

      # isolate the segment in its own graphics state and reset for the next
      self.totalcont += "\nq\n" + self.draw_cont + "Q\n"
      self.draw_cont = ""
      self.last_point = None
  def commit(self, overlay: bool = True) -> None:
      """Update the page's /Contents object with Shape data.

      The accumulated text commands are appended to the drawing commands
      and, if anything was produced, stored in a new /Contents stream of
      the page. The argument controls whether data appear in foreground
      (default) or background. Afterwards the shape is reset for re-use.
      """
      CheckParent(self.page)  # doc may have died meanwhile

      self.totalcont += self.text_cont
      self.totalcont = self.totalcont.encode()
      stream = self.totalcont

      if stream:
          if overlay:
              # ensure a balanced graphics state
              self.page.wrap_contents()
          # create the /Contents object with a dummy stream first ...
          xref = TOOLS._insert_contents(self.page, b" ", overlay)
          # ... then store the real data (with potential compression)
          self.doc.update_stream(xref, stream)

      # clean up for potential re-use
      self.last_point = None
      self.rect = None
      self.draw_cont = ""
      self.text_cont = ""
      self.totalcont = ""
  14099. class Story:
  def __init__( self, html='', user_css=None, em=12, archive=None):
      '''
      Create a story from HTML source.

      `html` is encoded as UTF-8 and copied into a MuPDF buffer. A truthy
      `archive` that is not already an `Archive` is wrapped in one; without
      an archive an `mupdf.FzArchive(None)` is used. `user_css` and `em`
      are passed through to the underlying MuPDF story constructor.
      '''
      source = mupdf.fz_new_buffer_from_copied_data( html.encode('utf-8'))

      if archive and not isinstance(archive, Archive):
          archive = Archive(archive)
      if archive:
          arch = archive.this
      else:
          arch = mupdf.FzArchive( None)

      # Use `FzStoryS` when the bindings provide it, else `FzStory`.
      has_story_s = hasattr(mupdf, 'FzStoryS')
      story_type = mupdf.FzStoryS if has_story_s else mupdf.FzStory
      self.this = story_type( source, user_css, em, arch)
  def add_header_ids(self):
      '''
      Walk all nodes of `self.body` and give every `<h1>`..`<h6>` item
      that has no (or an empty) `id` attribute a generated one of the
      form `h_id_<n>`, where `<n>` counts the ids assigned so far.
      '''
      assigned = 0
      node = self.body.find(None, None, None)
      while node:
          tag = node.tagname
          is_heading = len(tag) == 2 and tag[0]=="h" and tag[1] in "123456"
          # Only headings are queried for an existing id.
          if is_heading and not node.get_attribute_value("id"):
              node.set_attribute("id", f"h_id_{assigned}")
              assigned += 1
          node = node.find_next(None, None, None)
  @staticmethod
  def add_pdf_links(document_or_stream, positions):
      """
      Adds links to PDF document.
      Args:
          document_or_stream:
              A PDF `Document` or raw PDF content, for example an
              `io.BytesIO` instance.
          positions:
              List of `ElementPosition`'s for `document_or_stream`,
              typically from Story.element_positions(). Only positions
              with bit 0 of `open_close` set are considered. If two or
              more such positions have the same id, the first one is
              used as link destination and the others are ignored.
      Returns:
          `document_or_stream` if a `Document` instance, otherwise a
          new `Document` instance.
      Raises:
          RuntimeError if an `href` in `positions` refers to an
          internal position `#<name>` but no item in `positions` has
          `id = name`.
      """
      if isinstance(document_or_stream, Document):
          document = document_or_stream
      else:
          document = Document("pdf", document_or_stream)

      # Create dict from id to position, which we will use to find
      # link destinations. `setdefault` keeps the first position for
      # each id, so later duplicates are ignored.
      #
      id_to_position = dict()
      for position in positions:
          if (position.open_close & 1) and position.id:
              id_to_position.setdefault(position.id, position)

      # Insert links for all positions that have an `href`.
      #
      for position_from in positions:
          if not ((position_from.open_close & 1) and position_from.href):
              continue
          link = dict()
          link['from'] = Rect(position_from.rect)

          if position_from.href.startswith("#"):
              #`<a href="#...">...</a>` internal link.
              target_id = position_from.href[1:]
              try:
                  position_to = id_to_position[ target_id]
              except KeyError as e:
                  if g_exceptions_verbose > 1:    exception_info()
                  raise RuntimeError(f"No destination with id={target_id}, required by position_from: {position_from}") from e
              # Make link from `position_from`'s rect to top-left of
              # `position_to`'s rect.
              link["kind"] = LINK_GOTO
              x0, y0, x1, y1 = position_to.rect
              # This appears to work well with viewers which scroll
              # to make destination point top-left of window.
              link["to"] = Point(x0, y0)
              # `page_num` is 1-based, link pages are 0-based.
              link["page"] = position_to.page_num - 1
          else:
              # `<a href="...">...</a>` external link.
              if position_from.href.startswith('name:'):
                  link['kind'] = LINK_NAMED
                  link['name'] = position_from.href[5:]
              else:
                  link['kind'] = LINK_URI
                  link['uri'] = position_from.href

          document[position_from.page_num - 1].insert_link(link)

      return document
  @property
  def body(self):
      '''
      The result of `bodytag()` on the story's DOM (`self.document()`).
      '''
      return self.document().bodytag()
  def document( self):
      '''
      Return the story's DOM, as given by `mupdf.fz_story_document()`,
      wrapped in an `Xml` instance.
      '''
      return Xml( mupdf.fz_story_document( self.this))
  def draw( self, device, matrix=None):
      '''
      Wrapper for `mupdf.fz_draw_story()`.

      `matrix` is converted with `JM_matrix_from_py()`. A falsy `device`
      is replaced by `mupdf.FzDevice(None)`.
      '''
      transform = JM_matrix_from_py( matrix)
      if device:
          target = device.this
      else:
          target = mupdf.FzDevice( None)
      mupdf.fz_draw_story( self.this, target, transform)
  def element_positions( self, function, args=None):
      '''
      Trigger a callback function to record where items have been placed.

      Args:
          function:
              A callable that can be called with exactly one positional
              argument. It receives a plain object with attributes
              `depth`, `heading`, `id`, `rect`, `text`, `open_close`,
              `rect_num` and `href`, plus one attribute per item of `args`.
          args:
              Optional dict of additional attributes to set on each
              position object. Keys must be strings that are valid
              identifiers. Anything that is not a dict is ignored.
      Raises:
          ValueError if a key of `args` is invalid, or if `function` is
          not callable or cannot be called with one positional argument.
      '''
      import inspect

      if type(args) is dict:
          for k in args:
              if not (type(k) is str and k.isidentifier()):
                  raise ValueError(f"invalid key '{k}'")
      else:
          args = {}

      bad_callback = "callback 'function' must be a callable with exactly one argument"
      if not callable(function):
          raise ValueError(bad_callback)
      # Check the call signature instead of `__code__.co_argcount`, which
      # does not exist on callable objects / partials / builtins and which
      # counts `self` for bound methods.
      try:
          signature = inspect.signature(function)
      except (TypeError, ValueError):
          signature = None    # Not introspectable: trust the caller.
      if signature is not None:
          try:
              signature.bind(None)
          except TypeError:
              raise ValueError(bad_callback) from None

      def function2( position):
          # Copy the MuPDF position into a plain Python object.
          class Position2:
              pass
          position2 = Position2()
          position2.depth = position.depth
          position2.heading = position.heading
          position2.id = position.id
          position2.rect = JM_py_from_rect(position.rect)
          position2.text = position.text
          position2.open_close = position.open_close
          position2.rect_num = position.rectangle_num
          position2.href = position.href
          if args:
              for k, v in args.items():
                  setattr( position2, k, v)
          function( position2)

      mupdf.fz_story_positions( self.this, function2)
  def place( self, where, flags=0):
      '''
      Wrapper for fz_place_story_flags().

      Returns `(more, filled)`: the value returned by
      `mupdf.fz_place_story_flags()` and the filled rectangle converted
      with `JM_py_from_rect()`.
      '''
      target = JM_rect_from_py( where)
      filled_rect = mupdf.FzRect()    # Receives the filled area.
      more = mupdf.fz_place_story_flags( self.this, target, filled_rect, flags)
      return more, JM_py_from_rect( filled_rect)
  def reset( self):
      '''
      Wrapper for `mupdf.fz_reset_story()`.
      '''
      story = self.this
      mupdf.fz_reset_story( story)
  def write(self, writer, rectfn, positionfn=None, pagefn=None):
      '''
      Place and draw the story rectangle by rectangle until all content
      has been placed.

      Args:
          writer:
              Object providing `begin_page()` / `end_page()`. If falsy,
              the story is only drawn to a null device.
          rectfn:
              Called as `rectfn(rect_num, filled)`; must return
              `(mediabox, rect, ctm)`. A truthy `mediabox` starts a
              new page.
          positionfn:
              Optional callback invoked via `self.element_positions()`
              after each placement; each position gets an additional
              `.page_num` attribute.
          pagefn:
              Optional callback `pagefn(page_num, mediabox, dev, after)`
              called with `after=0` after a page was begun and `after=1`
              before a page is ended.
      '''
      device = None
      page_no = 0
      rect_no = 0
      filled = Rect(0, 0, 0, 0)
      while True:
          mediabox, rect, ctm = rectfn(rect_no, filled)
          rect_no += 1
          if mediabox:
              # A new page begins.
              page_no += 1
          more, filled = self.place( rect)

          if positionfn:
              def with_page_num(position):
                  # Tell the caller on which page the element was placed.
                  position.page_num = page_no
                  positionfn(position)
              self.element_positions(with_page_num)

          if not writer:
              self.draw(None, ctm)
          else:
              if mediabox:
                  # Close the previous page, if any, then open the new one.
                  if device:
                      if pagefn:
                          pagefn(page_no, mediabox, device, 1)
                      writer.end_page()
                  device = writer.begin_page( mediabox)
                  if pagefn:
                      pagefn(page_no, mediabox, device, 0)
              self.draw( device, ctm)
              if not more:
                  # Everything is placed: close the last page.
                  if pagefn:
                      pagefn( page_no, mediabox, device, 1)
                  writer.end_page()

          if not more:
              return
  @staticmethod
  def write_stabilized(writer, contentfn, rectfn, user_css=None, em=12, positionfn=None, pagefn=None, archive=None, add_header_ids=True):
      '''
      Repeatedly build a story from `contentfn(positions)` and lay it out
      until the content equals that of the previous iteration. Only this
      final, stable iteration is written to `writer` and reported to
      `positionfn`; earlier iterations are laid out without a writer.

      `positions` passed to `contentfn` is the list of element positions
      collected in the previous iteration (empty on the first call).
      '''
      positions = []
      content = None
      while True:
          previous = content
          content = contentfn( positions)
          stable = bool(content == previous)

          story = Story(content, user_css, em, archive)
          if add_header_ids:
              story.add_header_ids()

          # Collect this iteration's positions for the next `contentfn` call.
          positions = []
          def collect(position):
              positions.append(position)
              if stable and positionfn:
                  positionfn(position)

          target = writer if stable else None
          story.write(target, rectfn, collect, pagefn)

          if stable:
              return
  @staticmethod
  def write_stabilized_with_links(contentfn, rectfn, user_css=None, em=12, positionfn=None, pagefn=None, archive=None, add_header_ids=True):
      '''
      Run `Story.write_stabilized()` into an in-memory PDF, then pass the
      PDF and the reported element positions to `Story.add_pdf_links()`
      and return its result.
      '''
      pdf_bytes = io.BytesIO()
      writer = DocumentWriter(pdf_bytes)
      collected = []

      def remember(position):
          # Keep the position for link creation, then forward it.
          collected.append(position)
          if positionfn:
              positionfn(position)

      Story.write_stabilized(writer, contentfn, rectfn, user_css, em, remember, pagefn, archive, add_header_ids)
      writer.close()
      pdf_bytes.seek(0)
      return Story.add_pdf_links(pdf_bytes, collected)
  def write_with_links(self, rectfn, positionfn=None, pagefn=None):
      '''
      Run `self.write()` into an in-memory PDF, then pass the PDF and the
      reported element positions to `Story.add_pdf_links()` and return
      its result.
      '''
      pdf_bytes = io.BytesIO()
      writer = DocumentWriter(pdf_bytes)
      collected = []

      def remember(position):
          # Keep the position for link creation, then forward it.
          collected.append(position)
          if positionfn:
              positionfn(position)

      self.write(writer, rectfn, positionfn=remember, pagefn=pagefn)
      writer.close()
      pdf_bytes.seek(0)
      return Story.add_pdf_links(pdf_bytes, collected)
  class FitResult:
      '''
      The result from a `Story.fit*()` method.

      Members:

      `big_enough`:
          `True` if the fit succeeded.
      `filled`:
          Tuple (x0, y0, x1, y1) from the last call to `Story.place()`. This
          will be wider than .rect if any single word (which we never split)
          was too wide for .rect.
      `more`:
          `False` if the fit succeeded.
      `numcalls`:
          Number of calls made to `self.place()`.
      `parameter`:
          The successful parameter value, or the largest failing value.
      `rect`:
          The pymupdf.Rect created from `parameter`.
      '''
      def __init__(self, big_enough=None, filled=None, more=None, numcalls=None, parameter=None, rect=None):
          self.big_enough, self.filled, self.more = big_enough, filled, more
          self.numcalls, self.parameter, self.rect = numcalls, parameter, rect

      def __repr__(self):
          # One ' name=value' item per member, in declaration order.
          members = ('big_enough', 'filled', 'more', 'numcalls', 'parameter', 'rect')
          return ''.join(f' {name}={getattr(self, name)}' for name in members)
  def fit(self, fn, pmin=None, pmax=None, delta=0.001, verbose=False, flags=0):
      '''
      Finds optimal rect that contains the story `self`.

      Returns a `Story.FitResult` instance.

      On success, the last call to `self.place()` will have been with the
      returned rectangle, so `self.draw()` can be used directly.

      Args:
          :arg fn:
              A callable taking a floating point `parameter` and returning a
              `pymupdf.Rect()`. If the rect is empty, we assume the story will
              not fit and do not call `self.place()`.

              Must guarantee that `self.place()` behaves monotonically when
              given rect `fn(parameter`) as `parameter` increases. This
              usually means that both width and height increase or stay
              unchanged as `parameter` increases.
          :arg pmin:
              Minimum parameter to consider; `None` for -infinity.
          :arg pmax:
              Maximum parameter to consider; `None` for +infinity.
          :arg delta:
              Maximum error in returned `parameter`. If it is smaller than
              what floating point arithmetic can resolve between the current
              bounds, the search stops as soon as the midpoint coincides
              with one of the bounds.
          :arg verbose:
              If true we output diagnostics.
          :arg flags:
              Passed to mupdf.fz_place_story_flags(). e.g.
              zero or `mupdf.FZ_PLACE_STORY_FLAG_NO_OVERFLOW`.
      '''
      def log(text):
          assert verbose
          message(f'fit(): {text}')

      assert isinstance(pmin, (int, float)) or pmin is None
      assert isinstance(pmax, (int, float)) or pmax is None

      class State:
          def __init__(self):
              self.pmin = pmin
              self.pmax = pmax
              self.pmin_result = None
              self.pmax_result = None
              self.result = None
              self.numcalls = 0
              # Parameter of the most recent update() call.
              self.last_p = None
              if verbose:
                  self.pmin0 = pmin
                  self.pmax0 = pmax
      state = State()

      if verbose:
          log(f'starting. {state.pmin=} {state.pmax=}.')

      self.reset()

      def ret():
          if state.pmax is not None:
              if state.last_p != state.pmax:
                  # Make sure the last self.place() call used the rect
                  # that we are about to return.
                  if verbose:
                      log('Calling update() with pmax, because was overwritten by later calls.')
                  big_enough = update(state.pmax)
                  assert big_enough
              result = state.pmax_result
          else:
              result = state.pmin_result if state.pmin_result else Story.FitResult(numcalls=state.numcalls)
          if verbose:
              log(f'finished. {state.pmin0=} {state.pmax0=} {state.pmax=}: returning {result=}')
          return result

      def update(parameter):
          '''
          Evaluates `more, _ = self.place(fn(parameter))`. If `more` is
          false, then `rect` is big enough to contain `self` and we
          set `state.pmax=parameter` and return True. Otherwise we set
          `state.pmin=parameter` and return False.
          '''
          rect = fn(parameter)
          assert isinstance(rect, Rect), f'{type(rect)=} {rect=}'
          if rect.is_empty:
              big_enough = False
              result = Story.FitResult(parameter=parameter, numcalls=state.numcalls)
              if verbose:
                  log('update(): not calling self.place() because rect is empty.')
          else:
              more, filled = self.place(rect, flags)
              state.numcalls += 1
              big_enough = not more
              result = Story.FitResult(
                      filled=filled,
                      more=more,
                      numcalls=state.numcalls,
                      parameter=parameter,
                      rect=rect,
                      big_enough=big_enough,
                      )
              if verbose:
                  log(f'update(): called self.place(): {state.numcalls:>2d}: {more=} {parameter=} {rect=}.')
          if big_enough:
              state.pmax = parameter
              state.pmax_result = result
          else:
              state.pmin = parameter
              state.pmin_result = result
          state.last_p = parameter
          return big_enough

      def opposite(p, direction):
          '''
          Returns same sign as `direction`, larger or smaller than `p` if
          direction is positive or negative respectively.
          '''
          if p is None or p==0:
              return direction
          if direction * p > 0:
              return 2 * p
          return -p

      if state.pmin is None:
          # Find an initial finite pmin value.
          if verbose: log('finding pmin.')
          parameter = opposite(state.pmax, -1)
          while 1:
              if not update(parameter):
                  break
              parameter *= 2
      else:
          if update(state.pmin):
              if verbose: log(f'{state.pmin=} is big enough.')
              return ret()

      if state.pmax is None:
          # Find an initial finite pmax value.
          if verbose: log('finding pmax.')
          parameter = opposite(state.pmin, +1)
          while 1:
              if update(parameter):
                  break
              parameter *= 2
      else:
          if not update(state.pmax):
              # No solution possible. Log the failing value before it is
              # cleared, so the diagnostic is meaningful.
              if verbose: log(f'No solution possible {state.pmax=}.')
              state.pmax = None
              return ret()

      # Do binary search in pmin..pmax.
      if verbose: log(f'doing binary search with {state.pmin=} {state.pmax=}.')
      while 1:
          if state.pmax - state.pmin < delta:
              return ret()
          parameter = (state.pmin + state.pmax) / 2
          if parameter == state.pmin or parameter == state.pmax:
              # The interval cannot be narrowed any further in floating
              # point (e.g. delta=0); stop instead of looping forever.
              return ret()
          update(parameter)
  def fit_scale(self, rect, scale_min=0, scale_max=None, delta=0.001, verbose=False, flags=0):
      '''
      Finds the smallest value `scale` in range `scale_min..scale_max` for
      which `rect`, with its width and height multiplied by `scale`, is
      large enough to contain the story `self`. The top-left corner of
      `rect` stays fixed. The search itself is delegated to `self.fit()`.

      Returns a `Story.FitResult` instance with `.parameter` set to `scale`.

      :arg rect:
          Unscaled rectangle, unpacked as `(x0, y0, x1, y1)`.
      :arg scale_min:
          Minimum scale to consider; must be >= 0.
      :arg scale_max:
          Maximum scale to consider, must be >= scale_min or `None` for
          infinite.
      :arg delta:
          Maximum error in returned scale.
      :arg verbose:
          If true we output diagnostics.
      :arg flags:
          Passed to Story.place().
      '''
      left, top, right, bottom = rect
      base_width = right - left
      base_height = bottom - top

      def scaled_rect(scale):
          # Only the bottom-right corner moves with `scale`.
          return Rect(left, top, left + scale*base_width, top + scale*base_height)

      return self.fit(scaled_rect, scale_min, scale_max, delta, verbose, flags)
  def fit_height(self, width, height_min=0, height_max=None, origin=(0, 0), delta=0.001, verbose=False):
      '''
      Finds the smallest height in range `height_min..height_max` for which
      a rect of size `(width, height)` placed at `origin` is large enough
      to contain the story `self`. The search is delegated to `self.fit()`.

      Returns a `Story.FitResult` instance.

      :arg width:
          width of rect.
      :arg height_min:
          Minimum height to consider; must be >= 0.
      :arg height_max:
          Maximum height to consider, must be >= height_min or `None` for
          infinite.
      :arg origin:
          `(x0, y0)` of rect.
      :arg delta:
          Maximum error in returned height.
      :arg verbose:
          If true we output diagnostics.
      '''
      left, top = origin
      right = left + width

      def rect_of_height(height):
          # Left, top and right edges are fixed; only the bottom edge varies.
          return Rect(left, top, right, top+height)

      return self.fit(rect_of_height, height_min, height_max, delta, verbose)
  def fit_width(self, height, width_min=0, width_max=None, origin=(0, 0), delta=0.001, verbose=False):
      '''
      Finds the smallest width in range `width_min..width_max` for which a
      rect of size `(width, height)` placed at `origin` is large enough to
      contain the story `self`. The search is delegated to `self.fit()`.

      Returns a `Story.FitResult` instance.

      :arg height:
          height of rect.
      :arg width_min:
          Minimum width to consider; must be >= 0.
      :arg width_max:
          Maximum width to consider, must be >= width_min or `None` for
          infinite.
      :arg origin:
          `(x0, y0)` of rect.
      :arg delta:
          Maximum error in returned width.
      :arg verbose:
          If true we output diagnostics.
      '''
      left, top = origin
      bottom = top + height

      def rect_of_width(width):
          # Left, top and bottom edges are fixed; only the right edge varies.
          return Rect(left, top, left+width, bottom)

      return self.fit(rect_of_width, width_min, width_max, delta, verbose)
  class TextPage:
      """Wrapper around a MuPDF structured-text page, held in `self.this`."""

      def __init__(self, *args):
          """Create from a `mupdf.FzRect` mediabox, or wrap a `mupdf.FzStextPage`.

          Raises:
              Exception: if `args` matches neither of the two forms.
          """
          if args_match(args, mupdf.FzRect):
              mediabox = args[0]
              self.this = mupdf.FzStextPage( mediabox)
          elif args_match(args, mupdf.FzStextPage):
              self.this = args[0]
          else:
              raise Exception(f'Unrecognised args: {args}')
          self.thisown = True
          self.parent = None

      def _extractText(self, format_):
          """Return the page text in the format selected by 'format_'.

          1 = HTML, 3 = XML, 4 = XHTML; any other value gives plain text.
          """
          this_tpage = self.this
          res = mupdf.fz_new_buffer(1024)
          out = mupdf.FzOutput( res)
          # fixme: mupdfwrap.py thinks fz_output is not copyable, possibly
          # because there is no .refs member visible and no fz_keep_output() fn,
          # although there is an fz_drop_output(). So mupdf.fz_new_output_with_buffer()
          # doesn't convert the returned fz_output* into a mupdf.FzOutput.
          #out = mupdf.FzOutput(out)
          if format_ == 1:
              mupdf.fz_print_stext_page_as_html(out, this_tpage, 0)
          elif format_ == 3:
              mupdf.fz_print_stext_page_as_xml(out, this_tpage, 0)
          elif format_ == 4:
              mupdf.fz_print_stext_page_as_xhtml(out, this_tpage, 0)
          else:
              # The plain-text printer writes to the buffer directly, not to 'out'.
              JM_print_stext_page_as_text(res, this_tpage)
          out.fz_close_output()
          return JM_EscapeStrFromBuffer(res)

      def _getNewBlockList(self, page_dict, raw):
          """Let `JM_make_textpage_dict()` fill 'page_dict' from this page."""
          JM_make_textpage_dict(self.this, page_dict, raw)

      def _textpage_dict(self, raw=False):
          """Return a dict with "width", "height" and the entries added by `_getNewBlockList()`."""
          page_dict = {"width": self.rect.width, "height": self.rect.height}
          self._getNewBlockList(page_dict, raw)
          return page_dict

      def _dict_with_options(self, raw, cb, sort):
          """Build the page dict and apply the 'cb' / 'sort' options.

          Shared by extractDICT, extractRAWDICT, extractJSON and extractRAWJSON.

          Args:
              raw: passed on to `_textpage_dict()`.
              cb: if not None, its `.width` / `.height` replace the dict's
                  "width" / "height" entries.
              sort: if true, "blocks" is sorted in place by items 3 and 0 of
                  each block's "bbox" (presumably y1, then x0 - confirm).
          """
          val = self._textpage_dict(raw=raw)
          if cb is not None:
              val["width"] = cb.width
              val["height"] = cb.height
          if sort:
              val["blocks"].sort(key=lambda b: (b["bbox"][3], b["bbox"][0]))
          return val

      @staticmethod
      def _dict_to_json(val):
          """Serialize a page dict to JSON; bytes-like values become base64 text."""
          import base64
          import json

          class b64encode(json.JSONEncoder):
              def default(self, s):
                  if type(s) in (bytes, bytearray):
                      return base64.b64encode(s).decode()
                  # NOTE(review): any other unsupported type falls through,
                  # so None is returned and it is emitted as JSON null rather
                  # than raising TypeError. Kept for backward compatibility.

          return json.dumps(val, separators=(",", ":"), cls=b64encode, indent=1)

      def extractBLOCKS(self):
          """Return a list with text block information."""
          if 1 or g_use_extra:
              return extra.extractBLOCKS(self.this)
          # Unreachable while the condition above is hard-wired to true;
          # presumably retained as a pure-Python reference implementation.
          block_n = -1
          this_tpage = self.this
          tp_rect = mupdf.FzRect(this_tpage.m_internal.mediabox)
          res = mupdf.fz_new_buffer(1024)
          lines = []
          for block in this_tpage:
              block_n += 1
              blockrect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
              if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_TEXT:
                  mupdf.fz_clear_buffer(res)  # set text buffer to empty
                  line_n = -1
                  last_char = 0
                  for line in block:
                      line_n += 1
                      linerect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
                      for ch in line:
                          cbbox = JM_char_bbox(line, ch)
                          if (not JM_rects_overlap(tp_rect, cbbox)
                                  and not mupdf.fz_is_infinite_rect(tp_rect)
                                  ):
                              continue
                          JM_append_rune(res, ch.m_internal.c)
                          last_char = ch.m_internal.c
                          linerect = mupdf.fz_union_rect(linerect, cbbox)
                      if last_char != 10 and not mupdf.fz_is_empty_rect(linerect):
                          mupdf.fz_append_byte(res, 10)
                      blockrect = mupdf.fz_union_rect(blockrect, linerect)
                  text = JM_EscapeStrFromBuffer(res)
              elif (JM_rects_overlap(tp_rect, block.m_internal.bbox)
                      or mupdf.fz_is_infinite_rect(tp_rect)
                      ):
                  img = block.i_image()
                  cs = img.colorspace()
                  text = "<image: %s, width: %d, height: %d, bpc: %d>" % (
                          mupdf.fz_colorspace_name(cs),
                          img.w(), img.h(), img.bpc()
                          )
                  blockrect = mupdf.fz_union_rect(blockrect, mupdf.FzRect(block.m_internal.bbox))
              if not mupdf.fz_is_empty_rect(blockrect):
                  litem = (
                          blockrect.x0,
                          blockrect.y0,
                          blockrect.x1,
                          blockrect.y1,
                          text,
                          block_n,
                          block.m_internal.type,
                          )
                  lines.append(litem)
          return lines

      def extractDICT(self, cb=None, sort=False) -> dict:
          """Return page content as a Python dict of images and text spans."""
          return self._dict_with_options(False, cb, sort)

      def extractHTML(self) -> str:
          """Return page content as a HTML string."""
          return self._extractText(1)

      def extractIMGINFO(self, hashes=0):
          """Return a list with image meta information.

          One dict is produced per non-text block. If 'hashes' is true, each
          dict also gets a "digest" entry computed from the image's pixmap.
          """
          this_tpage = self.this
          rc = []
          for block_n, block in enumerate(this_tpage):
              if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_TEXT:
                  continue
              img = block.i_image()
              img_size = 0
              has_mask = bool(img.mask().m_internal)
              compr_buff = mupdf.fz_compressed_image_buffer(img)
              if compr_buff.m_internal:
                  img_size = compr_buff.fz_compressed_buffer_size()
                  compr_buff = None
              if hashes:
                  r = mupdf.FzIrect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT)
                  assert mupdf.fz_is_infinite_irect(r)
                  m = mupdf.FzMatrix(img.w(), 0, 0, img.h(), 0, 0)
                  pix, _w, _h = mupdf.fz_get_pixmap_from_image(img, r, m)
                  digest = bytes(mupdf.fz_md5_pixmap2(pix))
              if img_size == 0:
                  # No compressed size was obtained: use w * h * n instead.
                  img_size = img.w() * img.h() * img.n()
              cs = mupdf.FzColorspace(mupdf.ll_fz_keep_colorspace(img.m_internal.colorspace))
              block_dict = dict()
              block_dict[dictkey_number] = block_n
              block_dict[dictkey_bbox] = JM_py_from_rect(block.m_internal.bbox)
              block_dict[dictkey_matrix] = JM_py_from_matrix(block.i_transform())
              block_dict[dictkey_width] = img.w()
              block_dict[dictkey_height] = img.h()
              block_dict[dictkey_colorspace] = mupdf.fz_colorspace_n(cs)
              block_dict[dictkey_cs_name] = mupdf.fz_colorspace_name(cs)
              block_dict[dictkey_xres] = img.xres()
              block_dict[dictkey_yres] = img.yres()
              block_dict[dictkey_bpc] = img.bpc()
              block_dict[dictkey_size] = img_size
              if hashes:
                  block_dict["digest"] = digest
              block_dict["has-mask"] = has_mask
              rc.append(block_dict)
          return rc

      def extractJSON(self, cb=None, sort=False) -> str:
          """Return 'extractDICT' converted to JSON format."""
          return self._dict_to_json(self._dict_with_options(False, cb, sort))

      def extractRAWDICT(self, cb=None, sort=False) -> dict:
          """Return page content as a Python dict of images and text characters."""
          return self._dict_with_options(True, cb, sort)

      def extractRAWJSON(self, cb=None, sort=False) -> str:
          """Return 'extractRAWDICT' converted to JSON format."""
          return self._dict_to_json(self._dict_with_options(True, cb, sort))

      def extractSelection(self, pointa, pointb):
          """Return the result of `mupdf.fz_copy_selection()` between two point-likes."""
          a = JM_point_from_py(pointa)
          b = JM_point_from_py(pointb)
          return mupdf.fz_copy_selection(self.this, a, b, 0)

      def extractText(self, sort=False) -> str:
          """Return simple, bare text on the page.

          With 'sort' true, the text items of `extractBLOCKS()` are joined
          after ordering the blocks by their items 3 and 0.
          """
          if not sort:
              return self._extractText(0)
          # sorted() builds a new list, so the extractBLOCKS() result is not modified.
          blocks = sorted(self.extractBLOCKS(), key=lambda b: (b[3], b[0]))
          return "".join([b[4] for b in blocks])

      def extractTextbox(self, rect):
          """Return the text that `JM_copy_rectangle()` finds inside rect-like 'rect'."""
          this_tpage = self.this
          assert isinstance(this_tpage, mupdf.FzStextPage)
          area = JM_rect_from_py(rect)
          found = JM_copy_rectangle(this_tpage, area)
          return PyUnicode_DecodeRawUnicodeEscape(found)

      def extractWORDS(self, delimiters=None):
          """Return a list with text word information."""
          if 1 or g_use_extra:
              return extra.extractWORDS(self.this, delimiters)
          # Unreachable while the condition above is hard-wired to true;
          # presumably retained as a pure-Python reference implementation.
          buflen = 0
          last_char_rtl = 0
          block_n = -1
          wbbox = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)  # word bbox
          this_tpage = self.this
          tp_rect = mupdf.FzRect(this_tpage.m_internal.mediabox)
          lines = None
          buff = mupdf.fz_new_buffer(64)
          lines = []
          for block in this_tpage:
              block_n += 1
              if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
                  continue
              line_n = -1
              for line in block:
                  line_n += 1
                  word_n = 0  # word counter per line
                  mupdf.fz_clear_buffer(buff)  # reset word buffer
                  buflen = 0  # reset char counter
                  for ch in line:
                      cbbox = JM_char_bbox(line, ch)
                      if (not JM_rects_overlap(tp_rect, cbbox)
                              and not mupdf.fz_is_infinite_rect(tp_rect)
                              ):
                          continue
                      if buflen == 0 and ch.m_internal.c == 0x200d:
                          # ZERO WIDTH JOINER cannot start a word
                          continue
                      word_delimiter = JM_is_word_delimiter(ch.m_internal.c, delimiters)
                      this_char_rtl = JM_is_rtl_char(ch.m_internal.c)
                      if word_delimiter or this_char_rtl != last_char_rtl:
                          if buflen == 0 and word_delimiter:
                              continue  # skip delimiters at line start
                          if not mupdf.fz_is_empty_rect(wbbox):
                              word_n, wbbox = JM_append_word(lines, buff, wbbox, block_n, line_n, word_n)
                          mupdf.fz_clear_buffer(buff)
                          buflen = 0  # reset char counter
                          if word_delimiter:
                              continue
                      # append one unicode character to the word
                      JM_append_rune(buff, ch.m_internal.c)
                      last_char_rtl = this_char_rtl
                      buflen += 1
                      # enlarge word bbox
                      wbbox = mupdf.fz_union_rect(wbbox, JM_char_bbox(line, ch))
                  if buflen and not mupdf.fz_is_empty_rect(wbbox):
                      word_n, wbbox = JM_append_word(lines, buff, wbbox, block_n, line_n, word_n)
                  buflen = 0
          return lines

      def extractXHTML(self) -> str:
          """Return page content as a XHTML string."""
          return self._extractText(4)

      def extractXML(self) -> str:
          """Return page content as a XML string."""
          return self._extractText(3)

      def poolsize(self):
          """TextPage current poolsize."""
          tpage = self.this
          pool = mupdf.Pool(tpage.m_internal.pool)
          size = mupdf.fz_pool_size( pool)
          pool.m_internal = None  # Ensure that pool's destructor does not free the pool.
          return size

      @property
      def rect(self):
          """Page rectangle."""
          mediabox = self.this.m_internal.mediabox
          return Rect(JM_py_from_rect(mediabox))

      def search(self, needle, hit_max=0, quads=1):
          """Locate 'needle' returning rects or quads.

          Args:
              needle: text passed to `JM_search_stext_page()`.
              hit_max: accepted but not used by this implementation.
              quads: if true return Quad objects; otherwise return their
                  rects, with overlapping rects on the same line joined.
          """
          val = JM_search_stext_page(self.this, needle)
          if not val:
              return val
          items = len(val)
          for i in range(items):  # change entries to quads or rects
              q = Quad(val[i])
              val[i] = q if quads else q.rect
          if quads:
              return val
          i = 0  # join overlapping rects on the same line
          while i < items - 1:
              v1 = val[i]
              v2 = val[i + 1]
              if v1.y1 != v2.y1 or (v1 & v2).is_empty:
                  i += 1
                  continue  # no overlap on same line
              val[i] = v1 | v2  # join rectangles
              del val[i + 1]  # remove v2
              items -= 1  # reduce item count
          return val

      extractTEXT = extractText
  14918. class TextWriter:
  def __init__(self, page_rect, opacity=1, color=None):
      """Stores text spans for later output on compatible PDF pages.

      Args:
          page_rect: rect-like, stored as `self.rect`; its height is used
              to build the matrix `self.ctm`.
          opacity: stored as `self.opacity`.
          color: stored as `self.color`.
      """
      self.this = mupdf.fz_new_text()
      self.opacity, self.color = opacity, color

      page = Rect(page_rect)
      # Matrix(1, 0, 0, -1, 0, height): negates y and shifts by the page height.
      forward = Matrix(1, 0, 0, -1, 0, page.height)
      self.rect = page
      self.ctm = forward
      self.ictm = ~forward

      # Running state, updated by each append().
      end_point = Point()
      end_point.__doc__ = "Position following last text insertion."
      self.last_point = end_point

      covered = Rect()
      covered.__doc__ = "Accumulated area of text spans."
      self.text_rect = covered

      self.used_fonts = set()
      self.thisown = True
  @property
  def _bbox(self):
      """Rect bounding the stored text, as computed by `mupdf.fz_bound_text()`."""
      bounds = mupdf.fz_bound_text( self.this, mupdf.FzStrokeState(None), mupdf.FzMatrix())
      return Rect(JM_py_from_rect( bounds))
  def append(self, pos, text, font=None, fontsize=11, language=None, right_to_left=0, small_caps=0):
      """Store 'text' at point 'pos' using 'font' and 'fontsize'.

      Args:
          pos: point-like insertion point; it is multiplied by `self.ictm`
              before use.
          text: the string to store.
          font: Font object; `Font("helv")` is used when None.
          fontsize: font size, used as the scale of the text matrix.
          language: passed to `mupdf.fz_text_language_from_string()`.
          right_to_left: if true, 'text' is run through `clean_rtl()` and
              then reversed character by character before being stored.
          small_caps: if 0, `mupdf.fz_show_string()` is used, otherwise
              `JM_show_string_cs()`.
      Returns:
          Tuple `(self.text_rect, self.last_point)` as updated by this call.
      Raises:
          ValueError: if 'font' is not writable.
      """
      pos = Point(pos) * self.ictm
      #log( '{font=}')
      if font is None:
          font = Font("helv")
      if not font.is_writable:
          # NOTE(review): indentation was reconstructed; all debug log calls
          # below are assumed to sit inside this disabled `if 0:` guard -
          # confirm. Most of the strings also lack an f-prefix.
          if 0:
              log( '{font.this.m_internal.name=}')
              log( '{font.this.m_internal.t3matrix=}')
              log( '{font.this.m_internal.bbox=}')
              log( '{font.this.m_internal.glyph_count=}')
              log( '{font.this.m_internal.use_glyph_bbox=}')
              log( '{font.this.m_internal.width_count=}')
              log( '{font.this.m_internal.width_default=}')
              log( '{font.this.m_internal.has_digest=}')
              log( 'Unsupported font {font.name=}')
              if mupdf_cppyy:
                  import cppyy
                  log( f'Unsupported font {cppyy.gbl.mupdf_font_name(font.this.m_internal)=}')
          raise ValueError("Unsupported font '%s'." % font.name)
      if right_to_left:
          text = self.clean_rtl(text)
          text = "".join(reversed(text))
          # The text is already reversed here, so MuPDF is told to lay it
          # out left-to-right.
          right_to_left = 0
      lang = mupdf.fz_text_language_from_string(language)
      p = JM_point_from_py(pos)
      trm = mupdf.fz_make_matrix(fontsize, 0, 0, fontsize, p.x, p.y)
      markup_dir = 0
      wmode = 0
      if small_caps == 0:
          trm = mupdf.fz_show_string( self.this, font.this, trm, text, wmode, right_to_left, markup_dir, lang)
      else:
          trm = JM_show_string_cs( self.this, font.this, trm, text, wmode, right_to_left, markup_dir, lang)
      val = JM_py_from_matrix(trm)
      # The last two items of the returned matrix give the position after the text.
      self.last_point = Point(val[-2:]) * self.ctm
      self.text_rect = self._bbox * self.ctm
      val = self.text_rect, self.last_point
      # Only fonts whose "mono" flag equals 1 are recorded in used_fonts.
      if font.flags["mono"] == 1:
          self.used_fonts.add(font)
      return val
  def appendv(self, pos, text, font=None, fontsize=11, language=None, small_caps=False):
      """Store 'text' vertically: one character per append(), each one line lower.

      Args:
          pos: point-like position of the first character. It is copied,
              so the caller's object is never modified.
          text: the string to store; each character is appended separately.
          font, fontsize, language, small_caps: passed on to append().
      Returns:
          Tuple `(self.text_rect, self.last_point)` after the last character.
      """
      # Work on a private Point: the loop advances `.y` in place. Without
      # the copy the caller's Point was mutated, and a tuple or list 'pos'
      # raised AttributeError after the first character had been stored.
      pos = Point(pos)
      lheight = fontsize * 1.2  # vertical advance per character
      for c in text:
          self.append(pos, c, font=font, fontsize=fontsize,
                  language=language, small_caps=small_caps)
          pos.y += lheight
      return self.text_rect, self.last_point
  def clean_rtl(self, text):
      """Revert the sequence of Latin text parts.

      Text with right-to-left writing direction (Arabic, Hebrew) often
      contains Latin parts, which are written in left-to-right: numbers, names,
      etc. For output as PDF text we need *everything* in right-to-left.
      E.g. an input like "<arabic> ABCDE FG HIJ <arabic> KL <arabic>" will be
      converted to "<arabic> JIH GF EDCBA <arabic> LK <arabic>". The Arabic
      parts remain untouched.

      A word counts as Latin when it has at least two characters and all of
      its code points are <= 255. Runs of adjacent Latin words also have
      their word order reversed, including a run that ends the text.

      Args:
          text: str
      Returns:
          Massaged string.
      """
      if not text:
          return text

      # split into words at space boundaries
      words = text.split(" ")

      # revert character sequence for Latin only words and flag them
      latin = []
      for i, w in enumerate(words):
          is_latin = len(w) >= 2 and all(ord(c) <= 255 for c in w)
          if is_latin:
              words[i] = w[::-1]
          latin.append(is_latin)

      # Adjacent Latin words must revert their sequence, too. The trailing
      # False sentinel closes a run that reaches the end of the text, which
      # previously was left in its original word order.
      run_start = None
      for i, is_latin in enumerate(latin + [False]):
          if is_latin:
              if run_start is None:
                  run_start = i
              continue
          if run_start is not None and i - run_start > 1:
              words[run_start:i] = words[run_start:i][::-1]
          run_start = None

      return " ".join(words)
  def fill_textbox(
          writer: 'TextWriter',
          rect: rect_like,
          text: typing.Union[str, list],
          pos: point_like = None,
          font: typing.Optional[Font] = None,
          fontsize: float = 11,
          lineheight: OptFloat = None,
          align: int = 0,
          warn: bool = None,
          right_to_left: bool = False,
          small_caps: bool = False,
          ) -> tuple:
      """Fill a rectangle with text.

      The text is split into lines that fit the rectangle width, then as
      many lines as fit vertically are written via writer.append().

      Args:
          writer: pymupdf.TextWriter object (= "self")
          rect: rect-like to receive the text.
          text: string or list/tuple of strings.
          pos: point-like start position of first word.
          font: pymupdf.Font object (default pymupdf.Font('helv')).
          fontsize: the fontsize.
          lineheight: overwrite the font property
          align: (int) 0 = left, 1 = center, 2 = right, 3 = justify
          warn: (bool) text overflow action: none, warn, or exception.
              None ignores overflow, a true value emits a warning via
              message(), False raises ValueError.
          right_to_left: (bool) indicate right-to-left language.
          small_caps: (bool) passed on to the font length computations and
              to writer.append().
      Returns:
          The list of `(text, length)` items for lines that were not written.
      Raises:
          ValueError: if 'rect' is empty, if the start position is not in
              'rect', or if text overflows and 'warn' is False.
      """
      rect = Rect(rect)
      if rect.is_empty:
          raise ValueError("fill rect must not empty.")
      # Anything that is not exactly a Font (including None) selects "helv".
      if type(font) is not Font:
          font = Font("helv")

      def textlen(x):
          """Return length of a string."""
          return font.text_length(
              x, fontsize=fontsize, small_caps=small_caps
          )  # abbreviation

      def char_lengths(x):
          """Return list of single character lengths for a string."""
          return font.char_lengths(x, fontsize=fontsize, small_caps=small_caps)

      def append_this(pos, text):
          """Append 'text' at 'pos' with this call's font settings."""
          ret = writer.append(
              pos, text, font=font, fontsize=fontsize, small_caps=small_caps
          )
          return ret

      tolerance = fontsize * 0.2  # extra distance to left border
      space_len = textlen(" ")
      std_width = rect.width - tolerance
      std_start = rect.x0 + tolerance

      def norm_words(width, words):
          """Cut any word in pieces no longer than 'width'."""
          nwords = []
          word_lengths = []
          for w in words:
              wl_lst = char_lengths(w)
              wl = sum(wl_lst)
              if wl <= width:  # nothing to do - copy over
                  nwords.append(w)
                  word_lengths.append(wl)
                  continue
              # word longer than rect width - split it in parts
              n = len(wl_lst)
              # Take the longest fitting prefix, then repeat on the rest.
              # If n reaches 0 (not even one character fits) the loop ends
              # and the rest of the word is dropped.
              while n > 0:
                  wl = sum(wl_lst[:n])
                  if wl <= width:
                      nwords.append(w[:n])
                      word_lengths.append(wl)
                      w = w[n:]
                      wl_lst = wl_lst[n:]
                      n = len(wl_lst)
                  else:
                      n -= 1
          return nwords, word_lengths

      def output_justify(start, line):
          """Justified output of a line."""
          # ignore leading / trailing / multiple spaces
          words = [w for w in line.split(" ") if w != ""]
          nwords = len(words)
          if nwords == 0:
              return
          if nwords == 1:  # single word cannot be justified
              append_this(start, words[0])
              return
          tl = sum([textlen(w) for w in words])  # total word lengths
          gaps = nwords - 1  # number of word gaps
          gapl = (std_width - tl) / gaps  # width of each gap
          for w in words:
              _, lp = append_this(start, w)  # output one word
              start.x = lp.x + gapl  # next start at word end plus gap
          return

      asc = font.ascender
      dsc = font.descender
      # Line height factor: an explicit 'lineheight' wins; otherwise use
      # asc - dsc, or 1.2 when that difference is <= 1.
      if not lineheight:
          if asc - dsc <= 1:
              lheight = 1.2
          else:
              lheight = asc - dsc
      else:
          lheight = lineheight
      LINEHEIGHT = fontsize * lheight  # effective line height
      width = std_width  # available horizontal space
      # starting point of text
      if pos is not None:
          pos = Point(pos)
      else:  # default is just below rect top-left
          pos = rect.tl + (tolerance, fontsize * asc)
      if pos not in rect:
          raise ValueError("Text must start in rectangle.")
      # calculate displacement factor for alignment
      if align == TEXT_ALIGN_CENTER:
          factor = 0.5
      elif align == TEXT_ALIGN_RIGHT:
          factor = 1.0
      else:
          factor = 0
      # split in lines if just a string was given
      if type(text) is str:
          textlines = text.splitlines()
      else:
          textlines = []
          for line in text:
              textlines.extend(line.splitlines())
      # Number of lines that fit between the start position and rect bottom.
      max_lines = int((rect.y1 - pos.y) / LINEHEIGHT) + 1
      new_lines = []  # the final list of textbox lines
      no_justify = []  # no justify for these line numbers
      for i, line in enumerate(textlines):
          if line in ("", " "):
              new_lines.append((line, space_len))
              width = rect.width - tolerance
              no_justify.append((len(new_lines) - 1))
              continue
          # The first input line may start at pos.x, so it can be narrower.
          if i == 0:
              width = rect.x1 - pos.x
          else:
              width = rect.width - tolerance
          if right_to_left:  # reverses Arabic / Hebrew text front to back
              line = writer.clean_rtl(line)
          tl = textlen(line)
          if tl <= width:  # line short enough
              new_lines.append((line, tl))
              no_justify.append((len(new_lines) - 1))
              continue
          # we need to split the line in fitting parts
          words = line.split(" ")  # the words in the line
          # cut in parts any words that are longer than rect width
          words, word_lengths = norm_words(width, words)
          n = len(words)
          # Emit the longest fitting word prefix as one output line, then
          # continue with the remaining words.
          while True:
              line0 = " ".join(words[:n])
              wl = sum(word_lengths[:n]) + space_len * (n - 1)
              if wl <= width:
                  new_lines.append((line0, wl))
                  words = words[n:]
                  word_lengths = word_lengths[n:]
                  n = len(words)
                  line0 = None
              else:
                  n -= 1
              if len(words) == 0:
                  break
              # n == 0 with words left would never make progress.
              assert n
      # -------------------------------------------------------------------------
      # List of lines created. Each item is (text, tl), where 'tl' is the PDF
      # output length (float) and 'text' is the text. Except for justified text,
      # this is output-ready.
      # -------------------------------------------------------------------------
      nlines = len(new_lines)
      if nlines > max_lines:
          msg = "Only fitting %i of %i lines." % (max_lines, nlines)
          if warn is None:
              pass
          elif warn:
              message("Warning: " + msg)
          else:
              raise ValueError(msg)
      start = Point()
      no_justify += [len(new_lines) - 1]  # no justifying of last line
      for i in range(max_lines):
          try:
              line, tl = new_lines.pop(0)
          except IndexError:
              # Fewer lines than max_lines: everything has been written.
              if g_exceptions_verbose >= 2: exception_info()
              break
          if right_to_left:  # Arabic, Hebrew
              line = "".join(reversed(line))
          if i == 0:  # may have different start for first line
              # 'start' now aliases 'pos': later updates change both.
              start = pos
          if align == TEXT_ALIGN_JUSTIFY and i not in no_justify and tl < std_width:
              output_justify(start, line)
              start.x = std_start
              start.y += LINEHEIGHT
              continue
          # NOTE: 'width' still holds the value set by the last iteration of
          # the line-splitting loop above.
          if i > 0 or pos.x == std_start:  # left, center, right alignments
              start.x += (width - tl) * factor
          append_this(start, line)
          start.x = std_start
          start.y += LINEHEIGHT
      return new_lines  # return non-written lines
  15222. def write_text(self, page, color=None, opacity=-1, overlay=1, morph=None, matrix=None, render_mode=0, oc=0):
  15223. """Write the text to a PDF page having the TextWriter's page size.
  15224. Args:
  15225. page: a PDF page having same size.
  15226. color: override text color.
  15227. opacity: override transparency.
  15228. overlay: put in foreground or background.
  15229. morph: tuple(Point, Matrix), apply a matrix with a fixpoint.
  15230. matrix: Matrix to be used instead of 'morph' argument.
  15231. render_mode: (int) PDF render mode operator 'Tr'.
  15232. """
  15233. CheckParent(page)
  15234. if abs(self.rect - page.rect) > 1e-3:
  15235. raise ValueError("incompatible page rect")
  15236. if morph is not None:
  15237. if (type(morph) not in (tuple, list)
  15238. or type(morph[0]) is not Point
  15239. or type(morph[1]) is not Matrix
  15240. ):
  15241. raise ValueError("morph must be (Point, Matrix) or None")
  15242. if matrix is not None and morph is not None:
  15243. raise ValueError("only one of matrix, morph is allowed")
  15244. if getattr(opacity, "__float__", None) is None or opacity == -1:
  15245. opacity = self.opacity
  15246. if color is None:
  15247. color = self.color
  15248. if 1:
  15249. pdfpage = page._pdf_page()
  15250. alpha = 1
  15251. if opacity >= 0 and opacity < 1:
  15252. alpha = opacity
  15253. ncol = 1
  15254. dev_color = [0, 0, 0, 0]
  15255. if color:
  15256. ncol, dev_color = JM_color_FromSequence(color)
  15257. if ncol == 3:
  15258. colorspace = mupdf.fz_device_rgb()
  15259. elif ncol == 4:
  15260. colorspace = mupdf.fz_device_cmyk()
  15261. else:
  15262. colorspace = mupdf.fz_device_gray()
  15263. resources = mupdf.pdf_new_dict(pdfpage.doc(), 5)
  15264. contents = mupdf.fz_new_buffer(1024)
  15265. dev = mupdf.pdf_new_pdf_device( pdfpage.doc(), mupdf.FzMatrix(), resources, contents)
  15266. #log( '=== {dev_color!r=}')
  15267. mupdf.fz_fill_text(
  15268. dev,
  15269. self.this,
  15270. mupdf.FzMatrix(),
  15271. colorspace,
  15272. dev_color,
  15273. alpha,
  15274. mupdf.FzColorParams(mupdf.fz_default_color_params),
  15275. )
  15276. mupdf.fz_close_device( dev)
  15277. # copy generated resources into the one of the page
  15278. max_nums = JM_merge_resources( pdfpage, resources)
  15279. cont_string = JM_EscapeStrFromBuffer( contents)
  15280. result = (max_nums, cont_string)
  15281. val = result
  15282. max_nums = val[0]
  15283. content = val[1]
  15284. max_alp, max_font = max_nums
  15285. old_cont_lines = content.splitlines()
  15286. optcont = page._get_optional_content(oc)
  15287. if optcont is not None:
  15288. bdc = "/OC /%s BDC" % optcont
  15289. emc = "EMC"
  15290. else:
  15291. bdc = emc = ""
  15292. new_cont_lines = ["q"]
  15293. if bdc:
  15294. new_cont_lines.append(bdc)
  15295. cb = page.cropbox_position
  15296. if page.rotation in (90, 270):
  15297. delta = page.rect.height - page.rect.width
  15298. else:
  15299. delta = 0
  15300. mb = page.mediabox
  15301. if bool(cb) or mb.y0 != 0 or delta != 0:
  15302. new_cont_lines.append(f"1 0 0 1 {_format_g((cb.x, cb.y + mb.y0 - delta))} cm")
  15303. if morph:
  15304. p = morph[0] * self.ictm
  15305. delta = Matrix(1, 1).pretranslate(p.x, p.y)
  15306. matrix = ~delta * morph[1] * delta
  15307. if morph or matrix:
  15308. new_cont_lines.append(_format_g(JM_TUPLE(matrix)) + " cm")
  15309. for line in old_cont_lines:
  15310. if line.endswith(" cm"):
  15311. continue
  15312. if line == "BT":
  15313. new_cont_lines.append(line)
  15314. new_cont_lines.append("%i Tr" % render_mode)
  15315. continue
  15316. if line.endswith(" gs"):
  15317. alp = int(line.split()[0][4:]) + max_alp
  15318. line = "/Alp%i gs" % alp
  15319. elif line.endswith(" Tf"):
  15320. temp = line.split()
  15321. fsize = float(temp[1])
  15322. if render_mode != 0:
  15323. w = fsize * 0.05
  15324. else:
  15325. w = 1
  15326. new_cont_lines.append(_format_g(w) + " w")
  15327. font = int(temp[0][2:]) + max_font
  15328. line = " ".join(["/F%i" % font] + temp[1:])
  15329. elif line.endswith(" rg"):
  15330. new_cont_lines.append(line.replace("rg", "RG"))
  15331. elif line.endswith(" g"):
  15332. new_cont_lines.append(line.replace(" g", " G"))
  15333. elif line.endswith(" k"):
  15334. new_cont_lines.append(line.replace(" k", " K"))
  15335. new_cont_lines.append(line)
  15336. if emc:
  15337. new_cont_lines.append(emc)
  15338. new_cont_lines.append("Q\n")
  15339. content = "\n".join(new_cont_lines).encode("utf-8")
  15340. TOOLS._insert_contents(page, content, overlay=overlay)
  15341. val = None
  15342. for font in self.used_fonts:
  15343. repair_mono_font(page, font)
  15344. return val
  15345. class IRect:
  15346. """
  15347. IRect() - all zeros
  15348. IRect(x0, y0, x1, y1) - 4 coordinates
  15349. IRect(top-left, x1, y1) - point and 2 coordinates
  15350. IRect(x0, y0, bottom-right) - 2 coordinates and point
  15351. IRect(top-left, bottom-right) - 2 points
  15352. IRect(sequ) - new from sequence or rect-like
  15353. """
  15354. def __add__(self, p):
  15355. return Rect.__add__(self, p).round()
  15356. def __and__(self, x):
  15357. return Rect.__and__(self, x).round()
  15358. def __contains__(self, x):
  15359. return Rect.__contains__(self, x)
  15360. def __eq__(self, r):
  15361. if not hasattr(r, "__len__"):
  15362. return False
  15363. return len(r) == 4 and self.x0 == r[0] and self.y0 == r[1] and self.x1 == r[2] and self.y1 == r[3]
  15364. def __getitem__(self, i):
  15365. return (self.x0, self.y0, self.x1, self.y1)[i]
  15366. def __hash__(self):
  15367. return hash(tuple(self))
  15368. def __init__(self, *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
  15369. self.x0, self.y0, self.x1, self.y1 = util_make_irect( *args, p0=p0, p1=p1, x0=x0, y0=y0, x1=x1, y1=y1)
  15370. def __len__(self):
  15371. return 4
  15372. def __mul__(self, m):
  15373. return Rect.__mul__(self, m).round()
  15374. def __neg__(self):
  15375. return IRect(-self.x0, -self.y0, -self.x1, -self.y1)
  15376. def __or__(self, x):
  15377. return Rect.__or__(self, x).round()
  15378. def __pos__(self):
  15379. return IRect(self)
  15380. def __repr__(self):
  15381. return "IRect" + str(tuple(self))
  15382. def __setitem__(self, i, v):
  15383. v = int(v)
  15384. if i == 0: self.x0 = v
  15385. elif i == 1: self.y0 = v
  15386. elif i == 2: self.x1 = v
  15387. elif i == 3: self.y1 = v
  15388. else:
  15389. raise IndexError("index out of range")
  15390. return None
  15391. def __sub__(self, p):
  15392. return Rect.__sub__(self, p).round()
  15393. def __truediv__(self, m):
  15394. return Rect.__truediv__(self, m).round()
  15395. @property
  15396. def bottom_left(self):
  15397. """Bottom-left corner."""
  15398. return Point(self.x0, self.y1)
  15399. @property
  15400. def bottom_right(self):
  15401. """Bottom-right corner."""
  15402. return Point(self.x1, self.y1)
  15403. @property
  15404. def height(self):
  15405. return max(0, self.y1 - self.y0)
  15406. def contains(self, x):
  15407. """Check if x is in the rectangle."""
  15408. return self.__contains__(x)
  15409. def get_area(self, *args) -> float:
  15410. """Calculate area of rectangle.\nparameter is one of 'px' (default), 'in', 'cm', or 'mm'."""
  15411. return _rect_area(self.width, self.height, args)
  15412. def include_point(self, p):
  15413. """Extend rectangle to include point p."""
  15414. rect = self.rect.include_point(p)
  15415. return rect.irect
  15416. def include_rect(self, r):
  15417. """Extend rectangle to include rectangle r."""
  15418. rect = self.rect.include_rect(r)
  15419. return rect.irect
  15420. def intersect(self, r):
  15421. """Restrict rectangle to intersection with rectangle r."""
  15422. return Rect.intersect(self, r).round()
  15423. def intersects(self, x):
  15424. return Rect.intersects(self, x)
  15425. @property
  15426. def is_empty(self):
  15427. """True if rectangle area is empty."""
  15428. return self.x0 >= self.x1 or self.y0 >= self.y1
  15429. @property
  15430. def is_infinite(self):
  15431. """True if rectangle is infinite."""
  15432. return self.x0 == self.y0 == FZ_MIN_INF_RECT and self.x1 == self.y1 == FZ_MAX_INF_RECT
  15433. @property
  15434. def is_valid(self):
  15435. """True if rectangle is valid."""
  15436. return self.x0 <= self.x1 and self.y0 <= self.y1
  15437. def morph(self, p, m):
  15438. """Morph with matrix-like m and point-like p.
  15439. Returns a new quad."""
  15440. if self.is_infinite:
  15441. return INFINITE_QUAD()
  15442. return self.quad.morph(p, m)
  15443. def norm(self):
  15444. return math.sqrt(sum([c*c for c in self]))
  15445. def normalize(self):
  15446. """Replace rectangle with its valid version."""
  15447. if self.x1 < self.x0:
  15448. self.x0, self.x1 = self.x1, self.x0
  15449. if self.y1 < self.y0:
  15450. self.y0, self.y1 = self.y1, self.y0
  15451. return self
  15452. @property
  15453. def quad(self):
  15454. """Return Quad version of rectangle."""
  15455. return Quad(self.tl, self.tr, self.bl, self.br)
  15456. @property
  15457. def rect(self):
  15458. return Rect(self)
  15459. @property
  15460. def top_left(self):
  15461. """Top-left corner."""
  15462. return Point(self.x0, self.y0)
  15463. @property
  15464. def top_right(self):
  15465. """Top-right corner."""
  15466. return Point(self.x1, self.y0)
  15467. def torect(self, r):
  15468. """Return matrix that converts to target rect."""
  15469. r = Rect(r)
  15470. if self.is_infinite or self.is_empty or r.is_infinite or r.is_empty:
  15471. raise ValueError("rectangles must be finite and not empty")
  15472. return (
  15473. Matrix(1, 0, 0, 1, -self.x0, -self.y0)
  15474. * Matrix(r.width / self.width, r.height / self.height)
  15475. * Matrix(1, 0, 0, 1, r.x0, r.y0)
  15476. )
  15477. def transform(self, m):
  15478. return Rect.transform(self, m).round()
  15479. @property
  15480. def width(self):
  15481. return max(0, self.x1 - self.x0)
  15482. br = bottom_right
  15483. bl = bottom_left
  15484. tl = top_left
  15485. tr = top_right
  15486. # Data
  15487. #
  15488. if 1:
  15489. _self = sys.modules[__name__]
  15490. if 1:
  15491. for _name, _value in mupdf.__dict__.items():
  15492. if _name.startswith(('PDF_', 'UCDN_SCRIPT_')):
  15493. if _name.startswith('PDF_ENUM_NAME_'):
  15494. # Not a simple enum.
  15495. pass
  15496. else:
  15497. #assert not inspect.isroutine(value)
  15498. #log(f'importing {_name=} {_value=}.')
  15499. setattr(_self, _name, _value)
  15500. #log(f'{getattr( self, name, None)=}')
  15501. else:
  15502. # This is slow due to importing inspect, e.g. 0.019 instead of 0.004.
  15503. for _name, _value in inspect.getmembers(mupdf):
  15504. if _name.startswith(('PDF_', 'UCDN_SCRIPT_')):
  15505. if _name.startswith('PDF_ENUM_NAME_'):
  15506. # Not a simple enum.
  15507. pass
  15508. else:
  15509. #assert not inspect.isroutine(value)
  15510. #log(f'importing {name}')
  15511. setattr(_self, _name, _value)
  15512. #log(f'{getattr( self, name, None)=}')
  15513. # This is a macro so not preserved in mupdf C++/Python bindings.
  15514. #
  15515. PDF_SIGNATURE_DEFAULT_APPEARANCE = (0
  15516. | mupdf.PDF_SIGNATURE_SHOW_LABELS
  15517. | mupdf.PDF_SIGNATURE_SHOW_DN
  15518. | mupdf.PDF_SIGNATURE_SHOW_DATE
  15519. | mupdf.PDF_SIGNATURE_SHOW_TEXT_NAME
  15520. | mupdf.PDF_SIGNATURE_SHOW_GRAPHIC_NAME
  15521. | mupdf.PDF_SIGNATURE_SHOW_LOGO
  15522. )
  15523. #UCDN_SCRIPT_ADLAM = mupdf.UCDN_SCRIPT_ADLAM
  15524. #setattr(self, 'UCDN_SCRIPT_ADLAM', mupdf.UCDN_SCRIPT_ADLAM)
  15525. assert mupdf.UCDN_EAST_ASIAN_H == 1
  15526. # Flake8 incorrectly fails next two lines because we've dynamically added
  15527. # items to self.
  15528. assert PDF_TX_FIELD_IS_MULTILINE == mupdf.PDF_TX_FIELD_IS_MULTILINE # noqa: F821
  15529. assert UCDN_SCRIPT_ADLAM == mupdf.UCDN_SCRIPT_ADLAM # noqa: F821
  15530. del _self, _name, _value
  15531. AnyType = typing.Any
  15532. Base14_fontnames = (
  15533. "Courier",
  15534. "Courier-Oblique",
  15535. "Courier-Bold",
  15536. "Courier-BoldOblique",
  15537. "Helvetica",
  15538. "Helvetica-Oblique",
  15539. "Helvetica-Bold",
  15540. "Helvetica-BoldOblique",
  15541. "Times-Roman",
  15542. "Times-Italic",
  15543. "Times-Bold",
  15544. "Times-BoldItalic",
  15545. "Symbol",
  15546. "ZapfDingbats",
  15547. )
  15548. Base14_fontdict = {}
  15549. for f in Base14_fontnames:
  15550. Base14_fontdict[f.lower()] = f
  15551. Base14_fontdict["helv"] = "Helvetica"
  15552. Base14_fontdict["heit"] = "Helvetica-Oblique"
  15553. Base14_fontdict["hebo"] = "Helvetica-Bold"
  15554. Base14_fontdict["hebi"] = "Helvetica-BoldOblique"
  15555. Base14_fontdict["cour"] = "Courier"
  15556. Base14_fontdict["coit"] = "Courier-Oblique"
  15557. Base14_fontdict["cobo"] = "Courier-Bold"
  15558. Base14_fontdict["cobi"] = "Courier-BoldOblique"
  15559. Base14_fontdict["tiro"] = "Times-Roman"
  15560. Base14_fontdict["tibo"] = "Times-Bold"
  15561. Base14_fontdict["tiit"] = "Times-Italic"
  15562. Base14_fontdict["tibi"] = "Times-BoldItalic"
  15563. Base14_fontdict["symb"] = "Symbol"
  15564. Base14_fontdict["zadb"] = "ZapfDingbats"
  15565. EPSILON = 1e-5
  15566. FLT_EPSILON = 1e-5
  15567. # largest 32bit integers surviving C float conversion roundtrips
  15568. # used by MuPDF to define infinite rectangles
  15569. FZ_MIN_INF_RECT = -0x80000000
  15570. FZ_MAX_INF_RECT = 0x7fffff80
  15571. JM_annot_id_stem = "fitz"
  15572. JM_mupdf_warnings_store = []
  15573. JM_mupdf_show_errors = 1
  15574. JM_mupdf_show_warnings = 0
  15575. # ------------------------------------------------------------------------------
  15576. # Image recompression constants
  15577. # ------------------------------------------------------------------------------
  15578. FZ_RECOMPRESS_NEVER = mupdf.FZ_RECOMPRESS_NEVER
  15579. FZ_RECOMPRESS_SAME = mupdf.FZ_RECOMPRESS_SAME
  15580. FZ_RECOMPRESS_LOSSLESS = mupdf.FZ_RECOMPRESS_LOSSLESS
  15581. FZ_RECOMPRESS_JPEG = mupdf.FZ_RECOMPRESS_JPEG
  15582. FZ_RECOMPRESS_J2K = mupdf.FZ_RECOMPRESS_J2K
  15583. FZ_RECOMPRESS_FAX = mupdf.FZ_RECOMPRESS_FAX
  15584. FZ_SUBSAMPLE_AVERAGE = mupdf.FZ_SUBSAMPLE_AVERAGE
  15585. FZ_SUBSAMPLE_BICUBIC = mupdf.FZ_SUBSAMPLE_BICUBIC
  15586. # ------------------------------------------------------------------------------
  15587. # Various PDF Optional Content Flags
  15588. # ------------------------------------------------------------------------------
  15589. PDF_OC_ON = 0
  15590. PDF_OC_TOGGLE = 1
  15591. PDF_OC_OFF = 2
  15592. # ------------------------------------------------------------------------------
  15593. # link kinds and link flags
  15594. # ------------------------------------------------------------------------------
  15595. LINK_NONE = 0
  15596. LINK_GOTO = 1
  15597. LINK_URI = 2
  15598. LINK_LAUNCH = 3
  15599. LINK_NAMED = 4
  15600. LINK_GOTOR = 5
  15601. LINK_FLAG_L_VALID = 1
  15602. LINK_FLAG_T_VALID = 2
  15603. LINK_FLAG_R_VALID = 4
  15604. LINK_FLAG_B_VALID = 8
  15605. LINK_FLAG_FIT_H = 16
  15606. LINK_FLAG_FIT_V = 32
  15607. LINK_FLAG_R_IS_ZOOM = 64
  15608. SigFlag_SignaturesExist = 1
  15609. SigFlag_AppendOnly = 2
  15610. STAMP_Approved = 0
  15611. STAMP_AsIs = 1
  15612. STAMP_Confidential = 2
  15613. STAMP_Departmental = 3
  15614. STAMP_Experimental = 4
  15615. STAMP_Expired = 5
  15616. STAMP_Final = 6
  15617. STAMP_ForComment = 7
  15618. STAMP_ForPublicRelease = 8
  15619. STAMP_NotApproved = 9
  15620. STAMP_NotForPublicRelease = 10
  15621. STAMP_Sold = 11
  15622. STAMP_TopSecret = 12
  15623. STAMP_Draft = 13
  15624. TEXT_ALIGN_LEFT = 0
  15625. TEXT_ALIGN_CENTER = 1
  15626. TEXT_ALIGN_RIGHT = 2
  15627. TEXT_ALIGN_JUSTIFY = 3
  15628. TEXT_FONT_SUPERSCRIPT = 1
  15629. TEXT_FONT_ITALIC = 2
  15630. TEXT_FONT_SERIFED = 4
  15631. TEXT_FONT_MONOSPACED = 8
  15632. TEXT_FONT_BOLD = 16
  15633. TEXT_OUTPUT_TEXT = 0
  15634. TEXT_OUTPUT_HTML = 1
  15635. TEXT_OUTPUT_JSON = 2
  15636. TEXT_OUTPUT_XML = 3
  15637. TEXT_OUTPUT_XHTML = 4
  15638. TEXT_PRESERVE_LIGATURES = mupdf.FZ_STEXT_PRESERVE_LIGATURES
  15639. TEXT_PRESERVE_WHITESPACE = mupdf.FZ_STEXT_PRESERVE_WHITESPACE
  15640. TEXT_PRESERVE_IMAGES = mupdf.FZ_STEXT_PRESERVE_IMAGES
  15641. TEXT_INHIBIT_SPACES = mupdf.FZ_STEXT_INHIBIT_SPACES
  15642. TEXT_DEHYPHENATE = mupdf.FZ_STEXT_DEHYPHENATE
  15643. TEXT_PRESERVE_SPANS = mupdf.FZ_STEXT_PRESERVE_SPANS
  15644. TEXT_MEDIABOX_CLIP = mupdf.FZ_STEXT_MEDIABOX_CLIP
  15645. TEXT_USE_CID_FOR_UNKNOWN_UNICODE = mupdf.FZ_STEXT_USE_CID_FOR_UNKNOWN_UNICODE
  15646. TEXT_COLLECT_STRUCTURE = mupdf.FZ_STEXT_COLLECT_STRUCTURE
  15647. TEXT_ACCURATE_BBOXES = mupdf.FZ_STEXT_ACCURATE_BBOXES
  15648. TEXT_COLLECT_VECTORS = mupdf.FZ_STEXT_COLLECT_VECTORS
  15649. TEXT_IGNORE_ACTUALTEXT = mupdf.FZ_STEXT_IGNORE_ACTUALTEXT
  15650. TEXT_SEGMENT = mupdf.FZ_STEXT_SEGMENT
  15651. if mupdf_version_tuple >= (1, 26):
  15652. TEXT_PARAGRAPH_BREAK = mupdf.FZ_STEXT_PARAGRAPH_BREAK
  15653. TEXT_TABLE_HUNT = mupdf.FZ_STEXT_TABLE_HUNT
  15654. TEXT_COLLECT_STYLES = mupdf.FZ_STEXT_COLLECT_STYLES
  15655. TEXT_USE_GID_FOR_UNKNOWN_UNICODE = mupdf.FZ_STEXT_USE_GID_FOR_UNKNOWN_UNICODE
  15656. TEXT_CLIP_RECT = mupdf.FZ_STEXT_CLIP_RECT
  15657. TEXT_ACCURATE_ASCENDERS = mupdf.FZ_STEXT_ACCURATE_ASCENDERS
  15658. TEXT_ACCURATE_SIDE_BEARINGS = mupdf.FZ_STEXT_ACCURATE_SIDE_BEARINGS
  15659. # 2025-05-07: Non-standard names preserved for backwards compatibility.
  15660. TEXT_STEXT_SEGMENT = TEXT_SEGMENT
  15661. TEXT_CID_FOR_UNKNOWN_UNICODE = TEXT_USE_CID_FOR_UNKNOWN_UNICODE
  15662. TEXTFLAGS_WORDS = (0
  15663. | TEXT_PRESERVE_LIGATURES
  15664. | TEXT_PRESERVE_WHITESPACE
  15665. | TEXT_MEDIABOX_CLIP
  15666. | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
  15667. )
  15668. TEXTFLAGS_BLOCKS = (0
  15669. | TEXT_PRESERVE_LIGATURES
  15670. | TEXT_PRESERVE_WHITESPACE
  15671. | TEXT_MEDIABOX_CLIP
  15672. | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
  15673. )
  15674. TEXTFLAGS_DICT = (0
  15675. | TEXT_PRESERVE_LIGATURES
  15676. | TEXT_PRESERVE_WHITESPACE
  15677. | TEXT_MEDIABOX_CLIP
  15678. | TEXT_PRESERVE_IMAGES
  15679. | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
  15680. )
  15681. TEXTFLAGS_RAWDICT = TEXTFLAGS_DICT
  15682. TEXTFLAGS_SEARCH = (0
  15683. | TEXT_PRESERVE_WHITESPACE
  15684. | TEXT_MEDIABOX_CLIP
  15685. | TEXT_DEHYPHENATE
  15686. | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
  15687. )
  15688. TEXTFLAGS_HTML = (0
  15689. | TEXT_PRESERVE_LIGATURES
  15690. | TEXT_PRESERVE_WHITESPACE
  15691. | TEXT_MEDIABOX_CLIP
  15692. | TEXT_PRESERVE_IMAGES
  15693. | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
  15694. )
  15695. TEXTFLAGS_XHTML = (0
  15696. | TEXT_PRESERVE_LIGATURES
  15697. | TEXT_PRESERVE_WHITESPACE
  15698. | TEXT_MEDIABOX_CLIP
  15699. | TEXT_PRESERVE_IMAGES
  15700. | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
  15701. )
  15702. TEXTFLAGS_XML = (0
  15703. | TEXT_PRESERVE_LIGATURES
  15704. | TEXT_PRESERVE_WHITESPACE
  15705. | TEXT_MEDIABOX_CLIP
  15706. | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
  15707. )
  15708. TEXTFLAGS_TEXT = (0
  15709. | TEXT_PRESERVE_LIGATURES
  15710. | TEXT_PRESERVE_WHITESPACE
  15711. | TEXT_MEDIABOX_CLIP
  15712. | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
  15713. )
  15714. # Simple text encoding options
  15715. TEXT_ENCODING_LATIN = 0
  15716. TEXT_ENCODING_GREEK = 1
  15717. TEXT_ENCODING_CYRILLIC = 2
  15718. TOOLS_JM_UNIQUE_ID = 0
  15719. # colorspace identifiers
  15720. CS_RGB = 1
  15721. CS_GRAY = 2
  15722. CS_CMYK = 3
  15723. # PDF Blend Modes
  15724. PDF_BM_Color = "Color"
  15725. PDF_BM_ColorBurn = "ColorBurn"
  15726. PDF_BM_ColorDodge = "ColorDodge"
  15727. PDF_BM_Darken = "Darken"
  15728. PDF_BM_Difference = "Difference"
  15729. PDF_BM_Exclusion = "Exclusion"
  15730. PDF_BM_HardLight = "HardLight"
  15731. PDF_BM_Hue = "Hue"
  15732. PDF_BM_Lighten = "Lighten"
  15733. PDF_BM_Luminosity = "Luminosity"
  15734. PDF_BM_Multiply = "Multiply"
  15735. PDF_BM_Normal = "Normal"
  15736. PDF_BM_Overlay = "Overlay"
  15737. PDF_BM_Saturation = "Saturation"
  15738. PDF_BM_Screen = "Screen"
  15739. PDF_BM_SoftLight = "Softlight"
  15740. annot_skel = {
  15741. "goto1": lambda a, b, c, d, e: f"<</A<</S/GoTo/D[{a} 0 R/XYZ {_format_g((b, c, d))}]>>/Rect[{e}]/BS<</W 0>>/Subtype/Link>>",
  15742. "goto2": lambda a, b: f"<</A<</S/GoTo/D{a}>>/Rect[{b}]/BS<</W 0>>/Subtype/Link>>",
  15743. "gotor1": lambda a, b, c, d, e, f, g: f"<</A<</S/GoToR/D[{a} /XYZ {_format_g((b, c, d))}]/F<</F({e})/UF({f})/Type/Filespec>>>>/Rect[{g}]/BS<</W 0>>/Subtype/Link>>",
  15744. "gotor2": lambda a, b, c: f"<</A<</S/GoToR/D{a}/F({b})>>/Rect[{c}]/BS<</W 0>>/Subtype/Link>>",
  15745. "launch": lambda a, b, c: f"<</A<</S/Launch/F<</F({a})/UF({b})/Type/Filespec>>>>/Rect[{c}]/BS<</W 0>>/Subtype/Link>>",
  15746. "uri": lambda a, b: f"<</A<</S/URI/URI({a})>>/Rect[{b}]/BS<</W 0>>/Subtype/Link>>",
  15747. "named": lambda a, b: f"<</A<</S/GoTo/D({a})/Type/Action>>/Rect[{b}]/BS<</W 0>>/Subtype/Link>>",
  15748. }
  15749. class FileDataError(RuntimeError):
  15750. """Raised for documents with file structure issues."""
  15751. pass
  15752. class FileNotFoundError(RuntimeError):
  15753. """Raised if file does not exist."""
  15754. pass
  15755. class EmptyFileError(FileDataError):
  15756. """Raised when creating documents from zero-length data."""
  15757. pass
  15758. # propagate exception class to C-level code
  15759. #_set_FileDataError(FileDataError)
  15760. csRGB = Colorspace(CS_RGB)
  15761. csGRAY = Colorspace(CS_GRAY)
  15762. csCMYK = Colorspace(CS_CMYK)
  15763. # These don't appear to be visible in classic, but are used
  15764. # internally.
  15765. #
  15766. dictkey_align = "align"
  15767. dictkey_asc = "ascender"
  15768. dictkey_bidi = "bidi"
  15769. dictkey_bbox = "bbox"
  15770. dictkey_blocks = "blocks"
  15771. dictkey_bpc = "bpc"
  15772. dictkey_c = "c"
  15773. dictkey_chars = "chars"
  15774. dictkey_color = "color"
  15775. dictkey_colorspace = "colorspace"
  15776. dictkey_content = "content"
  15777. dictkey_creationDate = "creationDate"
  15778. dictkey_cs_name = "cs-name"
  15779. dictkey_da = "da"
  15780. dictkey_dashes = "dashes"
  15781. dictkey_descr = "description"
  15782. dictkey_desc = "descender"
  15783. dictkey_dir = "dir"
  15784. dictkey_effect = "effect"
  15785. dictkey_ext = "ext"
  15786. dictkey_filename = "filename"
  15787. dictkey_fill = "fill"
  15788. dictkey_flags = "flags"
  15789. dictkey_char_flags = "char_flags"
  15790. dictkey_font = "font"
  15791. dictkey_glyph = "glyph"
  15792. dictkey_height = "height"
  15793. dictkey_id = "id"
  15794. dictkey_image = "image"
  15795. dictkey_items = "items"
  15796. dictkey_length = "length"
  15797. dictkey_lines = "lines"
  15798. dictkey_matrix = "transform"
  15799. dictkey_modDate = "modDate"
  15800. dictkey_name = "name"
  15801. dictkey_number = "number"
  15802. dictkey_origin = "origin"
  15803. dictkey_rect = "rect"
  15804. dictkey_size = "size"
  15805. dictkey_smask = "smask"
  15806. dictkey_spans = "spans"
  15807. dictkey_stroke = "stroke"
  15808. dictkey_style = "style"
  15809. dictkey_subject = "subject"
  15810. dictkey_text = "text"
  15811. dictkey_title = "title"
  15812. dictkey_type = "type"
  15813. dictkey_ufilename = "ufilename"
  15814. dictkey_width = "width"
  15815. dictkey_wmode = "wmode"
  15816. dictkey_xref = "xref"
  15817. dictkey_xres = "xres"
  15818. dictkey_yres = "yres"
  15819. try:
  15820. from pymupdf_fonts import fontdescriptors, fontbuffers
  15821. fitz_fontdescriptors = fontdescriptors.copy()
  15822. for k in fitz_fontdescriptors.keys():
  15823. fitz_fontdescriptors[k]["loader"] = fontbuffers[k]
  15824. del fontdescriptors, fontbuffers
  15825. except ImportError:
  15826. fitz_fontdescriptors = {}
  15827. symbol_glyphs = ( # Glyph list for the built-in font 'Symbol'
  15828. (183, 0.46),
  15829. (183, 0.46),
  15830. (183, 0.46),
  15831. (183, 0.46),
  15832. (183, 0.46),
  15833. (183, 0.46),
  15834. (183, 0.46),
  15835. (183, 0.46),
  15836. (183, 0.46),
  15837. (183, 0.46),
  15838. (183, 0.46),
  15839. (183, 0.46),
  15840. (183, 0.46),
  15841. (183, 0.46),
  15842. (183, 0.46),
  15843. (183, 0.46),
  15844. (183, 0.46),
  15845. (183, 0.46),
  15846. (183, 0.46),
  15847. (183, 0.46),
  15848. (183, 0.46),
  15849. (183, 0.46),
  15850. (183, 0.46),
  15851. (183, 0.46),
  15852. (183, 0.46),
  15853. (183, 0.46),
  15854. (183, 0.46),
  15855. (183, 0.46),
  15856. (183, 0.46),
  15857. (183, 0.46),
  15858. (183, 0.46),
  15859. (183, 0.46),
  15860. (32, 0.25),
  15861. (33, 0.333),
  15862. (34, 0.713),
  15863. (35, 0.5),
  15864. (36, 0.549),
  15865. (37, 0.833),
  15866. (38, 0.778),
  15867. (39, 0.439),
  15868. (40, 0.333),
  15869. (41, 0.333),
  15870. (42, 0.5),
  15871. (43, 0.549),
  15872. (44, 0.25),
  15873. (45, 0.549),
  15874. (46, 0.25),
  15875. (47, 0.278),
  15876. (48, 0.5),
  15877. (49, 0.5),
  15878. (50, 0.5),
  15879. (51, 0.5),
  15880. (52, 0.5),
  15881. (53, 0.5),
  15882. (54, 0.5),
  15883. (55, 0.5),
  15884. (56, 0.5),
  15885. (57, 0.5),
  15886. (58, 0.278),
  15887. (59, 0.278),
  15888. (60, 0.549),
  15889. (61, 0.549),
  15890. (62, 0.549),
  15891. (63, 0.444),
  15892. (64, 0.549),
  15893. (65, 0.722),
  15894. (66, 0.667),
  15895. (67, 0.722),
  15896. (68, 0.612),
  15897. (69, 0.611),
  15898. (70, 0.763),
  15899. (71, 0.603),
  15900. (72, 0.722),
  15901. (73, 0.333),
  15902. (74, 0.631),
  15903. (75, 0.722),
  15904. (76, 0.686),
  15905. (77, 0.889),
  15906. (78, 0.722),
  15907. (79, 0.722),
  15908. (80, 0.768),
  15909. (81, 0.741),
  15910. (82, 0.556),
  15911. (83, 0.592),
  15912. (84, 0.611),
  15913. (85, 0.69),
  15914. (86, 0.439),
  15915. (87, 0.768),
  15916. (88, 0.645),
  15917. (89, 0.795),
  15918. (90, 0.611),
  15919. (91, 0.333),
  15920. (92, 0.863),
  15921. (93, 0.333),
  15922. (94, 0.658),
  15923. (95, 0.5),
  15924. (96, 0.5),
  15925. (97, 0.631),
  15926. (98, 0.549),
  15927. (99, 0.549),
  15928. (100, 0.494),
  15929. (101, 0.439),
  15930. (102, 0.521),
  15931. (103, 0.411),
  15932. (104, 0.603),
  15933. (105, 0.329),
  15934. (106, 0.603),
  15935. (107, 0.549),
  15936. (108, 0.549),
  15937. (109, 0.576),
  15938. (110, 0.521),
  15939. (111, 0.549),
  15940. (112, 0.549),
  15941. (113, 0.521),
  15942. (114, 0.549),
  15943. (115, 0.603),
  15944. (116, 0.439),
  15945. (117, 0.576),
  15946. (118, 0.713),
  15947. (119, 0.686),
  15948. (120, 0.493),
  15949. (121, 0.686),
  15950. (122, 0.494),
  15951. (123, 0.48),
  15952. (124, 0.2),
  15953. (125, 0.48),
  15954. (126, 0.549),
  15955. (183, 0.46),
  15956. (183, 0.46),
  15957. (183, 0.46),
  15958. (183, 0.46),
  15959. (183, 0.46),
  15960. (183, 0.46),
  15961. (183, 0.46),
  15962. (183, 0.46),
  15963. (183, 0.46),
  15964. (183, 0.46),
  15965. (183, 0.46),
  15966. (183, 0.46),
  15967. (183, 0.46),
  15968. (183, 0.46),
  15969. (183, 0.46),
  15970. (183, 0.46),
  15971. (183, 0.46),
  15972. (183, 0.46),
  15973. (183, 0.46),
  15974. (183, 0.46),
  15975. (183, 0.46),
  15976. (183, 0.46),
  15977. (183, 0.46),
  15978. (183, 0.46),
  15979. (183, 0.46),
  15980. (183, 0.46),
  15981. (183, 0.46),
  15982. (183, 0.46),
  15983. (183, 0.46),
  15984. (183, 0.46),
  15985. (183, 0.46),
  15986. (183, 0.46),
  15987. (183, 0.46),
  15988. (160, 0.25),
  15989. (161, 0.62),
  15990. (162, 0.247),
  15991. (163, 0.549),
  15992. (164, 0.167),
  15993. (165, 0.713),
  15994. (166, 0.5),
  15995. (167, 0.753),
  15996. (168, 0.753),
  15997. (169, 0.753),
  15998. (170, 0.753),
  15999. (171, 1.042),
  16000. (172, 0.713),
  16001. (173, 0.603),
  16002. (174, 0.987),
  16003. (175, 0.603),
  16004. (176, 0.4),
  16005. (177, 0.549),
  16006. (178, 0.411),
  16007. (179, 0.549),
  16008. (180, 0.549),
  16009. (181, 0.576),
  16010. (182, 0.494),
  16011. (183, 0.46),
  16012. (184, 0.549),
  16013. (185, 0.549),
  16014. (186, 0.549),
  16015. (187, 0.549),
  16016. (188, 1),
  16017. (189, 0.603),
  16018. (190, 1),
  16019. (191, 0.658),
  16020. (192, 0.823),
  16021. (193, 0.686),
  16022. (194, 0.795),
  16023. (195, 0.987),
  16024. (196, 0.768),
  16025. (197, 0.768),
  16026. (198, 0.823),
  16027. (199, 0.768),
  16028. (200, 0.768),
  16029. (201, 0.713),
  16030. (202, 0.713),
  16031. (203, 0.713),
  16032. (204, 0.713),
  16033. (205, 0.713),
  16034. (206, 0.713),
  16035. (207, 0.713),
  16036. (208, 0.768),
  16037. (209, 0.713),
  16038. (210, 0.79),
  16039. (211, 0.79),
  16040. (212, 0.89),
  16041. (213, 0.823),
  16042. (214, 0.549),
  16043. (215, 0.549),
  16044. (216, 0.713),
  16045. (217, 0.603),
  16046. (218, 0.603),
  16047. (219, 1.042),
  16048. (220, 0.987),
  16049. (221, 0.603),
  16050. (222, 0.987),
  16051. (223, 0.603),
  16052. (224, 0.494),
  16053. (225, 0.329),
  16054. (226, 0.79),
  16055. (227, 0.79),
  16056. (228, 0.786),
  16057. (229, 0.713),
  16058. (230, 0.384),
  16059. (231, 0.384),
  16060. (232, 0.384),
  16061. (233, 0.384),
  16062. (234, 0.384),
  16063. (235, 0.384),
  16064. (236, 0.494),
  16065. (237, 0.494),
  16066. (238, 0.494),
  16067. (239, 0.494),
  16068. (183, 0.46),
  16069. (241, 0.329),
  16070. (242, 0.274),
  16071. (243, 0.686),
  16072. (244, 0.686),
  16073. (245, 0.686),
  16074. (246, 0.384),
  16075. (247, 0.549),
  16076. (248, 0.384),
  16077. (249, 0.384),
  16078. (250, 0.384),
  16079. (251, 0.384),
  16080. (252, 0.494),
  16081. (253, 0.494),
  16082. (254, 0.494),
  16083. (183, 0.46),
  16084. )
  16085. zapf_glyphs = ( # Glyph list for the built-in font 'ZapfDingbats'
  16086. (183, 0.788),
  16087. (183, 0.788),
  16088. (183, 0.788),
  16089. (183, 0.788),
  16090. (183, 0.788),
  16091. (183, 0.788),
  16092. (183, 0.788),
  16093. (183, 0.788),
  16094. (183, 0.788),
  16095. (183, 0.788),
  16096. (183, 0.788),
  16097. (183, 0.788),
  16098. (183, 0.788),
  16099. (183, 0.788),
  16100. (183, 0.788),
  16101. (183, 0.788),
  16102. (183, 0.788),
  16103. (183, 0.788),
  16104. (183, 0.788),
  16105. (183, 0.788),
  16106. (183, 0.788),
  16107. (183, 0.788),
  16108. (183, 0.788),
  16109. (183, 0.788),
  16110. (183, 0.788),
  16111. (183, 0.788),
  16112. (183, 0.788),
  16113. (183, 0.788),
  16114. (183, 0.788),
  16115. (183, 0.788),
  16116. (183, 0.788),
  16117. (183, 0.788),
  16118. (32, 0.278),
  16119. (33, 0.974),
  16120. (34, 0.961),
  16121. (35, 0.974),
  16122. (36, 0.98),
  16123. (37, 0.719),
  16124. (38, 0.789),
  16125. (39, 0.79),
  16126. (40, 0.791),
  16127. (41, 0.69),
  16128. (42, 0.96),
  16129. (43, 0.939),
  16130. (44, 0.549),
  16131. (45, 0.855),
  16132. (46, 0.911),
  16133. (47, 0.933),
  16134. (48, 0.911),
  16135. (49, 0.945),
  16136. (50, 0.974),
  16137. (51, 0.755),
  16138. (52, 0.846),
  16139. (53, 0.762),
  16140. (54, 0.761),
  16141. (55, 0.571),
  16142. (56, 0.677),
  16143. (57, 0.763),
  16144. (58, 0.76),
  16145. (59, 0.759),
  16146. (60, 0.754),
  16147. (61, 0.494),
  16148. (62, 0.552),
  16149. (63, 0.537),
  16150. (64, 0.577),
  16151. (65, 0.692),
  16152. (66, 0.786),
  16153. (67, 0.788),
  16154. (68, 0.788),
  16155. (69, 0.79),
  16156. (70, 0.793),
  16157. (71, 0.794),
  16158. (72, 0.816),
  16159. (73, 0.823),
  16160. (74, 0.789),
  16161. (75, 0.841),
  16162. (76, 0.823),
  16163. (77, 0.833),
  16164. (78, 0.816),
  16165. (79, 0.831),
  16166. (80, 0.923),
  16167. (81, 0.744),
  16168. (82, 0.723),
  16169. (83, 0.749),
  16170. (84, 0.79),
  16171. (85, 0.792),
  16172. (86, 0.695),
  16173. (87, 0.776),
  16174. (88, 0.768),
  16175. (89, 0.792),
  16176. (90, 0.759),
  16177. (91, 0.707),
  16178. (92, 0.708),
  16179. (93, 0.682),
  16180. (94, 0.701),
  16181. (95, 0.826),
  16182. (96, 0.815),
  16183. (97, 0.789),
  16184. (98, 0.789),
  16185. (99, 0.707),
  16186. (100, 0.687),
  16187. (101, 0.696),
  16188. (102, 0.689),
  16189. (103, 0.786),
  16190. (104, 0.787),
  16191. (105, 0.713),
  16192. (106, 0.791),
  16193. (107, 0.785),
  16194. (108, 0.791),
  16195. (109, 0.873),
  16196. (110, 0.761),
  16197. (111, 0.762),
  16198. (112, 0.762),
  16199. (113, 0.759),
  16200. (114, 0.759),
  16201. (115, 0.892),
  16202. (116, 0.892),
  16203. (117, 0.788),
  16204. (118, 0.784),
  16205. (119, 0.438),
  16206. (120, 0.138),
  16207. (121, 0.277),
  16208. (122, 0.415),
  16209. (123, 0.392),
  16210. (124, 0.392),
  16211. (125, 0.668),
  16212. (126, 0.668),
  16213. (183, 0.788),
  16214. (183, 0.788),
  16215. (183, 0.788),
  16216. (183, 0.788),
  16217. (183, 0.788),
  16218. (183, 0.788),
  16219. (183, 0.788),
  16220. (183, 0.788),
  16221. (183, 0.788),
  16222. (183, 0.788),
  16223. (183, 0.788),
  16224. (183, 0.788),
  16225. (183, 0.788),
  16226. (183, 0.788),
  16227. (183, 0.788),
  16228. (183, 0.788),
  16229. (183, 0.788),
  16230. (183, 0.788),
  16231. (183, 0.788),
  16232. (183, 0.788),
  16233. (183, 0.788),
  16234. (183, 0.788),
  16235. (183, 0.788),
  16236. (183, 0.788),
  16237. (183, 0.788),
  16238. (183, 0.788),
  16239. (183, 0.788),
  16240. (183, 0.788),
  16241. (183, 0.788),
  16242. (183, 0.788),
  16243. (183, 0.788),
  16244. (183, 0.788),
  16245. (183, 0.788),
  16246. (183, 0.788),
  16247. (161, 0.732),
  16248. (162, 0.544),
  16249. (163, 0.544),
  16250. (164, 0.91),
  16251. (165, 0.667),
  16252. (166, 0.76),
  16253. (167, 0.76),
  16254. (168, 0.776),
  16255. (169, 0.595),
  16256. (170, 0.694),
  16257. (171, 0.626),
  16258. (172, 0.788),
  16259. (173, 0.788),
  16260. (174, 0.788),
  16261. (175, 0.788),
  16262. (176, 0.788),
  16263. (177, 0.788),
  16264. (178, 0.788),
  16265. (179, 0.788),
  16266. (180, 0.788),
  16267. (181, 0.788),
  16268. (182, 0.788),
  16269. (183, 0.788),
  16270. (184, 0.788),
  16271. (185, 0.788),
  16272. (186, 0.788),
  16273. (187, 0.788),
  16274. (188, 0.788),
  16275. (189, 0.788),
  16276. (190, 0.788),
  16277. (191, 0.788),
  16278. (192, 0.788),
  16279. (193, 0.788),
  16280. (194, 0.788),
  16281. (195, 0.788),
  16282. (196, 0.788),
  16283. (197, 0.788),
  16284. (198, 0.788),
  16285. (199, 0.788),
  16286. (200, 0.788),
  16287. (201, 0.788),
  16288. (202, 0.788),
  16289. (203, 0.788),
  16290. (204, 0.788),
  16291. (205, 0.788),
  16292. (206, 0.788),
  16293. (207, 0.788),
  16294. (208, 0.788),
  16295. (209, 0.788),
  16296. (210, 0.788),
  16297. (211, 0.788),
  16298. (212, 0.894),
  16299. (213, 0.838),
  16300. (214, 1.016),
  16301. (215, 0.458),
  16302. (216, 0.748),
  16303. (217, 0.924),
  16304. (218, 0.748),
  16305. (219, 0.918),
  16306. (220, 0.927),
  16307. (221, 0.928),
  16308. (222, 0.928),
  16309. (223, 0.834),
  16310. (224, 0.873),
  16311. (225, 0.828),
  16312. (226, 0.924),
  16313. (227, 0.924),
  16314. (228, 0.917),
  16315. (229, 0.93),
  16316. (230, 0.931),
  16317. (231, 0.463),
  16318. (232, 0.883),
  16319. (233, 0.836),
  16320. (234, 0.836),
  16321. (235, 0.867),
  16322. (236, 0.867),
  16323. (237, 0.696),
  16324. (238, 0.696),
  16325. (239, 0.874),
  16326. (183, 0.788),
  16327. (241, 0.874),
  16328. (242, 0.76),
  16329. (243, 0.946),
  16330. (244, 0.771),
  16331. (245, 0.865),
  16332. (246, 0.771),
  16333. (247, 0.888),
  16334. (248, 0.967),
  16335. (249, 0.888),
  16336. (250, 0.831),
  16337. (251, 0.873),
  16338. (252, 0.927),
  16339. (253, 0.97),
  16340. (183, 0.788),
  16341. (183, 0.788),
  16342. )
  16343. # Functions
  16344. #
  16345. def _rect_area(width, height, args):
  16346. # Used by IRect.get_area() and Rect.get_area().
  16347. unit = args[0] if args else 'px'
  16348. u = {"px": (1, 1), "in": (1.0, 72.0), "cm": (2.54, 72.0), "mm": (25.4, 72.0)}
  16349. f = (u[unit][0] / u[unit][1]) ** 2
  16350. return f * width * height
  16351. def _read_samples( pixmap, offset, n):
  16352. # fixme: need to be able to get a sample in one call, as a Python
  16353. # bytes or similar.
  16354. ret = []
  16355. if not pixmap.samples():
  16356. # mupdf.fz_samples_get() gives a segv if pixmap->samples is null.
  16357. return ret
  16358. for i in range( n):
  16359. ret.append( mupdf.fz_samples_get( pixmap, offset + i))
  16360. return bytes( ret)
  16361. def _INRANGE(v, low, high):
  16362. return low <= v and v <= high
  16363. def _remove_dest_range(pdf, numbers):
  16364. pagecount = mupdf.pdf_count_pages(pdf)
  16365. for i in range(pagecount):
  16366. n1 = i
  16367. if n1 in numbers:
  16368. continue
  16369. pageref = mupdf.pdf_lookup_page_obj( pdf, i)
  16370. annots = mupdf.pdf_dict_get( pageref, PDF_NAME('Annots'))
  16371. if not annots.m_internal:
  16372. continue
  16373. len_ = mupdf.pdf_array_len(annots)
  16374. for j in range(len_ - 1, -1, -1):
  16375. o = mupdf.pdf_array_get( annots, j)
  16376. if not mupdf.pdf_name_eq( mupdf.pdf_dict_get( o, PDF_NAME('Subtype')), PDF_NAME('Link')):
  16377. continue
  16378. action = mupdf.pdf_dict_get( o, PDF_NAME('A'))
  16379. dest = mupdf.pdf_dict_get( o, PDF_NAME('Dest'))
  16380. if action.m_internal:
  16381. if not mupdf.pdf_name_eq( mupdf.pdf_dict_get( action, PDF_NAME('S')), PDF_NAME('GoTo')):
  16382. continue
  16383. dest = mupdf.pdf_dict_get( action, PDF_NAME('D'))
  16384. pno = -1
  16385. if mupdf.pdf_is_array( dest):
  16386. target = mupdf.pdf_array_get( dest, 0)
  16387. pno = mupdf.pdf_lookup_page_number( pdf, target)
  16388. elif mupdf.pdf_is_string( dest):
  16389. location, _, _ = mupdf.fz_resolve_link( pdf.super(), mupdf.pdf_to_text_string( dest))
  16390. pno = location.page
  16391. if pno < 0: # page number lookup did not work
  16392. continue
  16393. n1 = pno
  16394. if n1 in numbers:
  16395. mupdf.pdf_array_delete( annots, j)
  16396. def ASSERT_PDF(cond):
  16397. assert isinstance(cond, (mupdf.PdfPage, mupdf.PdfDocument)), f'{type(cond)=} {cond=}'
  16398. if not cond.m_internal:
  16399. raise Exception(MSG_IS_NO_PDF)
  16400. def EMPTY_IRECT():
  16401. return IRect(FZ_MAX_INF_RECT, FZ_MAX_INF_RECT, FZ_MIN_INF_RECT, FZ_MIN_INF_RECT)
  16402. def EMPTY_QUAD():
  16403. return EMPTY_RECT().quad
  16404. def EMPTY_RECT():
  16405. return Rect(FZ_MAX_INF_RECT, FZ_MAX_INF_RECT, FZ_MIN_INF_RECT, FZ_MIN_INF_RECT)
  16406. def ENSURE_OPERATION(pdf):
  16407. if not JM_have_operation(pdf):
  16408. raise Exception("No journalling operation started")
  16409. def INFINITE_IRECT():
  16410. return IRect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT)
  16411. def INFINITE_QUAD():
  16412. return INFINITE_RECT().quad
  16413. def INFINITE_RECT():
  16414. return Rect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT)
  16415. def JM_BinFromBuffer(buffer_):
  16416. '''
  16417. Turn fz_buffer into a Python bytes object
  16418. '''
  16419. assert isinstance(buffer_, mupdf.FzBuffer)
  16420. ret = mupdf.fz_buffer_extract_copy(buffer_)
  16421. return ret
  16422. def JM_EscapeStrFromStr(c):
  16423. # `c` is typically from SWIG which will have converted a `const char*` from
  16424. # C into a Python `str` using `PyUnicode_DecodeUTF8(carray, static_cast<
  16425. # Py_ssize_t >(size), "surrogateescape")`. This gives us a Python `str`
  16426. # with some characters encoded as a \0xdcXY sequence, where `XY` are hex
  16427. # digits for an invalid byte in the original `const char*`.
  16428. #
  16429. # This is actually a reasonable way of representing arbitrary
  16430. # strings from C, but we want to mimic what PyMuPDF does. It uses
  16431. # `PyUnicode_DecodeRawUnicodeEscape(c, (Py_ssize_t) strlen(c), "replace")`
  16432. # which gives a string containing actual unicode characters for any invalid
  16433. # bytes.
  16434. #
  16435. # We mimic this by converting the `str` to a `bytes` with 'surrogateescape'
  16436. # to recognise \0xdcXY sequences, then convert the individual bytes into a
  16437. # `str` using `chr()`.
  16438. #
  16439. # Would be good to have a more efficient way to do this.
  16440. #
  16441. if c is None:
  16442. return ''
  16443. assert isinstance(c, str), f'{type(c)=}'
  16444. b = c.encode('utf8', 'surrogateescape')
  16445. ret = ''
  16446. for bb in b:
  16447. ret += chr(bb)
  16448. return ret
  16449. def JM_BufferFromBytes(stream):
  16450. '''
  16451. Make fz_buffer from a PyBytes, PyByteArray or io.BytesIO object. If a text
  16452. io.BytesIO, we convert to binary by encoding as utf8.
  16453. '''
  16454. if isinstance(stream, (bytes, bytearray)):
  16455. data = stream
  16456. elif hasattr(stream, 'getvalue'):
  16457. data = stream.getvalue()
  16458. if isinstance(data, str):
  16459. data = data.encode('utf-8')
  16460. if not isinstance(data, (bytes, bytearray)):
  16461. raise Exception(f'.getvalue() returned unexpected type: {type(data)}')
  16462. else:
  16463. return mupdf.FzBuffer()
  16464. return mupdf.fz_new_buffer_from_copied_data(data)
  16465. def JM_FLOAT_ITEM(obj, idx):
  16466. if not PySequence_Check(obj):
  16467. return None
  16468. return float(obj[idx])
  16469. def JM_INT_ITEM(obj, idx):
  16470. if idx < len(obj):
  16471. temp = obj[idx]
  16472. if isinstance(temp, (int, float)):
  16473. return 0, temp
  16474. return 1, None
def JM_pixmap_from_page(doc, page, ctm, cs, alpha, annots, clip):
    '''
    Pixmap creation directly using a short-lived displaylist, so we can support
    separations.

    NOTE(review): the code below runs the page straight into a draw device; no
    display list object is created here - confirm whether the sentence above is
    still accurate.

    Args:
        doc: document, queried for its output intent.
        page: page to render.
        ctm: transformation matrix (converted with JM_matrix_from_py).
        cs: requested colorspace of the pixmap.
        alpha: whether the pixmap gets an alpha channel; it also selects how
            the pixmap is cleared before rendering.
        annots: if true the whole page is run, otherwise only page contents.
        clip: clip rectangle (converted with JM_rect_from_py).

    Returns:
        The rendered pixmap.
    '''
    SPOTS_NONE = 0
    SPOTS_OVERPRINT_SIM = 1
    SPOTS_FULL = 2
    FZ_ENABLE_SPOT_RENDERING = True # fixme: this is a build-time setting in MuPDF's config.h.
    if FZ_ENABLE_SPOT_RENDERING:
        spots = SPOTS_OVERPRINT_SIM
    else:
        spots = SPOTS_NONE
    seps = None
    colorspace = cs
    # Page bounds, intersected with the clip, transformed by the matrix and
    # rounded to an integer box, define the pixmap area.
    matrix = JM_matrix_from_py(ctm)
    rect = mupdf.fz_bound_page(page)
    rclip = JM_rect_from_py(clip)
    rect = mupdf.fz_intersect_rect(rect, rclip) # no-op if clip is not given
    rect = mupdf.fz_transform_rect(rect, matrix)
    bbox = mupdf.fz_round_rect(rect)
    # Pixmap of the document's /OutputIntents ("output intents")
    oi = mupdf.fz_document_output_intent(doc)
    # if present and compatible, use it instead of the parameter
    if oi.m_internal:
        # "compatible" here means: same number of colorants as `cs`.
        if mupdf.fz_colorspace_n(oi) == mupdf.fz_colorspace_n(cs):
            colorspace = mupdf.fz_keep_colorspace(oi)
    # check if spots rendering is available and if so use separations
    if spots != SPOTS_NONE:
        seps = mupdf.fz_page_separations(page)
        if seps.m_internal:
            n = mupdf.fz_count_separations(seps)
            # `spots` is SPOTS_OVERPRINT_SIM or SPOTS_NONE with the settings
            # above, so the SPOTS_FULL branch is not taken as written.
            if spots == SPOTS_FULL:
                for i in range(n):
                    mupdf.fz_set_separation_behavior(seps, i, mupdf.FZ_SEPARATION_SPOT)
            else:
                for i in range(n):
                    mupdf.fz_set_separation_behavior(seps, i, mupdf.FZ_SEPARATION_COMPOSITE)
        elif mupdf.fz_page_uses_overprint(page):
            # This page uses overprint, so we need an empty
            # sep object to force the overprint simulation on.
            seps = mupdf.fz_new_separations(0)
        elif oi.m_internal and mupdf.fz_colorspace_n(oi) != mupdf.fz_colorspace_n(colorspace):
            # We have an output intent, and it's incompatible
            # with the colorspace our device needs. Force the
            # overprint simulation on, because this ensures that
            # we 'simulate' the output intent too.
            seps = mupdf.fz_new_separations(0)
    pix = mupdf.fz_new_pixmap_with_bbox(colorspace, bbox, seps, alpha)
    # With alpha the pixmap is cleared via fz_clear_pixmap(); without alpha
    # every sample is set to 0xFF.
    if alpha:
        mupdf.fz_clear_pixmap(pix)
    else:
        mupdf.fz_clear_pixmap_with_value(pix, 0xFF)
    dev = mupdf.fz_new_draw_device(matrix, pix)
    if annots:
        mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
    else:
        mupdf.fz_run_page_contents(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
    mupdf.fz_close_device(dev)
    return pix
  16535. def JM_StrAsChar(x):
  16536. # fixme: should encode, but swig doesn't pass bytes to C as const char*.
  16537. return x
  16538. #return x.encode('utf8')
  16539. def JM_TUPLE(o: typing.Sequence) -> tuple:
  16540. return tuple(map(lambda x: round(x, 5) if abs(x) >= 1e-4 else 0, o))
  16541. def JM_TUPLE3(o: typing.Sequence) -> tuple:
  16542. return tuple(map(lambda x: round(x, 3) if abs(x) >= 1e-3 else 0, o))
  16543. def JM_UnicodeFromStr(s):
  16544. if s is None:
  16545. return ''
  16546. if isinstance(s, bytes):
  16547. s = s.decode('utf8')
  16548. assert isinstance(s, str), f'{type(s)=} {s=}'
  16549. return s
  16550. def JM_add_annot_id(annot, stem):
  16551. '''
  16552. Add a unique /NM key to an annotation or widget.
  16553. Append a number to 'stem' such that the result is a unique name.
  16554. '''
  16555. assert isinstance(annot, mupdf.PdfAnnot)
  16556. page = _pdf_annot_page(annot)
  16557. annot_obj = mupdf.pdf_annot_obj( annot)
  16558. names = JM_get_annot_id_list(page)
  16559. i = 0
  16560. while 1:
  16561. stem_id = f'{JM_annot_id_stem}-{stem}{i}'
  16562. if stem_id not in names:
  16563. break
  16564. i += 1
  16565. response = JM_StrAsChar(stem_id)
  16566. name = mupdf.pdf_new_string( response, len(response))
  16567. mupdf.pdf_dict_puts(annot_obj, "NM", name)
  16568. page.doc().m_internal.resynth_required = 0
  16569. def JM_add_oc_object(pdf, ref, xref):
  16570. '''
  16571. Add OC object reference to a dictionary
  16572. '''
  16573. indobj = mupdf.pdf_new_indirect(pdf, xref, 0)
  16574. if not mupdf.pdf_is_dict(indobj):
  16575. RAISEPY(MSG_BAD_OC_REF, PyExc_ValueError)
  16576. type_ = mupdf.pdf_dict_get(indobj, PDF_NAME('Type'))
  16577. if (mupdf.pdf_objcmp(type_, PDF_NAME('OCG')) == 0
  16578. or mupdf.pdf_objcmp(type_, PDF_NAME('OCMD')) == 0
  16579. ):
  16580. mupdf.pdf_dict_put(ref, PDF_NAME('OC'), indobj)
  16581. else:
  16582. RAISEPY(MSG_BAD_OC_REF, PyExc_ValueError)
  16583. def JM_annot_border(annot_obj):
  16584. dash_py = list()
  16585. style = None
  16586. width = -1
  16587. clouds = -1
  16588. obj = None
  16589. obj = mupdf.pdf_dict_get( annot_obj, PDF_NAME('Border'))
  16590. if mupdf.pdf_is_array( obj):
  16591. width = mupdf.pdf_to_real( mupdf.pdf_array_get( obj, 2))
  16592. if mupdf.pdf_array_len( obj) == 4:
  16593. dash = mupdf.pdf_array_get( obj, 3)
  16594. for i in range( mupdf.pdf_array_len( dash)):
  16595. val = mupdf.pdf_to_int( mupdf.pdf_array_get( dash, i))
  16596. dash_py.append( val)
  16597. bs_o = mupdf.pdf_dict_get( annot_obj, PDF_NAME('BS'))
  16598. if bs_o.m_internal:
  16599. width = mupdf.pdf_to_real( mupdf.pdf_dict_get( bs_o, PDF_NAME('W')))
  16600. style = mupdf.pdf_to_name( mupdf.pdf_dict_get( bs_o, PDF_NAME('S')))
  16601. if style == '':
  16602. style = None
  16603. obj = mupdf.pdf_dict_get( bs_o, PDF_NAME('D'))
  16604. if obj.m_internal:
  16605. for i in range( mupdf.pdf_array_len( obj)):
  16606. val = mupdf.pdf_to_int( mupdf.pdf_array_get( obj, i))
  16607. dash_py.append( val)
  16608. obj = mupdf.pdf_dict_get( annot_obj, PDF_NAME('BE'))
  16609. if obj.m_internal:
  16610. clouds = mupdf.pdf_to_int( mupdf.pdf_dict_get( obj, PDF_NAME('I')))
  16611. res = dict()
  16612. res[ dictkey_width] = width
  16613. res[ dictkey_dashes] = tuple( dash_py)
  16614. res[ dictkey_style] = style
  16615. res[ 'clouds'] = clouds
  16616. return res
  16617. def JM_annot_colors(annot_obj):
  16618. res = dict()
  16619. bc = list() # stroke colors
  16620. fc =list() # fill colors
  16621. o = mupdf.pdf_dict_get(annot_obj, mupdf.PDF_ENUM_NAME_C)
  16622. if mupdf.pdf_is_array(o):
  16623. n = mupdf.pdf_array_len(o)
  16624. for i in range(n):
  16625. col = mupdf.pdf_to_real( mupdf.pdf_array_get(o, i))
  16626. bc.append(col)
  16627. res[dictkey_stroke] = bc
  16628. o = mupdf.pdf_dict_gets(annot_obj, "IC")
  16629. if mupdf.pdf_is_array(o):
  16630. n = mupdf.pdf_array_len(o)
  16631. for i in range(n):
  16632. col = mupdf.pdf_to_real( mupdf.pdf_array_get(o, i))
  16633. fc.append(col)
  16634. res[dictkey_fill] = fc
  16635. return res
  16636. def JM_annot_set_border( border, doc, annot_obj):
  16637. assert isinstance(border, dict)
  16638. obj = None
  16639. dashlen = 0
  16640. nwidth = border.get( dictkey_width) # new width
  16641. ndashes = border.get( dictkey_dashes) # new dashes
  16642. nstyle = border.get( dictkey_style) # new style
  16643. nclouds = border.get( 'clouds', -1) # new clouds value
  16644. # get old border properties
  16645. oborder = JM_annot_border( annot_obj)
  16646. # delete border-related entries
  16647. mupdf.pdf_dict_del( annot_obj, PDF_NAME('BS'))
  16648. mupdf.pdf_dict_del( annot_obj, PDF_NAME('BE'))
  16649. mupdf.pdf_dict_del( annot_obj, PDF_NAME('Border'))
  16650. # populate border items: keep old values for any omitted new ones
  16651. if nwidth < 0:
  16652. nwidth = oborder.get( dictkey_width) # no new width: keep current
  16653. if ndashes is None:
  16654. ndashes = oborder.get( dictkey_dashes) # no new dashes: keep old
  16655. if nstyle is None:
  16656. nstyle = oborder.get( dictkey_style) # no new style: keep old
  16657. if nclouds < 0:
  16658. nclouds = oborder.get( "clouds", -1) # no new clouds: keep old
  16659. if isinstance( ndashes, tuple) and len( ndashes) > 0:
  16660. dashlen = len( ndashes)
  16661. darr = mupdf.pdf_new_array( doc, dashlen)
  16662. for d in ndashes:
  16663. mupdf.pdf_array_push_int( darr, d)
  16664. mupdf.pdf_dict_putl( annot_obj, darr, PDF_NAME('BS'), PDF_NAME('D'))
  16665. mupdf.pdf_dict_putl(
  16666. annot_obj,
  16667. mupdf.pdf_new_real( nwidth),
  16668. PDF_NAME('BS'),
  16669. PDF_NAME('W'),
  16670. )
  16671. if dashlen == 0:
  16672. obj = JM_get_border_style( nstyle)
  16673. else:
  16674. obj = PDF_NAME('D')
  16675. mupdf.pdf_dict_putl( annot_obj, obj, PDF_NAME('BS'), PDF_NAME('S'))
  16676. if nclouds > 0:
  16677. mupdf.pdf_dict_put_dict( annot_obj, PDF_NAME('BE'), 2)
  16678. obj = mupdf.pdf_dict_get( annot_obj, PDF_NAME('BE'))
  16679. mupdf.pdf_dict_put( obj, PDF_NAME('S'), PDF_NAME('C'))
  16680. mupdf.pdf_dict_put_int( obj, PDF_NAME('I'), nclouds)
  16681. def make_escape(ch):
  16682. if ch == 92:
  16683. return "\\u005c"
  16684. elif 32 <= ch <= 127 or ch == 10:
  16685. return chr(ch)
  16686. elif 0xd800 <= ch <= 0xdfff: # orphaned surrogate
  16687. return "\\ufffd"
  16688. elif ch <= 0xffff:
  16689. return "\\u%04x" % ch
  16690. else:
  16691. return "\\U%08x" % ch
  16692. def JM_append_rune(buff, ch):
  16693. """
  16694. APPEND non-ascii runes in unicode escape format to fz_buffer.
  16695. """
  16696. mupdf.fz_append_string(buff, make_escape(ch))
  16697. def JM_append_word(lines, buff, wbbox, block_n, line_n, word_n):
  16698. '''
  16699. Functions for wordlist output
  16700. '''
  16701. s = JM_EscapeStrFromBuffer(buff)
  16702. litem = (
  16703. wbbox.x0,
  16704. wbbox.y0,
  16705. wbbox.x1,
  16706. wbbox.y1,
  16707. s,
  16708. block_n,
  16709. line_n,
  16710. word_n,
  16711. )
  16712. lines.append(litem)
  16713. return word_n + 1, mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY) # word counter
  16714. def JM_add_layer_config( pdf, name, creator, ON):
  16715. '''
  16716. Add OC configuration to the PDF catalog
  16717. '''
  16718. ocp = JM_ensure_ocproperties( pdf)
  16719. configs = mupdf.pdf_dict_get( ocp, PDF_NAME('Configs'))
  16720. if not mupdf.pdf_is_array( configs):
  16721. configs = mupdf.pdf_dict_put_array( ocp, PDF_NAME('Configs'), 1)
  16722. D = mupdf.pdf_new_dict( pdf, 5)
  16723. mupdf.pdf_dict_put_text_string( D, PDF_NAME('Name'), name)
  16724. if creator is not None:
  16725. mupdf.pdf_dict_put_text_string( D, PDF_NAME('Creator'), creator)
  16726. mupdf.pdf_dict_put( D, PDF_NAME('BaseState'), PDF_NAME('OFF'))
  16727. onarray = mupdf.pdf_dict_put_array( D, PDF_NAME('ON'), 5)
  16728. if not ON:
  16729. pass
  16730. else:
  16731. ocgs = mupdf.pdf_dict_get( ocp, PDF_NAME('OCGs'))
  16732. n = len(ON)
  16733. for i in range(n):
  16734. xref = 0
  16735. e, xref = JM_INT_ITEM(ON, i)
  16736. if e == 1:
  16737. continue
  16738. ind = mupdf.pdf_new_indirect( pdf, xref, 0)
  16739. if mupdf.pdf_array_contains( ocgs, ind):
  16740. mupdf.pdf_array_push( onarray, ind)
  16741. mupdf.pdf_array_push( configs, D)
  16742. def JM_char_bbox(line, ch):
  16743. '''
  16744. return rect of char quad
  16745. '''
  16746. q = JM_char_quad(line, ch)
  16747. r = mupdf.fz_rect_from_quad(q)
  16748. if not line.m_internal.wmode:
  16749. return r
  16750. if r.y1 < r.y0 + ch.m_internal.size:
  16751. r.y0 = r.y1 - ch.m_internal.size
  16752. return r
  16753. def JM_char_font_flags(font, line, ch):
  16754. flags = 0
  16755. if line and ch:
  16756. flags += detect_super_script(line, ch)
  16757. flags += mupdf.fz_font_is_italic(font) * TEXT_FONT_ITALIC
  16758. flags += mupdf.fz_font_is_serif(font) * TEXT_FONT_SERIFED
  16759. flags += mupdf.fz_font_is_monospaced(font) * TEXT_FONT_MONOSPACED
  16760. flags += mupdf.fz_font_is_bold(font) * TEXT_FONT_BOLD
  16761. return flags
def JM_char_quad(line, ch):
    '''
    re-compute char quad if ascender/descender values make no sense

    Args:
        line: the text line (`mupdf.FzStextLine`) containing the character.
        ch: the character (`mupdf.FzStextChar`).

    Returns:
        The character's quad. It is returned unchanged if
        `_globals.skip_quad_corrections` is set, if the line's `wmode` is
        set, or if the font's ascender minus descender is at least 1 while
        `_globals.small_glyph_heights` is 0. Otherwise a quad is returned
        whose vertical extent is recomputed from adjusted ascender and
        descender values.
    '''
    if 1 and g_use_extra:
        # This reduces time taken to extract text from PyMuPDF.pdf from 20s to
        # 15s.
        return mupdf.FzQuad(extra.JM_char_quad( line.m_internal, ch.m_internal))
    assert isinstance(line, mupdf.FzStextLine)
    assert isinstance(ch, mupdf.FzStextChar)
    if _globals.skip_quad_corrections: # no special handling
        return ch.quad
    if line.m_internal.wmode: # never touch vertical write mode
        return ch.quad
    font = mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font))
    asc = JM_font_ascender(font)
    dsc = JM_font_descender(font)
    fsize = ch.m_internal.size
    asc_dsc = asc - dsc + FLT_EPSILON
    if asc_dsc >= 1 and _globals.small_glyph_heights == 0: # no problem
        return mupdf.FzQuad(ch.m_internal.quad)
    # Re-compute quad with adjusted ascender / descender values:
    # Move ch->origin to (0,0) and de-rotate quad, then adjust the corners,
    # re-rotate and move back to ch->origin location.
    fsize = ch.m_internal.size
    bbox = mupdf.fz_font_bbox(font)
    fwidth = bbox.x1 - bbox.x0
    if asc < 1e-3: # probably Tesseract glyphless font
        dsc = -0.1
        asc = 0.9
        asc_dsc = 1.0
    if _globals.small_glyph_heights or asc_dsc < 1:
        dsc = dsc / asc_dsc
        asc = asc / asc_dsc
    # Scale so that ascender minus descender equals the font size.
    asc_dsc = asc - dsc
    asc = asc * fsize / asc_dsc
    dsc = dsc * fsize / asc_dsc
    # Re-compute quad with the adjusted ascender / descender values:
    # Move ch->origin to (0,0) and de-rotate quad, then adjust the corners,
    # re-rotate and move back to ch->origin location.
    c = line.m_internal.dir.x # cosine
    s = line.m_internal.dir.y # sine
    trm1 = mupdf.fz_make_matrix(c, -s, s, c, 0, 0) # derotate
    trm2 = mupdf.fz_make_matrix(c, s, -s, c, 0, 0) # rotate
    if (c == -1): # left-right flip
        trm1.d = 1
        trm2.d = 1
    xlate1 = mupdf.fz_make_matrix(1, 0, 0, 1, -ch.m_internal.origin.x, -ch.m_internal.origin.y)
    xlate2 = mupdf.fz_make_matrix(1, 0, 0, 1, ch.m_internal.origin.x, ch.m_internal.origin.y)
    quad = mupdf.fz_transform_quad(mupdf.FzQuad(ch.m_internal.quad), xlate1) # move origin to (0,0)
    quad = mupdf.fz_transform_quad(quad, trm1) # de-rotate corners
    # adjust vertical coordinates
    if c == 1 and quad.ul.y > 0: # up-down flip
        quad.ul.y = asc
        quad.ur.y = asc
        quad.ll.y = dsc
        quad.lr.y = dsc
    else:
        quad.ul.y = -asc
        quad.ur.y = -asc
        quad.ll.y = -dsc
        quad.lr.y = -dsc
    # adjust horizontal coordinates that are too crazy:
    # (1) left x must be >= 0
    # (2) if bbox width is 0, lookup char advance in font.
    if quad.ll.x < 0:
        quad.ll.x = 0
        quad.ul.x = 0
    cwidth = quad.lr.x - quad.ll.x
    if cwidth < FLT_EPSILON:
        glyph = mupdf.fz_encode_character( font, ch.m_internal.c)
        if glyph:
            # The glyph advance times the font size becomes the quad width.
            fwidth = mupdf.fz_advance_glyph( font, glyph, line.m_internal.wmode)
            quad.lr.x = quad.ll.x + fwidth * fsize
            quad.ur.x = quad.lr.x
    quad = mupdf.fz_transform_quad(quad, trm2) # rotate back
    quad = mupdf.fz_transform_quad(quad, xlate2) # translate back
    return quad
  16840. def JM_choice_options(annot):
  16841. '''
  16842. return list of choices for list or combo boxes
  16843. '''
  16844. annot_obj = mupdf.pdf_annot_obj( annot.this)
  16845. opts = mupdf.pdf_choice_widget_options2( annot, 0)
  16846. n = len( opts)
  16847. if n == 0:
  16848. return # wrong widget type
  16849. optarr = mupdf.pdf_dict_get( annot_obj, PDF_NAME('Opt'))
  16850. liste = []
  16851. for i in range( n):
  16852. m = mupdf.pdf_array_len( mupdf.pdf_array_get( optarr, i))
  16853. if m == 2:
  16854. val = (
  16855. mupdf.pdf_to_text_string( mupdf.pdf_array_get( mupdf.pdf_array_get( optarr, i), 0)),
  16856. mupdf.pdf_to_text_string( mupdf.pdf_array_get( mupdf.pdf_array_get( optarr, i), 1)),
  16857. )
  16858. liste.append( val)
  16859. else:
  16860. val = mupdf.pdf_to_text_string( mupdf.pdf_array_get( optarr, i))
  16861. liste.append( val)
  16862. return liste
  16863. def JM_clear_pixmap_rect_with_value(dest, value, b):
  16864. '''
  16865. Clear a pixmap rectangle - my version also supports non-alpha pixmaps
  16866. '''
  16867. b = mupdf.fz_intersect_irect(b, mupdf.fz_pixmap_bbox(dest))
  16868. w = b.x1 - b.x0
  16869. y = b.y1 - b.y0
  16870. if w <= 0 or y <= 0:
  16871. return 0
  16872. destspan = dest.stride()
  16873. destp = destspan * (b.y0 - dest.y()) + dest.n() * (b.x0 - dest.x())
  16874. # CMYK needs special handling (and potentially any other subtractive colorspaces)
  16875. if mupdf.fz_colorspace_n(dest.colorspace()) == 4:
  16876. value = 255 - value
  16877. while 1:
  16878. s = destp
  16879. for x in range(0, w):
  16880. mupdf.fz_samples_set(dest, s, 0)
  16881. s += 1
  16882. mupdf.fz_samples_set(dest, s, 0)
  16883. s += 1
  16884. mupdf.fz_samples_set(dest, s, 0)
  16885. s += 1
  16886. mupdf.fz_samples_set(dest, s, value)
  16887. s += 1
  16888. if dest.alpha():
  16889. mupdf.fz_samples_set(dest, s, 255)
  16890. s += 1
  16891. destp += destspan
  16892. if y == 0:
  16893. break
  16894. y -= 1
  16895. return 1
  16896. while 1:
  16897. s = destp
  16898. for x in range(w):
  16899. for k in range(dest.n()-1):
  16900. mupdf.fz_samples_set(dest, s, value)
  16901. s += 1
  16902. if dest.alpha():
  16903. mupdf.fz_samples_set(dest, s, 255)
  16904. s += 1
  16905. else:
  16906. mupdf.fz_samples_set(dest, s, value)
  16907. s += 1
  16908. destp += destspan
  16909. if y == 0:
  16910. break
  16911. y -= 1
  16912. return 1
  16913. def JM_color_FromSequence(color):
  16914. if isinstance(color, (int, float)): # maybe just a single float
  16915. color = [color]
  16916. if not isinstance( color, (list, tuple)):
  16917. return -1, []
  16918. if len(color) not in (0, 1, 3, 4):
  16919. return -1, []
  16920. ret = color[:]
  16921. for i in range(len(ret)):
  16922. if ret[i] < 0 or ret[i] > 1:
  16923. ret[i] = 1
  16924. return len(ret), ret
def JM_color_count( pm, clip):
    '''
    Count the pixels of each color of a pixmap within a clip rectangle.

    The call is delegated to `extra.ll_JM_color_count()`, whose result is
    returned.

    NOTE: the condition `1 or g_use_extra` is always true, so all code after
    the first `return` is unreachable. It is a Python version of the count:
    it walks the clipped area row by row and accumulates, in a dict keyed by
    the pixel's sample bytes, the number of pixels having that value.
    '''
    if 1 or g_use_extra:
        return extra.ll_JM_color_count(pm.m_internal, clip)
    rc = dict()
    cnt = 0
    irect = mupdf.fz_pixmap_bbox( pm)
    irect = mupdf.fz_intersect_irect(irect, mupdf.fz_round_rect(JM_rect_from_py(clip)))
    stride = pm.stride()
    width = irect.x1 - irect.x0
    height = irect.y1 - irect.y0
    n = pm.n()
    substride = width * n
    # Offset of the first sample of the top-left pixel of the clipped area.
    s = stride * (irect.y0 - pm.y()) + (irect.x0 - pm.x()) * n
    oldpix = _read_samples( pm, s, n)
    cnt = 0
    if mupdf.fz_is_empty_irect(irect):
        return rc
    for i in range( height):
        for j in range( 0, substride, n):
            newpix = _read_samples( pm, s + j, n)
            if newpix != oldpix:
                # A run of equal pixels ended: add its length to the count
                # stored for that pixel value and start a new run.
                pixel = oldpix
                c = rc.get( pixel, None)
                if c is not None:
                    cnt += c
                rc[ pixel] = cnt
                cnt = 1
                oldpix = newpix
            else:
                cnt += 1
        s += stride
    # Store the final run.
    pixel = oldpix
    c = rc.get( pixel)
    if c is not None:
        cnt += c
    rc[ pixel] = cnt
    return rc
  16962. def JM_compress_buffer(inbuffer):
  16963. '''
  16964. compress char* into a new buffer
  16965. '''
  16966. data, compressed_length = mupdf.fz_new_deflated_data_from_buffer(
  16967. inbuffer,
  16968. mupdf.FZ_DEFLATE_BEST,
  16969. )
  16970. #log( '{=data compressed_length}')
  16971. if not data or compressed_length == 0:
  16972. return None
  16973. buf = mupdf.FzBuffer(mupdf.fz_new_buffer_from_data(data, compressed_length))
  16974. mupdf.fz_resize_buffer(buf, compressed_length)
  16975. return buf
  16976. def JM_copy_rectangle(page, area):
  16977. need_new_line = 0
  16978. buffer = io.StringIO()
  16979. for block in page:
  16980. if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
  16981. continue
  16982. for line in block:
  16983. line_had_text = 0
  16984. for ch in line:
  16985. r = JM_char_bbox(line, ch)
  16986. if JM_rects_overlap(area, r):
  16987. line_had_text = 1
  16988. if need_new_line:
  16989. buffer.write("\n")
  16990. need_new_line = 0
  16991. buffer.write(make_escape(ch.m_internal.c))
  16992. if line_had_text:
  16993. need_new_line = 1
  16994. s = buffer.getvalue() # take over the data
  16995. return s
  16996. def JM_convert_to_pdf(doc, fp, tp, rotate):
  16997. '''
  16998. Convert any MuPDF document to a PDF
  16999. Returns bytes object containing the PDF, created via 'write' function.
  17000. '''
  17001. pdfout = mupdf.PdfDocument()
  17002. incr = 1
  17003. s = fp
  17004. e = tp
  17005. if fp > tp:
  17006. incr = -1 # count backwards
  17007. s = tp # adjust ...
  17008. e = fp # ... range
  17009. rot = JM_norm_rotation(rotate)
  17010. i = fp
  17011. while 1: # interpret & write document pages as PDF pages
  17012. if not _INRANGE(i, s, e):
  17013. break
  17014. page = mupdf.fz_load_page(doc, i)
  17015. mediabox = mupdf.fz_bound_page(page)
  17016. dev, resources, contents = mupdf.pdf_page_write(pdfout, mediabox)
  17017. mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
  17018. mupdf.fz_close_device(dev)
  17019. dev = None
  17020. page_obj = mupdf.pdf_add_page(pdfout, mediabox, rot, resources, contents)
  17021. mupdf.pdf_insert_page(pdfout, -1, page_obj)
  17022. i += incr
  17023. # PDF created - now write it to Python bytearray
  17024. # prepare write options structure
  17025. opts = mupdf.PdfWriteOptions()
  17026. opts.do_garbage = 4
  17027. opts.do_compress = 1
  17028. opts.do_compress_images = 1
  17029. opts.do_compress_fonts = 1
  17030. opts.do_sanitize = 1
  17031. opts.do_incremental = 0
  17032. opts.do_ascii = 0
  17033. opts.do_decompress = 0
  17034. opts.do_linear = 0
  17035. opts.do_clean = 1
  17036. opts.do_pretty = 0
  17037. res = mupdf.fz_new_buffer(8192)
  17038. out = mupdf.FzOutput(res)
  17039. mupdf.pdf_write_document(pdfout, out, opts)
  17040. out.fz_close_output()
  17041. c = mupdf.fz_buffer_extract_copy(res)
  17042. assert isinstance(c, bytes)
  17043. return c
  17044. # Copied from MuPDF v1.14
  17045. # Create widget
def JM_create_widget(doc, page, type, fieldname):
    '''
    Create a new widget annotation on `page` and register it as a form field.

    Args:
        doc: the PDF document.
        page: the PDF page receiving the widget.
        type: the widget type (compared against
            `mupdf.PDF_WIDGET_TYPE_SIGNATURE`); passed to JM_set_field_type().
        fieldname: text stored under /T of the widget.

    Returns:
        The new annotation.

    Raises:
        Any exception raised while setting up the widget is re-raised after
        the annotation was deleted again and, for signature widgets, the
        previous /SigFlags value was written back.
    '''
    # Remember the current flags so they can be restored on failure.
    old_sigflags = mupdf.pdf_to_int(mupdf.pdf_dict_getp(mupdf.pdf_trailer(doc), "Root/AcroForm/SigFlags"))
    #log( '*** JM_create_widget()')
    #log( f'{mupdf.pdf_create_annot_raw=}')
    #log( f'{page=}')
    #log( f'{mupdf.PDF_ANNOT_WIDGET=}')
    annot = mupdf.pdf_create_annot_raw(page, mupdf.PDF_ANNOT_WIDGET)
    annot_obj = mupdf.pdf_annot_obj(annot)
    try:
        JM_set_field_type(doc, annot_obj, type)
        mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('T'), fieldname)
        if type == mupdf.PDF_WIDGET_TYPE_SIGNATURE:
            sigflags = old_sigflags | (SigFlag_SignaturesExist | SigFlag_AppendOnly)
            mupdf.pdf_dict_putl(
                    mupdf.pdf_trailer(doc),
                    mupdf.pdf_new_int(sigflags),
                    PDF_NAME('Root'),
                    PDF_NAME('AcroForm'),
                    PDF_NAME('SigFlags'),
                    )
        # pdf_create_annot will have linked the new widget into the page's
        # annot array. We also need it linked into the document's form
        form = mupdf.pdf_dict_getp(mupdf.pdf_trailer(doc), "Root/AcroForm/Fields")
        if not form.m_internal:
            # No /Fields array yet: create one and store it in the AcroForm.
            form = mupdf.pdf_new_array(doc, 1)
            mupdf.pdf_dict_putl(
                    mupdf.pdf_trailer(doc),
                    form,
                    PDF_NAME('Root'),
                    PDF_NAME('AcroForm'),
                    PDF_NAME('Fields'),
                    )
        mupdf.pdf_array_push(form, annot_obj) # Cleanup relies on this statement being last
    except Exception:
        if g_exceptions_verbose: exception_info()
        # Undo: remove the annotation and restore the previous /SigFlags.
        mupdf.pdf_delete_annot(page, annot)
        if type == mupdf.PDF_WIDGET_TYPE_SIGNATURE:
            mupdf.pdf_dict_putl(
                    mupdf.pdf_trailer(doc),
                    mupdf.pdf_new_int(old_sigflags),
                    PDF_NAME('Root'),
                    PDF_NAME('AcroForm'),
                    PDF_NAME('SigFlags'),
                    )
        raise
    return annot
  17092. def JM_cropbox(page_obj):
  17093. '''
  17094. return a PDF page's CropBox
  17095. '''
  17096. if g_use_extra:
  17097. return extra.JM_cropbox(page_obj)
  17098. mediabox = JM_mediabox(page_obj)
  17099. cropbox = mupdf.pdf_to_rect(
  17100. mupdf.pdf_dict_get_inheritable(page_obj, PDF_NAME('CropBox'))
  17101. )
  17102. if mupdf.fz_is_infinite_rect(cropbox) or mupdf.fz_is_empty_rect(cropbox):
  17103. cropbox = mediabox
  17104. y0 = mediabox.y1 - cropbox.y1
  17105. y1 = mediabox.y1 - cropbox.y0
  17106. cropbox.y0 = y0
  17107. cropbox.y1 = y1
  17108. return cropbox
  17109. def JM_cropbox_size(page_obj):
  17110. rect = JM_cropbox(page_obj)
  17111. w = abs(rect.x1 - rect.x0)
  17112. h = abs(rect.y1 - rect.y0)
  17113. size = mupdf.fz_make_point(w, h)
  17114. return size
  17115. def JM_derotate_page_matrix(page):
  17116. '''
  17117. just the inverse of rotation
  17118. '''
  17119. mp = JM_rotate_page_matrix(page)
  17120. return mupdf.fz_invert_matrix(mp)
  17121. def JM_embed_file(
  17122. pdf,
  17123. buf,
  17124. filename,
  17125. ufilename,
  17126. desc,
  17127. compress,
  17128. ):
  17129. '''
  17130. embed a new file in a PDF (not only /EmbeddedFiles entries)
  17131. '''
  17132. len_ = 0
  17133. val = mupdf.pdf_new_dict(pdf, 6)
  17134. mupdf.pdf_dict_put_dict(val, PDF_NAME('CI'), 4)
  17135. ef = mupdf.pdf_dict_put_dict(val, PDF_NAME('EF'), 4)
  17136. mupdf.pdf_dict_put_text_string(val, PDF_NAME('F'), filename)
  17137. mupdf.pdf_dict_put_text_string(val, PDF_NAME('UF'), ufilename)
  17138. mupdf.pdf_dict_put_text_string(val, PDF_NAME('Desc'), desc)
  17139. mupdf.pdf_dict_put(val, PDF_NAME('Type'), PDF_NAME('Filespec'))
  17140. bs = b' '
  17141. f = mupdf.pdf_add_stream(
  17142. pdf,
  17143. #mupdf.fz_fz_new_buffer_from_copied_data(bs),
  17144. mupdf.fz_new_buffer_from_copied_data(bs),
  17145. mupdf.PdfObj(),
  17146. 0,
  17147. )
  17148. mupdf.pdf_dict_put(ef, PDF_NAME('F'), f)
  17149. JM_update_stream(pdf, f, buf, compress)
  17150. len_, _ = mupdf.fz_buffer_storage(buf)
  17151. mupdf.pdf_dict_put_int(f, PDF_NAME('DL'), len_)
  17152. mupdf.pdf_dict_put_int(f, PDF_NAME('Length'), len_)
  17153. params = mupdf.pdf_dict_put_dict(f, PDF_NAME('Params'), 4)
  17154. mupdf.pdf_dict_put_int(params, PDF_NAME('Size'), len_)
  17155. return val
  17156. def JM_embedded_clean(pdf):
  17157. '''
  17158. perform some cleaning if we have /EmbeddedFiles:
  17159. (1) remove any /Limits if /Names exists
  17160. (2) remove any empty /Collection
  17161. (3) set /PageMode/UseAttachments
  17162. '''
  17163. root = mupdf.pdf_dict_get( mupdf.pdf_trailer( pdf), PDF_NAME('Root'))
  17164. # remove any empty /Collection entry
  17165. coll = mupdf.pdf_dict_get(root, PDF_NAME('Collection'))
  17166. if coll.m_internal and mupdf.pdf_dict_len(coll) == 0:
  17167. mupdf.pdf_dict_del(root, PDF_NAME('Collection'))
  17168. efiles = mupdf.pdf_dict_getl(
  17169. root,
  17170. PDF_NAME('Names'),
  17171. PDF_NAME('EmbeddedFiles'),
  17172. PDF_NAME('Names'),
  17173. )
  17174. if efiles.m_internal:
  17175. mupdf.pdf_dict_put_name(root, PDF_NAME('PageMode'), "UseAttachments")
  17176. def JM_EscapeStrFromBuffer(buff):
  17177. if not buff.m_internal:
  17178. return ''
  17179. s = mupdf.fz_buffer_extract_copy(buff)
  17180. val = PyUnicode_DecodeRawUnicodeEscape(s, errors='replace')
  17181. return val
  17182. def JM_ensure_identity(pdf):
  17183. '''
  17184. Store ID in PDF trailer
  17185. '''
  17186. id_ = mupdf.pdf_dict_get( mupdf.pdf_trailer(pdf), PDF_NAME('ID'))
  17187. if not id_.m_internal:
  17188. rnd0 = mupdf.fz_memrnd2(16)
  17189. # Need to convert raw bytes into a str to send to
  17190. # mupdf.pdf_new_string(). chr() seems to work for this.
  17191. rnd = ''
  17192. for i in rnd0:
  17193. rnd += chr(i)
  17194. id_ = mupdf.pdf_dict_put_array( mupdf.pdf_trailer( pdf), PDF_NAME('ID'), 2)
  17195. mupdf.pdf_array_push( id_, mupdf.pdf_new_string( rnd, len(rnd)))
  17196. mupdf.pdf_array_push( id_, mupdf.pdf_new_string( rnd, len(rnd)))
  17197. def JM_ensure_ocproperties(pdf):
  17198. '''
  17199. Ensure OCProperties, return /OCProperties key
  17200. '''
  17201. ocp = mupdf.pdf_dict_get(mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root')), PDF_NAME('OCProperties'))
  17202. if ocp.m_internal:
  17203. return ocp
  17204. root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
  17205. ocp = mupdf.pdf_dict_put_dict(root, PDF_NAME('OCProperties'), 2)
  17206. mupdf.pdf_dict_put_array(ocp, PDF_NAME('OCGs'), 0)
  17207. D = mupdf.pdf_dict_put_dict(ocp, PDF_NAME('D'), 5)
  17208. mupdf.pdf_dict_put_array(D, PDF_NAME('ON'), 0)
  17209. mupdf.pdf_dict_put_array(D, PDF_NAME('OFF'), 0)
  17210. mupdf.pdf_dict_put_array(D, PDF_NAME('Order'), 0)
  17211. mupdf.pdf_dict_put_array(D, PDF_NAME('RBGroups'), 0)
  17212. return ocp
  17213. def JM_expand_fname(name):
  17214. '''
  17215. Make /DA string of annotation
  17216. '''
  17217. if not name: return "Helv"
  17218. if name.startswith("Co"): return "Cour"
  17219. if name.startswith("co"): return "Cour"
  17220. if name.startswith("Ti"): return "TiRo"
  17221. if name.startswith("ti"): return "TiRo"
  17222. if name.startswith("Sy"): return "Symb"
  17223. if name.startswith("sy"): return "Symb"
  17224. if name.startswith("Za"): return "ZaDb"
  17225. if name.startswith("za"): return "ZaDb"
  17226. return "Helv"
  17227. def JM_field_type_text(wtype):
  17228. '''
  17229. String from widget type
  17230. '''
  17231. if wtype == mupdf.PDF_WIDGET_TYPE_BUTTON:
  17232. return "Button"
  17233. if wtype == mupdf.PDF_WIDGET_TYPE_CHECKBOX:
  17234. return "CheckBox"
  17235. if wtype == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON:
  17236. return "RadioButton"
  17237. if wtype == mupdf.PDF_WIDGET_TYPE_TEXT:
  17238. return "Text"
  17239. if wtype == mupdf.PDF_WIDGET_TYPE_LISTBOX:
  17240. return "ListBox"
  17241. if wtype == mupdf.PDF_WIDGET_TYPE_COMBOBOX:
  17242. return "ComboBox"
  17243. if wtype == mupdf.PDF_WIDGET_TYPE_SIGNATURE:
  17244. return "Signature"
  17245. return "unknown"
  17246. def JM_fill_pixmap_rect_with_color(dest, col, b):
  17247. assert isinstance(dest, mupdf.FzPixmap)
  17248. # fill a rect with a color tuple
  17249. b = mupdf.fz_intersect_irect(b, mupdf.fz_pixmap_bbox( dest))
  17250. w = b.x1 - b.x0
  17251. y = b.y1 - b.y0
  17252. if w <= 0 or y <= 0:
  17253. return 0
  17254. destspan = dest.stride()
  17255. destp = destspan * (b.y0 - dest.y()) + dest.n() * (b.x0 - dest.x())
  17256. while 1:
  17257. s = destp
  17258. for x in range(w):
  17259. for i in range( dest.n()):
  17260. mupdf.fz_samples_set(dest, s, col[i])
  17261. s += 1
  17262. destp += destspan
  17263. y -= 1
  17264. if y == 0:
  17265. break
  17266. return 1
  17267. def JM_find_annot_irt(annot):
  17268. '''
  17269. Return the first annotation whose /IRT key ("In Response To") points to
  17270. annot. Used to remove the response chain of a given annotation.
  17271. '''
  17272. assert isinstance(annot, mupdf.PdfAnnot)
  17273. irt_annot = None # returning this
  17274. annot_obj = mupdf.pdf_annot_obj(annot)
  17275. found = 0
  17276. # loop thru MuPDF's internal annots array
  17277. page = _pdf_annot_page(annot)
  17278. irt_annot = mupdf.pdf_first_annot(page)
  17279. while 1:
  17280. assert isinstance(irt_annot, mupdf.PdfAnnot)
  17281. if not irt_annot.m_internal:
  17282. break
  17283. irt_annot_obj = mupdf.pdf_annot_obj(irt_annot)
  17284. o = mupdf.pdf_dict_gets(irt_annot_obj, 'IRT')
  17285. if o.m_internal:
  17286. if not mupdf.pdf_objcmp(o, annot_obj):
  17287. found = 1
  17288. break
  17289. irt_annot = mupdf.pdf_next_annot(irt_annot)
  17290. if found:
  17291. return irt_annot
  17292. def JM_font_ascender(font):
  17293. '''
  17294. need own versions of ascender / descender
  17295. '''
  17296. assert isinstance(font, mupdf.FzFont)
  17297. if _globals.skip_quad_corrections:
  17298. return 0.8
  17299. return mupdf.fz_font_ascender(font)
  17300. def JM_font_descender(font):
  17301. '''
  17302. need own versions of ascender / descender
  17303. '''
  17304. assert isinstance(font, mupdf.FzFont)
  17305. if _globals.skip_quad_corrections:
  17306. return -0.2
  17307. ret = mupdf.fz_font_descender(font)
  17308. return ret
  17309. def JM_is_word_delimiter(ch, delimiters):
  17310. """Check if ch is an extra word delimiting character.
  17311. """
  17312. if (0
  17313. or ch <= 32
  17314. or ch == 160
  17315. or 0x202a <= ch <= 0x202e
  17316. ):
  17317. # covers any whitespace plus unicodes that switch between
  17318. # right-to-left and left-to-right languages
  17319. return True
  17320. if not delimiters: # no extra delimiters provided
  17321. return False
  17322. char = chr(ch)
  17323. for d in delimiters:
  17324. if d == char:
  17325. return True
  17326. return False
  17327. def JM_is_rtl_char(ch):
  17328. if ch < 0x590 or ch > 0x900:
  17329. return False
  17330. return True
  17331. def JM_font_name(font):
  17332. assert isinstance(font, mupdf.FzFont)
  17333. name = mupdf.fz_font_name(font)
  17334. s = name.find('+')
  17335. if _globals.subset_fontnames or s == -1 or s != 6:
  17336. return name
  17337. return name[s + 1:]
  17338. def JM_gather_fonts(pdf, dict_, fontlist, stream_xref):
  17339. rc = 1
  17340. n = mupdf.pdf_dict_len(dict_)
  17341. for i in range(n):
  17342. refname = mupdf.pdf_dict_get_key(dict_, i)
  17343. fontdict = mupdf.pdf_dict_get_val(dict_, i)
  17344. if not mupdf.pdf_is_dict(fontdict):
  17345. mupdf.fz_warn( f"'{mupdf.pdf_to_name(refname)}' is no font dict ({mupdf.pdf_to_num(fontdict)} 0 R)")
  17346. continue
  17347. subtype = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_Subtype)
  17348. basefont = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_BaseFont)
  17349. if not basefont.m_internal or mupdf.pdf_is_null(basefont):
  17350. name = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_Name)
  17351. else:
  17352. name = basefont
  17353. encoding = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_Encoding)
  17354. if mupdf.pdf_is_dict(encoding):
  17355. encoding = mupdf.pdf_dict_get(encoding, mupdf.PDF_ENUM_NAME_BaseEncoding)
  17356. xref = mupdf.pdf_to_num(fontdict)
  17357. ext = "n/a"
  17358. if xref:
  17359. ext = JM_get_fontextension(pdf, xref)
  17360. entry = (
  17361. xref,
  17362. ext,
  17363. mupdf.pdf_to_name(subtype),
  17364. JM_EscapeStrFromStr(mupdf.pdf_to_name(name)),
  17365. mupdf.pdf_to_name(refname),
  17366. mupdf.pdf_to_name(encoding),
  17367. stream_xref,
  17368. )
  17369. fontlist.append(entry)
  17370. return rc
  17371. def JM_gather_forms(doc, dict_: mupdf.PdfObj, imagelist, stream_xref: int):
  17372. '''
  17373. Store info of a /Form xobject in Python list
  17374. '''
  17375. assert isinstance(doc, mupdf.PdfDocument)
  17376. rc = 1
  17377. n = mupdf.pdf_dict_len(dict_)
  17378. for i in range(n):
  17379. refname = mupdf.pdf_dict_get_key( dict_, i)
  17380. imagedict = mupdf.pdf_dict_get_val(dict_, i)
  17381. if not mupdf.pdf_is_dict(imagedict):
  17382. mupdf.fz_warn( f"'{mupdf.pdf_to_name(refname)}' is no form dict ({mupdf.pdf_to_num(imagedict)} 0 R)")
  17383. continue
  17384. type_ = mupdf.pdf_dict_get(imagedict, PDF_NAME('Subtype'))
  17385. if not mupdf.pdf_name_eq(type_, PDF_NAME('Form')):
  17386. continue
  17387. o = mupdf.pdf_dict_get(imagedict, PDF_NAME('BBox'))
  17388. m = mupdf.pdf_dict_get(imagedict, PDF_NAME('Matrix'))
  17389. if m.m_internal:
  17390. mat = mupdf.pdf_to_matrix(m)
  17391. else:
  17392. mat = mupdf.FzMatrix()
  17393. if o.m_internal:
  17394. bbox = mupdf.fz_transform_rect( mupdf.pdf_to_rect(o), mat)
  17395. else:
  17396. bbox = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
  17397. xref = mupdf.pdf_to_num(imagedict)
  17398. entry = (
  17399. xref,
  17400. mupdf.pdf_to_name( refname),
  17401. stream_xref,
  17402. JM_py_from_rect(bbox),
  17403. )
  17404. imagelist.append(entry)
  17405. return rc
  17406. def JM_gather_images(doc: mupdf.PdfDocument, dict_: mupdf.PdfObj, imagelist, stream_xref: int):
  17407. '''
  17408. Store info of an image in Python list
  17409. '''
  17410. rc = 1
  17411. n = mupdf.pdf_dict_len( dict_)
  17412. for i in range(n):
  17413. refname = mupdf.pdf_dict_get_key(dict_, i)
  17414. imagedict = mupdf.pdf_dict_get_val(dict_, i)
  17415. if not mupdf.pdf_is_dict(imagedict):
  17416. mupdf.fz_warn(f"'{mupdf.pdf_to_name(refname)}' is no image dict ({mupdf.pdf_to_num(imagedict)} 0 R)")
  17417. continue
  17418. type_ = mupdf.pdf_dict_get(imagedict, PDF_NAME('Subtype'))
  17419. if not mupdf.pdf_name_eq(type_, PDF_NAME('Image')):
  17420. continue
  17421. xref = mupdf.pdf_to_num(imagedict)
  17422. gen = 0
  17423. smask = mupdf.pdf_dict_geta(imagedict, PDF_NAME('SMask'), PDF_NAME('Mask'))
  17424. if smask.m_internal:
  17425. gen = mupdf.pdf_to_num(smask)
  17426. filter_ = mupdf.pdf_dict_geta(imagedict, PDF_NAME('Filter'), PDF_NAME('F'))
  17427. if mupdf.pdf_is_array(filter_):
  17428. filter_ = mupdf.pdf_array_get(filter_, 0)
  17429. altcs = mupdf.PdfObj(0)
  17430. cs = mupdf.pdf_dict_geta(imagedict, PDF_NAME('ColorSpace'), PDF_NAME('CS'))
  17431. if mupdf.pdf_is_array(cs):
  17432. cses = cs
  17433. cs = mupdf.pdf_array_get(cses, 0)
  17434. if (mupdf.pdf_name_eq(cs, PDF_NAME('DeviceN'))
  17435. or mupdf.pdf_name_eq(cs, PDF_NAME('Separation'))
  17436. ):
  17437. altcs = mupdf.pdf_array_get(cses, 2)
  17438. if mupdf.pdf_is_array(altcs):
  17439. altcs = mupdf.pdf_array_get(altcs, 0)
  17440. width = mupdf.pdf_dict_geta(imagedict, PDF_NAME('Width'), PDF_NAME('W'))
  17441. height = mupdf.pdf_dict_geta(imagedict, PDF_NAME('Height'), PDF_NAME('H'))
  17442. bpc = mupdf.pdf_dict_geta(imagedict, PDF_NAME('BitsPerComponent'), PDF_NAME('BPC'))
  17443. entry = (
  17444. xref,
  17445. gen,
  17446. mupdf.pdf_to_int(width),
  17447. mupdf.pdf_to_int(height),
  17448. mupdf.pdf_to_int(bpc),
  17449. JM_EscapeStrFromStr(mupdf.pdf_to_name(cs)),
  17450. JM_EscapeStrFromStr(mupdf.pdf_to_name(altcs)),
  17451. JM_EscapeStrFromStr(mupdf.pdf_to_name(refname)),
  17452. JM_EscapeStrFromStr(mupdf.pdf_to_name(filter_)),
  17453. stream_xref,
  17454. )
  17455. imagelist.append(entry)
  17456. return rc
  17457. def JM_get_annot_by_xref(page, xref):
  17458. '''
  17459. retrieve annot by its xref
  17460. '''
  17461. assert isinstance(page, mupdf.PdfPage)
  17462. found = 0
  17463. # loop thru MuPDF's internal annots array
  17464. annot = mupdf.pdf_first_annot(page)
  17465. while 1:
  17466. if not annot.m_internal:
  17467. break
  17468. if xref == mupdf.pdf_to_num(mupdf.pdf_annot_obj(annot)):
  17469. found = 1
  17470. break
  17471. annot = mupdf.pdf_next_annot( annot)
  17472. if not found:
  17473. raise Exception("xref %d is not an annot of this page" % xref)
  17474. return annot
  17475. def JM_get_annot_by_name(page, name):
  17476. '''
  17477. retrieve annot by name (/NM key)
  17478. '''
  17479. assert isinstance(page, mupdf.PdfPage)
  17480. if not name:
  17481. return
  17482. found = 0
  17483. # loop thru MuPDF's internal annots and widget arrays
  17484. annot = mupdf.pdf_first_annot(page)
  17485. while 1:
  17486. if not annot.m_internal:
  17487. break
  17488. response, len_ = mupdf.pdf_to_string(mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "NM"))
  17489. if name == response:
  17490. found = 1
  17491. break
  17492. annot = mupdf.pdf_next_annot(annot)
  17493. if not found:
  17494. raise Exception("'%s' is not an annot of this page" % name)
  17495. return annot
  17496. def JM_get_annot_id_list(page):
  17497. names = []
  17498. annots = mupdf.pdf_dict_get( page.obj(), mupdf.PDF_ENUM_NAME_Annots)
  17499. if not annots.m_internal:
  17500. return names
  17501. for i in range( mupdf.pdf_array_len(annots)):
  17502. annot_obj = mupdf.pdf_array_get(annots, i)
  17503. name = mupdf.pdf_dict_gets(annot_obj, "NM")
  17504. if name.m_internal:
  17505. names.append(
  17506. mupdf.pdf_to_text_string(name)
  17507. )
  17508. return names
  17509. def JM_get_annot_xref_list( page_obj):
  17510. '''
  17511. return the xrefs and /NM ids of a page's annots, links and fields
  17512. '''
  17513. if g_use_extra:
  17514. names = extra.JM_get_annot_xref_list( page_obj)
  17515. return names
  17516. names = []
  17517. annots = mupdf.pdf_dict_get( page_obj, PDF_NAME('Annots'))
  17518. n = mupdf.pdf_array_len( annots)
  17519. for i in range( n):
  17520. annot_obj = mupdf.pdf_array_get( annots, i)
  17521. xref = mupdf.pdf_to_num( annot_obj)
  17522. subtype = mupdf.pdf_dict_get( annot_obj, PDF_NAME('Subtype'))
  17523. if not subtype.m_internal:
  17524. continue # subtype is required
  17525. type_ = mupdf.pdf_annot_type_from_string( mupdf.pdf_to_name( subtype))
  17526. if type_ == mupdf.PDF_ANNOT_UNKNOWN:
  17527. continue # only accept valid annot types
  17528. id_ = mupdf.pdf_dict_gets( annot_obj, "NM")
  17529. names.append( (xref, type_, mupdf.pdf_to_text_string( id_)))
  17530. return names
  17531. def JM_get_annot_xref_list2(page):
  17532. page = page._pdf_page(required=False)
  17533. if not page.m_internal:
  17534. return list()
  17535. return JM_get_annot_xref_list( page.obj())
  17536. def JM_get_border_style(style):
  17537. '''
  17538. return pdf_obj "border style" from Python str
  17539. '''
  17540. val = mupdf.PDF_ENUM_NAME_S
  17541. if style is None:
  17542. return val
  17543. s = style
  17544. if s.startswith("b") or s.startswith("B"): val = mupdf.PDF_ENUM_NAME_B
  17545. elif s.startswith("d") or s.startswith("D"): val = mupdf.PDF_ENUM_NAME_D
  17546. elif s.startswith("i") or s.startswith("I"): val = mupdf.PDF_ENUM_NAME_I
  17547. elif s.startswith("u") or s.startswith("U"): val = mupdf.PDF_ENUM_NAME_U
  17548. elif s.startswith("s") or s.startswith("S"): val = mupdf.PDF_ENUM_NAME_S
  17549. return val
  17550. def JM_get_font(
  17551. fontname,
  17552. fontfile,
  17553. fontbuffer,
  17554. script,
  17555. lang,
  17556. ordering,
  17557. is_bold,
  17558. is_italic,
  17559. is_serif,
  17560. embed,
  17561. ):
  17562. '''
  17563. return a fz_font from a number of parameters
  17564. '''
  17565. def fertig(font):
  17566. if not font.m_internal:
  17567. raise RuntimeError(MSG_FONT_FAILED)
  17568. # if font allows this, set embedding
  17569. if not font.m_internal.flags.never_embed:
  17570. mupdf.fz_set_font_embedding(font, embed)
  17571. return font
  17572. index = 0
  17573. font = None
  17574. if fontfile:
  17575. #goto have_file;
  17576. font = mupdf.fz_new_font_from_file( None, fontfile, index, 0)
  17577. return fertig(font)
  17578. if fontbuffer:
  17579. #goto have_buffer;
  17580. res = JM_BufferFromBytes(fontbuffer)
  17581. font = mupdf.fz_new_font_from_buffer( None, res, index, 0)
  17582. return fertig(font)
  17583. if ordering > -1:
  17584. # goto have_cjk;
  17585. font = mupdf.fz_new_cjk_font(ordering)
  17586. return fertig(font)
  17587. if fontname:
  17588. # goto have_base14;
  17589. # Base-14 or a MuPDF builtin font
  17590. font = mupdf.fz_new_base14_font(fontname)
  17591. if font.m_internal:
  17592. return fertig(font)
  17593. font = mupdf.fz_new_builtin_font(fontname, is_bold, is_italic)
  17594. return fertig(font)
  17595. # Check for NOTO font
  17596. #have_noto:;
  17597. data, size, index = mupdf.fz_lookup_noto_font( script, lang)
  17598. font = None
  17599. if data:
  17600. font = mupdf.fz_new_font_from_memory( None, data, size, index, 0)
  17601. if font.m_internal:
  17602. return fertig(font)
  17603. font = mupdf.fz_load_fallback_font( script, lang, is_serif, is_bold, is_italic)
  17604. return fertig(font)
  17605. def JM_get_fontbuffer(doc, xref):
  17606. '''
  17607. Return the contents of a font file, identified by xref
  17608. '''
  17609. if xref < 1:
  17610. return
  17611. o = mupdf.pdf_load_object(doc, xref)
  17612. desft = mupdf.pdf_dict_get(o, PDF_NAME('DescendantFonts'))
  17613. if desft.m_internal:
  17614. obj = mupdf.pdf_resolve_indirect(mupdf.pdf_array_get(desft, 0))
  17615. obj = mupdf.pdf_dict_get(obj, PDF_NAME('FontDescriptor'))
  17616. else:
  17617. obj = mupdf.pdf_dict_get(o, PDF_NAME('FontDescriptor'))
  17618. if not obj.m_internal:
  17619. message(f"invalid font - FontDescriptor missing")
  17620. return
  17621. o = obj
  17622. stream = None
  17623. obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile'))
  17624. if obj.m_internal:
  17625. stream = obj # ext = "pfa"
  17626. obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile2'))
  17627. if obj.m_internal:
  17628. stream = obj # ext = "ttf"
  17629. obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile3'))
  17630. if obj.m_internal:
  17631. stream = obj
  17632. obj = mupdf.pdf_dict_get(obj, PDF_NAME('Subtype'))
  17633. if obj.m_internal and not mupdf.pdf_is_name(obj):
  17634. message("invalid font descriptor subtype")
  17635. return
  17636. if mupdf.pdf_name_eq(obj, PDF_NAME('Type1C')):
  17637. pass # Prev code did: ext = "cff", but this has no effect.
  17638. elif mupdf.pdf_name_eq(obj, PDF_NAME('CIDFontType0C')):
  17639. pass # Prev code did: ext = "cid", but this has no effect.
  17640. elif mupdf.pdf_name_eq(obj, PDF_NAME('OpenType')):
  17641. pass # Prev code did: ext = "otf", but this has no effect. */
  17642. else:
  17643. message('warning: unhandled font type {pdf_to_name(ctx, obj)!r}')
  17644. if not stream:
  17645. message('warning: unhandled font type')
  17646. return
  17647. return mupdf.pdf_load_stream(stream)
  17648. def JM_get_resource_properties(ref):
  17649. '''
  17650. Return the items of Resources/Properties (used for Marked Content)
  17651. Argument may be e.g. a page object or a Form XObject
  17652. '''
  17653. properties = mupdf.pdf_dict_getl(ref, PDF_NAME('Resources'), PDF_NAME('Properties'))
  17654. if not properties.m_internal:
  17655. return ()
  17656. else:
  17657. n = mupdf.pdf_dict_len(properties)
  17658. if n < 1:
  17659. return ()
  17660. rc = []
  17661. for i in range(n):
  17662. key = mupdf.pdf_dict_get_key(properties, i)
  17663. val = mupdf.pdf_dict_get_val(properties, i)
  17664. c = mupdf.pdf_to_name(key)
  17665. xref = mupdf.pdf_to_num(val)
  17666. rc.append((c, xref))
  17667. return rc
  17668. def JM_get_widget_by_xref( page, xref):
  17669. '''
  17670. retrieve widget by its xref
  17671. '''
  17672. found = False
  17673. annot = mupdf.pdf_first_widget( page)
  17674. while annot.m_internal:
  17675. annot_obj = mupdf.pdf_annot_obj( annot)
  17676. if xref == mupdf.pdf_to_num( annot_obj):
  17677. found = True
  17678. break
  17679. annot = mupdf.pdf_next_widget( annot)
  17680. if not found:
  17681. raise Exception( f"xref {xref} is not a widget of this page")
  17682. return Annot( annot)
  17683. def JM_get_widget_properties(annot, Widget):
  17684. '''
  17685. Populate a Python Widget object with the values from a PDF form field.
  17686. Called by "Page.first_widget" and "Widget.next".
  17687. '''
  17688. #log( '{type(annot)=}')
  17689. annot_obj = mupdf.pdf_annot_obj(annot.this)
  17690. #log( 'Have called mupdf.pdf_annot_obj()')
  17691. page = _pdf_annot_page(annot.this)
  17692. pdf = page.doc()
  17693. tw = annot
  17694. def SETATTR(key, value):
  17695. setattr(Widget, key, value)
  17696. def SETATTR_DROP(mod, key, value):
  17697. # Original C code for this function deletes if PyObject* is NULL. We
  17698. # don't have a representation for that in Python - e.g. None is not
  17699. # represented by NULL.
  17700. setattr(mod, key, value)
  17701. #log( '=== + mupdf.pdf_widget_type(tw)')
  17702. field_type = mupdf.pdf_widget_type(tw.this)
  17703. #log( '=== - mupdf.pdf_widget_type(tw)')
  17704. Widget.field_type = field_type
  17705. if field_type == mupdf.PDF_WIDGET_TYPE_SIGNATURE:
  17706. if mupdf.pdf_signature_is_signed(pdf, annot_obj):
  17707. SETATTR("is_signed", True)
  17708. else:
  17709. SETATTR("is_signed",False)
  17710. else:
  17711. SETATTR("is_signed", None)
  17712. SETATTR_DROP(Widget, "border_style", JM_UnicodeFromStr(mupdf.pdf_field_border_style(annot_obj)))
  17713. SETATTR_DROP(Widget, "field_type_string", JM_UnicodeFromStr(JM_field_type_text(field_type)))
  17714. field_name = mupdf.pdf_load_field_name(annot_obj)
  17715. SETATTR_DROP(Widget, "field_name", field_name)
  17716. def pdf_dict_get_inheritable_nonempty_label(node, key):
  17717. '''
  17718. This is a modified version of MuPDF's pdf_dict_get_inheritable(), with
  17719. some changes:
  17720. * Returns string from pdf_to_text_string() or None if not found.
  17721. * Recurses to parent if current node exists but with empty string
  17722. value.
  17723. '''
  17724. slow = node
  17725. halfbeat = 11 # Don't start moving slow pointer for a while.
  17726. while 1:
  17727. if not node.m_internal:
  17728. return
  17729. val = mupdf.pdf_dict_get(node, key)
  17730. if val.m_internal:
  17731. label = mupdf.pdf_to_text_string(val)
  17732. if label:
  17733. return label
  17734. node = mupdf.pdf_dict_get(node, PDF_NAME('Parent'))
  17735. if node.m_internal == slow.m_internal:
  17736. raise Exception("cycle in resources")
  17737. halfbeat -= 1
  17738. if halfbeat == 0:
  17739. slow = mupdf.pdf_dict_get(slow, PDF_NAME('Parent'))
  17740. halfbeat = 2
  17741. # In order to address #3950, we use our modified pdf_dict_get_inheritable()
  17742. # to ignore empty-string child values.
  17743. label = pdf_dict_get_inheritable_nonempty_label(annot_obj, PDF_NAME('TU'))
  17744. if label is not None:
  17745. SETATTR_DROP(Widget, "field_label", label)
  17746. fvalue = None
  17747. if field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON:
  17748. obj = mupdf.pdf_dict_get( annot_obj, PDF_NAME('Parent')) # owning RB group
  17749. if obj.m_internal:
  17750. SETATTR_DROP(Widget, "rb_parent", mupdf.pdf_to_num( obj))
  17751. obj = mupdf.pdf_dict_get(annot_obj, PDF_NAME('AS'))
  17752. if obj.m_internal:
  17753. fvalue = mupdf.pdf_to_name(obj)
  17754. if not fvalue:
  17755. fvalue = mupdf.pdf_field_value(annot_obj)
  17756. SETATTR_DROP(Widget, "field_value", JM_UnicodeFromStr(fvalue))
  17757. SETATTR_DROP(Widget, "field_display", mupdf.pdf_field_display(annot_obj))
  17758. border_width = mupdf.pdf_to_real(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('BS'), PDF_NAME('W')))
  17759. if border_width == 0:
  17760. border_width = 1
  17761. SETATTR_DROP(Widget, "border_width", border_width)
  17762. obj = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('BS'), PDF_NAME('D'))
  17763. if mupdf.pdf_is_array(obj):
  17764. n = mupdf.pdf_array_len(obj)
  17765. d = [0] * n
  17766. for i in range(n):
  17767. d[i] = mupdf.pdf_to_int(mupdf.pdf_array_get(obj, i))
  17768. SETATTR_DROP(Widget, "border_dashes", d)
  17769. SETATTR_DROP(Widget, "text_maxlen", mupdf.pdf_text_widget_max_len(tw.this))
  17770. SETATTR_DROP(Widget, "text_format", mupdf.pdf_text_widget_format(tw.this))
  17771. obj = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('MK'), PDF_NAME('BG'))
  17772. if mupdf.pdf_is_array(obj):
  17773. n = mupdf.pdf_array_len(obj)
  17774. col = [0] * n
  17775. for i in range(n):
  17776. col[i] = mupdf.pdf_to_real(mupdf.pdf_array_get(obj, i))
  17777. SETATTR_DROP(Widget, "fill_color", col)
  17778. obj = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('MK'), PDF_NAME('BC'))
  17779. if mupdf.pdf_is_array(obj):
  17780. n = mupdf.pdf_array_len(obj)
  17781. col = [0] * n
  17782. for i in range(n):
  17783. col[i] = mupdf.pdf_to_real(mupdf.pdf_array_get(obj, i))
  17784. SETATTR_DROP(Widget, "border_color", col)
  17785. SETATTR_DROP(Widget, "choice_values", JM_choice_options(annot))
  17786. da = mupdf.pdf_to_text_string(mupdf.pdf_dict_get_inheritable(annot_obj, PDF_NAME('DA')))
  17787. SETATTR_DROP(Widget, "_text_da", JM_UnicodeFromStr(da))
  17788. obj = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('MK'), PDF_NAME('CA'))
  17789. if obj.m_internal:
  17790. SETATTR_DROP(Widget, "button_caption", JM_UnicodeFromStr(mupdf.pdf_to_text_string(obj)))
  17791. SETATTR_DROP(Widget, "field_flags", mupdf.pdf_field_flags(annot_obj))
  17792. # call Py method to reconstruct text color, font name, size
  17793. Widget._parse_da()
  17794. # extract JavaScript action texts
  17795. s = mupdf.pdf_dict_get(annot_obj, PDF_NAME('A'))
  17796. ss = JM_get_script(s)
  17797. SETATTR_DROP(Widget, "script", ss)
  17798. SETATTR_DROP(Widget, "script_stroke",
  17799. JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), PDF_NAME('K')))
  17800. )
  17801. SETATTR_DROP(Widget, "script_format",
  17802. JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), PDF_NAME('F')))
  17803. )
  17804. SETATTR_DROP(Widget, "script_change",
  17805. JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), PDF_NAME('V')))
  17806. )
  17807. SETATTR_DROP(Widget, "script_calc",
  17808. JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), PDF_NAME('C')))
  17809. )
  17810. SETATTR_DROP(Widget, "script_blur",
  17811. JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Bl')))
  17812. )
  17813. SETATTR_DROP(Widget, "script_focus",
  17814. JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Fo')))
  17815. )
  17816. def JM_get_fontextension(doc, xref):
  17817. '''
  17818. Return the file extension of a font file, identified by xref
  17819. '''
  17820. if xref < 1:
  17821. return "n/a"
  17822. o = mupdf.pdf_load_object(doc, xref)
  17823. desft = mupdf.pdf_dict_get(o, PDF_NAME('DescendantFonts'))
  17824. if desft.m_internal:
  17825. obj = mupdf.pdf_resolve_indirect(mupdf.pdf_array_get(desft, 0))
  17826. obj = mupdf.pdf_dict_get(obj, PDF_NAME('FontDescriptor'))
  17827. else:
  17828. obj = mupdf.pdf_dict_get(o, PDF_NAME('FontDescriptor'))
  17829. if not obj.m_internal:
  17830. return "n/a" # this is a base-14 font
  17831. o = obj # we have the FontDescriptor
  17832. obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile'))
  17833. if obj.m_internal:
  17834. return "pfa"
  17835. obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile2'))
  17836. if obj.m_internal:
  17837. return "ttf"
  17838. obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile3'))
  17839. if obj.m_internal:
  17840. obj = mupdf.pdf_dict_get(obj, PDF_NAME('Subtype'))
  17841. if obj.m_internal and not mupdf.pdf_is_name(obj):
  17842. message("invalid font descriptor subtype")
  17843. return "n/a"
  17844. if mupdf.pdf_name_eq(obj, PDF_NAME('Type1C')):
  17845. return "cff"
  17846. elif mupdf.pdf_name_eq(obj, PDF_NAME('CIDFontType0C')):
  17847. return "cid"
  17848. elif mupdf.pdf_name_eq(obj, PDF_NAME('OpenType')):
  17849. return "otf"
  17850. else:
  17851. message("unhandled font type '%s'", mupdf.pdf_to_name(obj))
  17852. return "n/a"
  17853. def JM_get_ocg_arrays_imp(arr):
  17854. '''
  17855. Get OCG arrays from OC configuration
  17856. Returns dict {"basestate":name, "on":list, "off":list, "rbg":list, "locked":list}
  17857. '''
  17858. list_ = list()
  17859. if mupdf.pdf_is_array( arr):
  17860. n = mupdf.pdf_array_len( arr)
  17861. for i in range(n):
  17862. obj = mupdf.pdf_array_get( arr, i)
  17863. item = mupdf.pdf_to_num( obj)
  17864. if item not in list_:
  17865. list_.append(item)
  17866. return list_
  17867. def JM_get_ocg_arrays(conf):
  17868. rc = dict()
  17869. arr = mupdf.pdf_dict_get( conf, PDF_NAME('ON'))
  17870. list_ = JM_get_ocg_arrays_imp( arr)
  17871. if list_:
  17872. rc["on"] = list_
  17873. arr = mupdf.pdf_dict_get( conf, PDF_NAME('OFF'))
  17874. list_ = JM_get_ocg_arrays_imp( arr)
  17875. if list_:
  17876. rc["off"] = list_
  17877. arr = mupdf.pdf_dict_get( conf, PDF_NAME('Locked'))
  17878. list_ = JM_get_ocg_arrays_imp( arr)
  17879. if list_:
  17880. rc['locked'] = list_
  17881. list_ = list()
  17882. arr = mupdf.pdf_dict_get( conf, PDF_NAME('RBGroups'))
  17883. if mupdf.pdf_is_array( arr):
  17884. n = mupdf.pdf_array_len( arr)
  17885. for i in range(n):
  17886. obj = mupdf.pdf_array_get( arr, i)
  17887. list1 = JM_get_ocg_arrays_imp( obj)
  17888. list_.append(list1)
  17889. if list_:
  17890. rc["rbgroups"] = list_
  17891. obj = mupdf.pdf_dict_get( conf, PDF_NAME('BaseState'))
  17892. if obj.m_internal:
  17893. state = mupdf.pdf_to_name( obj)
  17894. rc["basestate"] = state
  17895. return rc
  17896. def JM_get_page_labels(liste, nums):
  17897. n = mupdf.pdf_array_len(nums)
  17898. for i in range(0, n, 2):
  17899. key = mupdf.pdf_resolve_indirect( mupdf.pdf_array_get(nums, i))
  17900. pno = mupdf.pdf_to_int(key)
  17901. val = mupdf.pdf_resolve_indirect( mupdf.pdf_array_get(nums, i + 1))
  17902. res = JM_object_to_buffer(val, 1, 0)
  17903. c = mupdf.fz_buffer_extract(res)
  17904. assert isinstance(c, bytes)
  17905. c = c.decode('utf-8')
  17906. liste.append( (pno, c))
  17907. def JM_get_script(key):
  17908. '''
  17909. JavaScript extractor
  17910. Returns either the script source or None. Parameter is a PDF action
  17911. dictionary, which must have keys /S and /JS. The value of /S must be
  17912. '/JavaScript'. The value of /JS is returned.
  17913. '''
  17914. if not key.m_internal:
  17915. return
  17916. j = mupdf.pdf_dict_get(key, PDF_NAME('S'))
  17917. jj = mupdf.pdf_to_name(j)
  17918. if jj == "JavaScript":
  17919. js = mupdf.pdf_dict_get(key, PDF_NAME('JS'))
  17920. if not js.m_internal:
  17921. return
  17922. else:
  17923. return
  17924. if mupdf.pdf_is_string(js):
  17925. script = JM_UnicodeFromStr(mupdf.pdf_to_text_string(js))
  17926. elif mupdf.pdf_is_stream(js):
  17927. res = mupdf.pdf_load_stream(js)
  17928. script = JM_EscapeStrFromBuffer(res)
  17929. else:
  17930. return
  17931. if script: # do not return an empty script
  17932. return script
  17933. return
  17934. def JM_have_operation(pdf):
  17935. '''
  17936. Ensure valid journalling state
  17937. '''
  17938. if pdf.m_internal.journal and not mupdf.pdf_undoredo_step(pdf, 0):
  17939. return 0
  17940. return 1
  17941. def JM_image_extension(type_):
  17942. '''
  17943. return extension for MuPDF image type
  17944. '''
  17945. if type_ == mupdf.FZ_IMAGE_FAX: return "fax"
  17946. if type_ == mupdf.FZ_IMAGE_RAW: return "raw"
  17947. if type_ == mupdf.FZ_IMAGE_FLATE: return "flate"
  17948. if type_ == mupdf.FZ_IMAGE_LZW: return "lzw"
  17949. if type_ == mupdf.FZ_IMAGE_RLD: return "rld"
  17950. if type_ == mupdf.FZ_IMAGE_BMP: return "bmp"
  17951. if type_ == mupdf.FZ_IMAGE_GIF: return "gif"
  17952. if type_ == mupdf.FZ_IMAGE_JBIG2: return "jb2"
  17953. if type_ == mupdf.FZ_IMAGE_JPEG: return "jpeg"
  17954. if type_ == mupdf.FZ_IMAGE_JPX: return "jpx"
  17955. if type_ == mupdf.FZ_IMAGE_JXR: return "jxr"
  17956. if type_ == mupdf.FZ_IMAGE_PNG: return "png"
  17957. if type_ == mupdf.FZ_IMAGE_PNM: return "pnm"
  17958. if type_ == mupdf.FZ_IMAGE_TIFF: return "tiff"
  17959. #if type_ == mupdf.FZ_IMAGE_PSD: return "psd"
  17960. return "n/a"
# fixme: need to avoid using a global for this.
# Module-level accumulator shared by JM_image_reporter() and
# JM_image_filter(): the reporter rebinds it to an empty list, the filter
# appends (name, quad) tuples to it.
g_img_info = None
  17963. def JM_image_filter(opaque, ctm, name, image):
  17964. assert isinstance(ctm, mupdf.FzMatrix)
  17965. r = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
  17966. q = mupdf.fz_transform_quad( mupdf.fz_quad_from_rect(r), ctm)
  17967. q = mupdf.fz_transform_quad( q, g_img_info_matrix)
  17968. temp = name, JM_py_from_quad(q)
  17969. g_img_info.append(temp)
  17970. def JM_image_profile( imagedata, keep_image):
  17971. '''
  17972. Return basic properties of an image provided as bytes or bytearray
  17973. The function creates an fz_image and optionally returns it.
  17974. '''
  17975. if not imagedata:
  17976. return None # nothing given
  17977. len_ = len( imagedata)
  17978. if len_ < 8:
  17979. message( "bad image data")
  17980. return None
  17981. c = imagedata
  17982. #log( 'calling mfz_recognize_image_format with {c!r=}')
  17983. type_ = mupdf.fz_recognize_image_format( c)
  17984. if type_ == mupdf.FZ_IMAGE_UNKNOWN:
  17985. return None
  17986. if keep_image:
  17987. res = mupdf.fz_new_buffer_from_copied_data( c, len_)
  17988. else:
  17989. res = mupdf.fz_new_buffer_from_shared_data( c, len_)
  17990. image = mupdf.fz_new_image_from_buffer( res)
  17991. ctm = mupdf.fz_image_orientation_matrix( image)
  17992. xres, yres = mupdf.fz_image_resolution(image)
  17993. orientation = mupdf.fz_image_orientation( image)
  17994. cs_name = mupdf.fz_colorspace_name( image.colorspace())
  17995. result = dict()
  17996. result[ dictkey_width] = image.w()
  17997. result[ dictkey_height] = image.h()
  17998. result[ "orientation"] = orientation
  17999. result[ dictkey_matrix] = JM_py_from_matrix(ctm)
  18000. result[ dictkey_xres] = xres
  18001. result[ dictkey_yres] = yres
  18002. result[ dictkey_colorspace] = image.n()
  18003. result[ dictkey_bpc] = image.bpc()
  18004. result[ dictkey_ext] = JM_image_extension(type_)
  18005. result[ dictkey_cs_name] = cs_name
  18006. if keep_image:
  18007. result[ dictkey_image] = image
  18008. return result
  18009. def JM_image_reporter(page):
  18010. doc = page.doc()
  18011. global g_img_info_matrix
  18012. g_img_info_matrix = mupdf.FzMatrix()
  18013. mediabox = mupdf.FzRect()
  18014. mupdf.pdf_page_transform(page, mediabox, g_img_info_matrix)
  18015. class SanitizeFilterOptions(mupdf.PdfSanitizeFilterOptions2):
  18016. def __init__(self):
  18017. super().__init__()
  18018. self.use_virtual_image_filter()
  18019. def image_filter(self, ctx, ctm, name, image, scissor):
  18020. JM_image_filter(None, mupdf.FzMatrix(ctm), name, image)
  18021. sanitize_filter_options = SanitizeFilterOptions()
  18022. filter_options = _make_PdfFilterOptions(
  18023. instance_forms=1,
  18024. ascii=1,
  18025. no_update=1,
  18026. sanitize=1,
  18027. sopts=sanitize_filter_options,
  18028. )
  18029. global g_img_info
  18030. g_img_info = []
  18031. mupdf.pdf_filter_page_contents( doc, page, filter_options)
  18032. rc = tuple(g_img_info)
  18033. g_img_info = []
  18034. return rc
  18035. def JM_fitz_config():
  18036. have_TOFU = not hasattr(mupdf, 'TOFU')
  18037. have_TOFU_BASE14 = not hasattr(mupdf, 'TOFU_BASE14')
  18038. have_TOFU_CJK = not hasattr(mupdf, 'TOFU_CJK')
  18039. have_TOFU_CJK_EXT = not hasattr(mupdf, 'TOFU_CJK_EXT')
  18040. have_TOFU_CJK_LANG = not hasattr(mupdf, 'TOFU_CJK_LANG')
  18041. have_TOFU_EMOJI = not hasattr(mupdf, 'TOFU_EMOJI')
  18042. have_TOFU_HISTORIC = not hasattr(mupdf, 'TOFU_HISTORIC')
  18043. have_TOFU_SIL = not hasattr(mupdf, 'TOFU_SIL')
  18044. have_TOFU_SYMBOL = not hasattr(mupdf, 'TOFU_SYMBOL')
  18045. ret = dict()
  18046. ret["base14"] = have_TOFU_BASE14
  18047. ret["cbz"] = bool(mupdf.FZ_ENABLE_CBZ)
  18048. ret["epub"] = bool(mupdf.FZ_ENABLE_EPUB)
  18049. ret["html"] = bool(mupdf.FZ_ENABLE_HTML)
  18050. ret["icc"] = bool(mupdf.FZ_ENABLE_ICC)
  18051. ret["img"] = bool(mupdf.FZ_ENABLE_IMG)
  18052. ret["jpx"] = bool(mupdf.FZ_ENABLE_JPX)
  18053. ret["js"] = bool(mupdf.FZ_ENABLE_JS)
  18054. ret["pdf"] = bool(mupdf.FZ_ENABLE_PDF)
  18055. ret["plotter-cmyk"] = bool(mupdf.FZ_PLOTTERS_CMYK)
  18056. ret["plotter-g"] = bool(mupdf.FZ_PLOTTERS_G)
  18057. ret["plotter-n"] = bool(mupdf.FZ_PLOTTERS_N)
  18058. ret["plotter-rgb"] = bool(mupdf.FZ_PLOTTERS_RGB)
  18059. ret["py-memory"] = bool(JM_MEMORY)
  18060. ret["svg"] = bool(mupdf.FZ_ENABLE_SVG)
  18061. ret["tofu"] = have_TOFU
  18062. ret["tofu-cjk"] = have_TOFU_CJK
  18063. ret["tofu-cjk-ext"] = have_TOFU_CJK_EXT
  18064. ret["tofu-cjk-lang"] = have_TOFU_CJK_LANG
  18065. ret["tofu-emoji"] = have_TOFU_EMOJI
  18066. ret["tofu-historic"] = have_TOFU_HISTORIC
  18067. ret["tofu-sil"] = have_TOFU_SIL
  18068. ret["tofu-symbol"] = have_TOFU_SYMBOL
  18069. ret["xps"] = bool(mupdf.FZ_ENABLE_XPS)
  18070. return ret
  18071. def JM_insert_contents(pdf, pageref, newcont, overlay):
  18072. '''
  18073. Insert a buffer as a new separate /Contents object of a page.
  18074. 1. Create a new stream object from buffer 'newcont'
  18075. 2. If /Contents already is an array, then just prepend or append this object
  18076. 3. Else, create new array and put old content obj and this object into it.
  18077. If the page had no /Contents before, just create a 1-item array.
  18078. '''
  18079. contents = mupdf.pdf_dict_get(pageref, PDF_NAME('Contents'))
  18080. newconts = mupdf.pdf_add_stream(pdf, newcont, mupdf.PdfObj(), 0)
  18081. xref = mupdf.pdf_to_num(newconts)
  18082. if mupdf.pdf_is_array(contents):
  18083. if overlay: # append new object
  18084. mupdf.pdf_array_push(contents, newconts)
  18085. else: # prepend new object
  18086. mupdf.pdf_array_insert(contents, newconts, 0)
  18087. else:
  18088. carr = mupdf.pdf_new_array(pdf, 5)
  18089. if overlay:
  18090. if contents.m_internal:
  18091. mupdf.pdf_array_push(carr, contents)
  18092. mupdf.pdf_array_push(carr, newconts)
  18093. else:
  18094. mupdf.pdf_array_push(carr, newconts)
  18095. if contents.m_internal:
  18096. mupdf.pdf_array_push(carr, contents)
  18097. mupdf.pdf_dict_put(pageref, PDF_NAME('Contents'), carr)
  18098. return xref
def JM_insert_font(pdf, bfname, fontfile, fontbuffer, set_simple, idx, wmode, serif, encoding, ordering):
    '''
    Insert a font in a PDF

    The font is taken from the first of these sources that delivers one:
    1. a CJK font, if 'ordering' > -1 and fz_lookup_cjk_font() has data,
    2. a Base-14 font, if 'bfname' is given and fz_lookup_base14_font()
       has data,
    3. the font file 'fontfile', or else the content of 'fontbuffer'.
       Depending on 'set_simple' this is added as a CID font or as a
       simple font with 'encoding'.

    Returns a list [xref, info]: the object number of the PDF font object
    and a dict with the keys "name", "type", "ext", "simple", "ordering",
    "ascender" and "descender".
    '''
    font = None
    res = None
    data = None
    ixref = 0
    index = 0
    simple = 0
    value=None
    name=None
    subt=None
    exto = None
    ENSURE_OPERATION(pdf)
    # check for CJK font
    if ordering > -1:
        data, size, index = mupdf.fz_lookup_cjk_font(ordering)
    if data:
        font = mupdf.fz_new_font_from_memory(None, data, size, index, 0)
        font_obj = mupdf.pdf_add_cjk_font(pdf, font, ordering, wmode, serif)
        exto = "n/a"
        simple = 0
        #goto weiter;   ('weiter' is German for 'continue': the code after the branches)
    else:
        # check for PDF Base-14 font
        if bfname:
            data, size = mupdf.fz_lookup_base14_font(bfname)
        if data:
            font = mupdf.fz_new_font_from_memory(bfname, data, size, 0, 0)
            font_obj = mupdf.pdf_add_simple_font(pdf, font, encoding)
            exto = "n/a"
            simple = 1
            #goto weiter;
        else:
            # font comes from a file or from a memory buffer
            if fontfile:
                font = mupdf.fz_new_font_from_file(None, fontfile, idx, 0)
            else:
                res = JM_BufferFromBytes(fontbuffer)
                if not res.m_internal:
                    # presumably raises ValueError - confirm against RAISEPY
                    RAISEPY(MSG_FILE_OR_BUFFER, PyExc_ValueError)
                font = mupdf.fz_new_font_from_buffer(None, res, idx, 0)
            if not set_simple:
                font_obj = mupdf.pdf_add_cid_font(pdf, font)
                simple = 0
            else:
                font_obj = mupdf.pdf_add_simple_font(pdf, font, encoding)
                simple = 2
    #weiter: ;   (common exit: collect information about the inserted font)
    ixref = mupdf.pdf_to_num(font_obj)
    name = JM_EscapeStrFromStr( mupdf.pdf_to_name( mupdf.pdf_dict_get(font_obj, PDF_NAME('BaseFont'))))
    subt = JM_UnicodeFromStr( mupdf.pdf_to_name( mupdf.pdf_dict_get( font_obj, PDF_NAME('Subtype'))))
    # 'exto' is still unset only for fonts from a file or buffer
    if not exto:
        exto = JM_UnicodeFromStr(JM_get_fontextension(pdf, ixref))
    asc = mupdf.fz_font_ascender(font)
    dsc = mupdf.fz_font_descender(font)
    value = [
            ixref,
            {
                "name": name,               # base font name
                "type": subt,               # subtype
                "ext": exto,                # file extension
                "simple": bool(simple),     # simple font?
                "ordering": ordering,       # CJK font?
                "ascender": asc,
                "descender": dsc,
            },
            ]
    return value
  18168. def JM_irect_from_py(r):
  18169. '''
  18170. PySequence to mupdf.FzIrect. Default: infinite irect
  18171. '''
  18172. if isinstance(r, mupdf.FzIrect):
  18173. return r
  18174. if isinstance(r, IRect):
  18175. r = mupdf.FzIrect( r.x0, r.y0, r.x1, r.y1)
  18176. return r
  18177. if isinstance(r, Rect):
  18178. ret = mupdf.FzRect(r.x0, r.y0, r.x1, r.y1)
  18179. ret = mupdf.FzIrect(ret) # Uses fz_irect_from_rect().
  18180. return ret
  18181. if isinstance(r, mupdf.FzRect):
  18182. ret = mupdf.FzIrect(r) # Uses fz_irect_from_rect().
  18183. return ret
  18184. if not r or not PySequence_Check(r) or PySequence_Size(r) != 4:
  18185. return mupdf.FzIrect(mupdf.fz_infinite_irect)
  18186. f = [0, 0, 0, 0]
  18187. for i in range(4):
  18188. f[i] = r[i]
  18189. if f[i] is None:
  18190. return mupdf.FzIrect(mupdf.fz_infinite_irect)
  18191. if f[i] < FZ_MIN_INF_RECT:
  18192. f[i] = FZ_MIN_INF_RECT
  18193. if f[i] > FZ_MAX_INF_RECT:
  18194. f[i] = FZ_MAX_INF_RECT
  18195. return mupdf.fz_make_irect(f[0], f[1], f[2], f[3])
  18196. def JM_listbox_value( annot):
  18197. '''
  18198. ListBox retrieve value
  18199. '''
  18200. # may be single value or array
  18201. annot_obj = mupdf.pdf_annot_obj( annot)
  18202. optarr = mupdf.pdf_dict_get( annot_obj, PDF_NAME('V'))
  18203. if mupdf.pdf_is_string( optarr): # a single string
  18204. return mupdf.pdf_to_text_string( optarr)
  18205. # value is an array (may have len 0)
  18206. n = mupdf.pdf_array_len( optarr)
  18207. liste = []
  18208. # extract a list of strings
  18209. # each entry may again be an array: take second entry then
  18210. for i in range( n):
  18211. elem = mupdf.pdf_array_get( optarr, i)
  18212. if mupdf.pdf_is_array( elem):
  18213. elem = mupdf.pdf_array_get( elem, 1)
  18214. liste.append( JM_UnicodeFromStr( mupdf.pdf_to_text_string( elem)))
  18215. return liste
  18216. def JM_make_annot_DA(annot, ncol, col, fontname, fontsize):
  18217. # PyMuPDF uses a fz_buffer to build up the string, but it's non-trivial to
  18218. # convert the fz_buffer's `unsigned char*` into a `const char*` suitable
  18219. # for passing to pdf_dict_put_text_string(). So instead we build up the
  18220. # string directly in Python.
  18221. buf = ''
  18222. if ncol < 1:
  18223. buf += f'0 g '
  18224. elif ncol == 1:
  18225. buf += f'{col[0]:g} g '
  18226. elif ncol == 2:
  18227. assert 0
  18228. elif ncol == 3:
  18229. buf += f'{col[0]:g} {col[1]:g} {col[2]:g} rg '
  18230. else:
  18231. buf += f'{col[0]:g} {col[1]:g} {col[2]:g} {col[3]:g} k '
  18232. buf += f'/{JM_expand_fname(fontname)} {fontsize} Tf'
  18233. mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), mupdf.PDF_ENUM_NAME_DA, buf)
def JM_make_spanlist(line_dict, line, raw, buff, tp_rect):
    '''
    Store the list of spans of a text line in 'line_dict'.

    The call is always delegated to extra.JM_make_spanlist(); the Python
    code which follows the first 'return' is never executed.

    That code groups the characters of 'line' into spans: a new span starts
    whenever size, flags, char flags (MuPDF >= 1.25.2), color, font or bidi
    of a character differ from the previous one. With 'raw' true a span
    gets a list of character dicts, otherwise a text string collected in
    'buff'. Characters outside of 'tp_rect' are skipped unless 'tp_rect' is
    infinite. The rectangle joining all span rectangles is returned.
    '''
    # '1 or ...' is always true, so everything below is unreachable.
    if 1 or g_use_extra:
        return extra.JM_make_spanlist(line_dict, line, raw, buff, tp_rect)
    char_list = None
    span_list = []
    mupdf.fz_clear_buffer(buff)
    span_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
    line_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)

    # Properties of a character which decide whether it can stay in the
    # current span. Can be created as a copy of another instance.
    class char_style:
        def __init__(self, rhs=None):
            if rhs:
                self.size = rhs.size
                self.flags = rhs.flags
                if mupdf_version_tuple >= (1, 25, 2):
                    self.char_flags = rhs.char_flags
                self.font = rhs.font
                self.argb = rhs.argb
                self.asc = rhs.asc
                self.desc = rhs.desc
                self.bidi = rhs.bidi
            else:
                # size -1 marks 'no character seen yet'
                self.size = -1
                self.flags = -1
                if mupdf_version_tuple >= (1, 25, 2):
                    self.char_flags = -1
                self.font = ''
                self.argb = -1
                self.asc = 0
                self.desc = 0
                self.bidi = 0
        def __str__(self):
            ret = f'{self.size} {self.flags}'
            if mupdf_version_tuple >= (1, 25, 2):
                ret += f' {self.char_flags}'
            # NOTE(review): __init__ sets no attribute 'color' (only
            # 'argb'), so this line would raise AttributeError.
            ret += f' {self.font} {self.color} {self.asc} {self.desc}'
            return ret

    old_style = char_style()
    style = char_style()
    span = None
    span_origin = None

    for ch in line:
        # start-trace
        r = JM_char_bbox(line, ch)
        if (not JM_rects_overlap(tp_rect, r)
                and not mupdf.fz_is_infinite_rect(tp_rect)
                ):
            continue

        # Info from:
        # detect_super_script()
        # fz_font_is_italic()
        # fz_font_is_serif()
        # fz_font_is_monospaced()
        # fz_font_is_bold()
        flags = JM_char_font_flags(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)), line, ch)
        origin = mupdf.FzPoint(ch.m_internal.origin)
        style.size = ch.m_internal.size
        style.flags = flags
        if mupdf_version_tuple >= (1, 25, 2):
            # FZ_STEXT_SYNTHETIC is per-char, not per-span.
            style.char_flags = ch.m_internal.flags & ~mupdf.FZ_STEXT_SYNTHETIC
        style.font = JM_font_name(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
        style.argb = ch.m_internal.argb
        style.asc = JM_font_ascender(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
        style.desc = JM_font_descender(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
        style.bidi = ch.m_internal.bidi

        # a change of any of these properties starts a new span
        if (style.size != old_style.size
                or style.flags != old_style.flags
                or (mupdf_version_tuple >= (1, 25, 2)
                    and (style.char_flags != old_style.char_flags)
                    )
                or style.argb != old_style.argb
                or style.font != old_style.font
                or style.bidi != old_style.bidi
                ):
            if old_style.size >= 0:
                # not first one, output previous
                if raw:
                    # put character list in the span
                    span[dictkey_chars] = char_list
                    char_list = None
                else:
                    # put text string in the span
                    span[dictkey_text] = JM_EscapeStrFromBuffer( buff)
                    mupdf.fz_clear_buffer(buff)
                span[dictkey_origin] = JM_py_from_point(span_origin)
                span[dictkey_bbox] = JM_py_from_rect(span_rect)
                line_rect = mupdf.fz_union_rect(line_rect, span_rect)
                span_list.append( span)
                span = None

            span = dict()
            asc = style.asc
            desc = style.desc
            # replace a (nearly) zero or negative ascender by fixed values
            if style.asc < 1e-3:
                asc = 0.9
                desc = -0.1
            span[dictkey_size] = style.size
            span[dictkey_flags] = style.flags
            span[dictkey_bidi] = style.bidi
            if mupdf_version_tuple >= (1, 25, 2):
                span[dictkey_char_flags] = style.char_flags
            span[dictkey_font] = JM_EscapeStrFromStr(style.font)
            # lower 24 bits: color, bits above: alpha
            span[dictkey_color] = style.argb & 0xffffff
            if mupdf_version_tuple >= (1, 25, 0):
                span['alpha'] = style.argb >> 24
            span["ascender"] = asc
            span["descender"] = desc

            # Need to be careful here - doing 'old_style=style' does a shallow
            # copy, but we need to keep old_style as a distinct instance.
            old_style = char_style(style)
            span_rect = r
            span_origin = origin

        span_rect = mupdf.fz_union_rect(span_rect, r)

        if raw: # make and append a char dict
            char_dict = dict()
            char_dict[dictkey_origin] = JM_py_from_point( ch.m_internal.origin)
            char_dict[dictkey_bbox] = JM_py_from_rect(r)
            char_dict[dictkey_c] = chr(ch.m_internal.c)
            char_dict['synthetic'] = bool(ch.m_internal.flags & mupdf.FZ_STEXT_SYNTHETIC)
            if char_list is None:
                char_list = []
            char_list.append(char_dict)
        else:   # add character byte to buffer
            JM_append_rune(buff, ch.m_internal.c)

    # all characters processed, now flush remaining span
    if span:
        if raw:
            span[dictkey_chars] = char_list
            char_list = None
        else:
            span[dictkey_text] = JM_EscapeStrFromBuffer(buff)
            mupdf.fz_clear_buffer(buff)
        span[dictkey_origin] = JM_py_from_point(span_origin)
        span[dictkey_bbox] = JM_py_from_rect(span_rect)
        # the last span is only kept if its rectangle is not empty
        if not mupdf.fz_is_empty_rect(span_rect):
            span_list.append(span)
            line_rect = mupdf.fz_union_rect(line_rect, span_rect)
        span = None
    # NOTE(review): both branches store the same value.
    if not mupdf.fz_is_empty_rect(line_rect):
        line_dict[dictkey_spans] = span_list
    else:
        line_dict[dictkey_spans] = span_list
    return line_rect
  18376. def _make_image_dict(img, img_dict):
  18377. """Populate a dictionary with information extracted from a given image.
  18378. Used by 'Document.extract_image' and by 'JM_make_image_block'.
  18379. Both of these functions will add some more specific information.
  18380. """
  18381. img_type = img.fz_compressed_image_type()
  18382. ext = JM_image_extension(img_type)
  18383. # compressed image buffer if present, else None
  18384. ll_cbuf = mupdf.ll_fz_compressed_image_buffer(img.m_internal)
  18385. if (0
  18386. or not ll_cbuf
  18387. or img_type in (mupdf.FZ_IMAGE_JBIG2, mupdf.FZ_IMAGE_UNKNOWN)
  18388. or img_type < mupdf.FZ_IMAGE_BMP
  18389. ):
  18390. # not an image with a compressed buffer: convert to PNG
  18391. res = mupdf.fz_new_buffer_from_image_as_png(
  18392. img,
  18393. mupdf.FzColorParams(mupdf.fz_default_color_params),
  18394. )
  18395. ext = "png"
  18396. elif ext == "jpeg" and img.n() == 4:
  18397. # JPEG with CMYK: invert colors
  18398. res = mupdf.fz_new_buffer_from_image_as_jpeg(
  18399. img, mupdf.FzColorParams(mupdf.fz_default_color_params), 95, 1)
  18400. else:
  18401. # copy the compressed buffer
  18402. res = mupdf.FzBuffer(mupdf.ll_fz_keep_buffer(ll_cbuf.buffer))
  18403. bytes_ = JM_BinFromBuffer(res)
  18404. img_dict[dictkey_width] = img.w()
  18405. img_dict[dictkey_height] = img.h()
  18406. img_dict[dictkey_ext] = ext
  18407. img_dict[dictkey_colorspace] = img.n()
  18408. img_dict[dictkey_xres] = img.xres()
  18409. img_dict[dictkey_yres] = img.yres()
  18410. img_dict[dictkey_bpc] = img.bpc()
  18411. img_dict[dictkey_size] = len(bytes_)
  18412. img_dict[dictkey_image] = bytes_
  18413. def JM_make_image_block(block, block_dict):
  18414. img = block.i_image()
  18415. _make_image_dict(img, block_dict)
  18416. # if the image has a mask, store it as a PNG buffer
  18417. mask = img.mask()
  18418. if mask.m_internal:
  18419. buff = mask.fz_new_buffer_from_image_as_png(mupdf.FzColorParams(mupdf.fz_default_color_params))
  18420. block_dict["mask"] = buff.fz_buffer_extract()
  18421. else:
  18422. block_dict["mask"] = None
  18423. block_dict[dictkey_matrix] = JM_py_from_matrix(block.i_transform())
  18424. def JM_make_text_block(block, block_dict, raw, buff, tp_rect):
  18425. if 1 or g_use_extra:
  18426. return extra.JM_make_text_block(block.m_internal, block_dict, raw, buff.m_internal, tp_rect.m_internal)
  18427. line_list = []
  18428. block_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
  18429. #log(f'{block=}')
  18430. for line in block:
  18431. #log(f'{line=}')
  18432. if (mupdf.fz_is_empty_rect(mupdf.fz_intersect_rect(tp_rect, mupdf.FzRect(line.m_internal.bbox)))
  18433. and not mupdf.fz_is_infinite_rect(tp_rect)
  18434. ):
  18435. continue
  18436. line_dict = dict()
  18437. line_rect = JM_make_spanlist(line_dict, line, raw, buff, tp_rect)
  18438. block_rect = mupdf.fz_union_rect(block_rect, line_rect)
  18439. line_dict[dictkey_wmode] = line.m_internal.wmode
  18440. line_dict[dictkey_dir] = JM_py_from_point(line.m_internal.dir)
  18441. line_dict[dictkey_bbox] = JM_py_from_rect(line_rect)
  18442. line_list.append(line_dict)
  18443. block_dict[dictkey_bbox] = JM_py_from_rect(block_rect)
  18444. block_dict[dictkey_lines] = line_list
  18445. def JM_make_textpage_dict(tp, page_dict, raw):
  18446. if 1 or g_use_extra:
  18447. return extra.JM_make_textpage_dict(tp.m_internal, page_dict, raw)
  18448. text_buffer = mupdf.fz_new_buffer(128)
  18449. block_list = []
  18450. tp_rect = mupdf.FzRect(tp.m_internal.mediabox)
  18451. block_n = -1
  18452. #log( 'JM_make_textpage_dict {=tp}')
  18453. for block in tp:
  18454. block_n += 1
  18455. if (not mupdf.fz_contains_rect(tp_rect, mupdf.FzRect(block.m_internal.bbox))
  18456. and not mupdf.fz_is_infinite_rect(tp_rect)
  18457. and block.m_internal.type == mupdf.FZ_STEXT_BLOCK_IMAGE
  18458. ):
  18459. continue
  18460. if (not mupdf.fz_is_infinite_rect(tp_rect)
  18461. and mupdf.fz_is_empty_rect(mupdf.fz_intersect_rect(tp_rect, mupdf.FzRect(block.m_internal.bbox)))
  18462. ):
  18463. continue
  18464. block_dict = dict()
  18465. block_dict[dictkey_number] = block_n
  18466. block_dict[dictkey_type] = block.m_internal.type
  18467. if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_IMAGE:
  18468. block_dict[dictkey_bbox] = JM_py_from_rect(block.m_internal.bbox)
  18469. JM_make_image_block(block, block_dict)
  18470. else:
  18471. JM_make_text_block(block, block_dict, raw, text_buffer, tp_rect)
  18472. block_list.append(block_dict)
  18473. page_dict[dictkey_blocks] = block_list
  18474. def JM_matrix_from_py(m):
  18475. a = [0, 0, 0, 0, 0, 0]
  18476. if isinstance(m, mupdf.FzMatrix):
  18477. return m
  18478. if isinstance(m, Matrix):
  18479. return mupdf.FzMatrix(m.a, m.b, m.c, m.d, m.e, m.f)
  18480. if not m or not PySequence_Check(m) or PySequence_Size(m) != 6:
  18481. return mupdf.FzMatrix()
  18482. for i in range(6):
  18483. a[i] = JM_FLOAT_ITEM(m, i)
  18484. if a[i] is None:
  18485. return mupdf.FzRect()
  18486. return mupdf.FzMatrix(a[0], a[1], a[2], a[3], a[4], a[5])
  18487. def JM_mediabox(page_obj):
  18488. '''
  18489. return a PDF page's MediaBox
  18490. '''
  18491. page_mediabox = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
  18492. mediabox = mupdf.pdf_to_rect(
  18493. mupdf.pdf_dict_get_inheritable(page_obj, PDF_NAME('MediaBox'))
  18494. )
  18495. if mupdf.fz_is_empty_rect(mediabox) or mupdf.fz_is_infinite_rect(mediabox):
  18496. mediabox.x0 = 0
  18497. mediabox.y0 = 0
  18498. mediabox.x1 = 612
  18499. mediabox.y1 = 792
  18500. page_mediabox = mupdf.FzRect(
  18501. mupdf.fz_min(mediabox.x0, mediabox.x1),
  18502. mupdf.fz_min(mediabox.y0, mediabox.y1),
  18503. mupdf.fz_max(mediabox.x0, mediabox.x1),
  18504. mupdf.fz_max(mediabox.y0, mediabox.y1),
  18505. )
  18506. if (page_mediabox.x1 - page_mediabox.x0 < 1
  18507. or page_mediabox.y1 - page_mediabox.y0 < 1
  18508. ):
  18509. page_mediabox = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
  18510. return page_mediabox
  18511. def JM_merge_range(
  18512. doc_des,
  18513. doc_src,
  18514. spage,
  18515. epage,
  18516. apage,
  18517. rotate,
  18518. links,
  18519. annots,
  18520. show_progress,
  18521. graft_map,
  18522. ):
  18523. '''
  18524. Copy a range of pages (spage, epage) from a source PDF to a specified
  18525. location (apage) of the target PDF.
  18526. If spage > epage, the sequence of source pages is reversed.
  18527. '''
  18528. if g_use_extra:
  18529. return extra.JM_merge_range(
  18530. doc_des,
  18531. doc_src,
  18532. spage,
  18533. epage,
  18534. apage,
  18535. rotate,
  18536. links,
  18537. annots,
  18538. show_progress,
  18539. graft_map,
  18540. )
  18541. afterpage = apage
  18542. counter = 0 # copied pages counter
  18543. total = mupdf.fz_absi(epage - spage) + 1 # total pages to copy
  18544. if spage < epage:
  18545. page = spage
  18546. while page <= epage:
  18547. page_merge(doc_des, doc_src, page, afterpage, rotate, links, annots, graft_map)
  18548. counter += 1
  18549. if show_progress > 0 and counter % show_progress == 0:
  18550. message(f"Inserted {counter} of {total} pages.")
  18551. page += 1
  18552. afterpage += 1
  18553. else:
  18554. page = spage
  18555. while page >= epage:
  18556. page_merge(doc_des, doc_src, page, afterpage, rotate, links, annots, graft_map)
  18557. counter += 1
  18558. if show_progress > 0 and counter % show_progress == 0:
  18559. message(f"Inserted {counter} of {total} pages.")
  18560. page -= 1
  18561. afterpage += 1
  18562. def JM_merge_resources( page, temp_res):
  18563. '''
  18564. Merge the /Resources object created by a text pdf device into the page.
  18565. The device may have created multiple /ExtGState/Alp? and /Font/F? objects.
  18566. These need to be renamed (renumbered) to not overwrite existing page
  18567. objects from previous executions.
  18568. Returns the next available numbers n, m for objects /Alp<n>, /F<m>.
  18569. '''
  18570. # page objects /Resources, /Resources/ExtGState, /Resources/Font
  18571. resources = mupdf.pdf_dict_get(page.obj(), PDF_NAME('Resources'))
  18572. if not resources.m_internal:
  18573. resources = mupdf.pdf_dict_put_dict(page.obj(), PDF_NAME('Resources'), 5)
  18574. main_extg = mupdf.pdf_dict_get(resources, PDF_NAME('ExtGState'))
  18575. main_fonts = mupdf.pdf_dict_get(resources, PDF_NAME('Font'))
  18576. # text pdf device objects /ExtGState, /Font
  18577. temp_extg = mupdf.pdf_dict_get(temp_res, PDF_NAME('ExtGState'))
  18578. temp_fonts = mupdf.pdf_dict_get(temp_res, PDF_NAME('Font'))
  18579. max_alp = -1
  18580. max_fonts = -1
  18581. # Handle /Alp objects
  18582. if mupdf.pdf_is_dict(temp_extg): # any created at all?
  18583. n = mupdf.pdf_dict_len(temp_extg)
  18584. if mupdf.pdf_is_dict(main_extg): # does page have /ExtGState yet?
  18585. for i in range(mupdf.pdf_dict_len(main_extg)):
  18586. # get highest number of objects named /Alpxxx
  18587. alp = mupdf.pdf_to_name( mupdf.pdf_dict_get_key(main_extg, i))
  18588. if not alp.startswith('Alp'):
  18589. continue
  18590. j = mupdf.fz_atoi(alp[3:])
  18591. if j > max_alp:
  18592. max_alp = j
  18593. else: # create a /ExtGState for the page
  18594. main_extg = mupdf.pdf_dict_put_dict(resources, PDF_NAME('ExtGState'), n)
  18595. max_alp += 1
  18596. for i in range(n): # copy over renumbered /Alp objects
  18597. alp = mupdf.pdf_to_name( mupdf.pdf_dict_get_key( temp_extg, i))
  18598. j = mupdf.fz_atoi(alp[3:]) + max_alp
  18599. text = f'Alp{j}'
  18600. val = mupdf.pdf_dict_get_val( temp_extg, i)
  18601. mupdf.pdf_dict_puts(main_extg, text, val)
  18602. if mupdf.pdf_is_dict(main_fonts): # has page any fonts yet?
  18603. for i in range(mupdf.pdf_dict_len(main_fonts)): # get max font number
  18604. font = mupdf.pdf_to_name( mupdf.pdf_dict_get_key( main_fonts, i))
  18605. if not font.startswith("F"):
  18606. continue
  18607. j = mupdf.fz_atoi(font[1:])
  18608. if j > max_fonts:
  18609. max_fonts = j
  18610. else: # create a Resources/Font for the page
  18611. main_fonts = mupdf.pdf_dict_put_dict(resources, PDF_NAME('Font'), 2)
  18612. max_fonts += 1
  18613. for i in range(mupdf.pdf_dict_len(temp_fonts)): # copy renumbered fonts
  18614. font = mupdf.pdf_to_name( mupdf.pdf_dict_get_key( temp_fonts, i))
  18615. j = mupdf.fz_atoi(font[1:]) + max_fonts
  18616. text = f'F{j}'
  18617. val = mupdf.pdf_dict_get_val(temp_fonts, i)
  18618. mupdf.pdf_dict_puts(main_fonts, text, val)
  18619. return (max_alp, max_fonts) # next available numbers
  18620. def JM_mupdf_warning( text):
  18621. '''
  18622. redirect MuPDF warnings
  18623. '''
  18624. JM_mupdf_warnings_store.append(text)
  18625. if JM_mupdf_show_warnings:
  18626. message(f'MuPDF warning: {text}')
  18627. def JM_mupdf_error( text):
  18628. JM_mupdf_warnings_store.append(text)
  18629. if JM_mupdf_show_errors:
  18630. message(f'MuPDF error: {text}\n')
  18631. def JM_new_bbox_device(rc, inc_layers):
  18632. assert isinstance(rc, list)
  18633. return JM_new_bbox_device_Device( rc, inc_layers)
  18634. def JM_new_buffer_from_stext_page(page):
  18635. '''
  18636. make a buffer from an stext_page's text
  18637. '''
  18638. assert isinstance(page, mupdf.FzStextPage)
  18639. rect = mupdf.FzRect(page.m_internal.mediabox)
  18640. buf = mupdf.fz_new_buffer(256)
  18641. for block in page:
  18642. if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_TEXT:
  18643. for line in block:
  18644. for ch in line:
  18645. if (not JM_rects_overlap(rect, JM_char_bbox(line, ch))
  18646. and not mupdf.fz_is_infinite_rect(rect)
  18647. ):
  18648. continue
  18649. mupdf.fz_append_rune(buf, ch.m_internal.c)
  18650. mupdf.fz_append_byte(buf, ord('\n'))
  18651. mupdf.fz_append_byte(buf, ord('\n'))
  18652. return buf
  18653. def JM_new_javascript(pdf, value):
  18654. '''
  18655. make new PDF action object from JavaScript source
  18656. Parameters are a PDF document and a Python string.
  18657. Returns a PDF action object.
  18658. '''
  18659. if value is None:
  18660. # no argument given
  18661. return
  18662. data = JM_StrAsChar(value)
  18663. if data is None:
  18664. # not convertible to char*
  18665. return
  18666. res = mupdf.fz_new_buffer_from_copied_data(data.encode('utf8'))
  18667. source = mupdf.pdf_add_stream(pdf, res, mupdf.PdfObj(), 0)
  18668. newaction = mupdf.pdf_add_new_dict(pdf, 4)
  18669. mupdf.pdf_dict_put(newaction, PDF_NAME('S'), mupdf.pdf_new_name('JavaScript'))
  18670. mupdf.pdf_dict_put(newaction, PDF_NAME('JS'), source)
  18671. return newaction
  18672. def JM_new_output_fileptr(bio):
  18673. return JM_new_output_fileptr_Output( bio)
  18674. def JM_norm_rotation(rotate):
  18675. '''
  18676. # return normalized /Rotate value:one of 0, 90, 180, 270
  18677. '''
  18678. while rotate < 0:
  18679. rotate += 360
  18680. while rotate >= 360:
  18681. rotate -= 360
  18682. if rotate % 90 != 0:
  18683. return 0
  18684. return rotate
  18685. def JM_object_to_buffer(what, compress, ascii):
  18686. res = mupdf.fz_new_buffer(512)
  18687. out = mupdf.FzOutput(res)
  18688. mupdf.pdf_print_obj(out, what, compress, ascii)
  18689. out.fz_close_output()
  18690. mupdf.fz_terminate_buffer(res)
  18691. return res
def JM_outline_xrefs(obj, xrefs):
    '''
    Return list of outline xref numbers. Recursive function. Arguments:
    'obj' first OL item
    'xrefs' empty Python list

    Xref numbers are appended to 'xrefs' in place and the same list is
    returned. The walk stops at an item whose xref is already in the list
    or which has a /Type entry.
    '''
    if not obj.m_internal:
        return xrefs
    thisobj = obj
    while thisobj.m_internal:
        newxref = mupdf.pdf_to_num( thisobj)
        if newxref in xrefs or mupdf.pdf_dict_get( thisobj, PDF_NAME('Type')).m_internal:
            # circular ref or top of chain: terminate
            break
        xrefs.append( newxref)
        first = mupdf.pdf_dict_get( thisobj, PDF_NAME('First')) # try go down
        if mupdf.pdf_is_dict( first):
            # item has children: collect the sub-tree first
            xrefs = JM_outline_xrefs( first, xrefs)
        thisobj = mupdf.pdf_dict_get( thisobj, PDF_NAME('Next')) # try go next
        # NOTE(review): /Parent is looked up on the *new* 'thisobj' (the
        # /Next item), not on the item just processed - confirm intended.
        parent = mupdf.pdf_dict_get( thisobj, PDF_NAME('Parent')) # get parent
        if not mupdf.pdf_is_dict( thisobj):
            # no usable /Next item: continue with whatever /Parent gave
            thisobj = parent
    return xrefs
  18715. def JM_page_rotation(page):
  18716. '''
  18717. return a PDF page's /Rotate value: one of (0, 90, 180, 270)
  18718. '''
  18719. rotate = 0
  18720. obj = mupdf.pdf_dict_get_inheritable( page.obj(), mupdf.PDF_ENUM_NAME_Rotate)
  18721. rotate = mupdf.pdf_to_int(obj)
  18722. rotate = JM_norm_rotation(rotate)
  18723. return rotate
  18724. def JM_pdf_obj_from_str(doc, src):
  18725. '''
  18726. create PDF object from given string (new in v1.14.0: MuPDF dropped it)
  18727. '''
  18728. # fixme: seems inefficient to convert to bytes instance then make another
  18729. # copy inside fz_new_buffer_from_copied_data(), but no other way?
  18730. #
  18731. buffer_ = mupdf.fz_new_buffer_from_copied_data(bytes(src, 'utf8'))
  18732. stream = mupdf.fz_open_buffer(buffer_)
  18733. lexbuf = mupdf.PdfLexbuf(mupdf.PDF_LEXBUF_SMALL)
  18734. result = mupdf.pdf_parse_stm_obj(doc, stream, lexbuf)
  18735. return result
  18736. def JM_pixmap_from_display_list(
  18737. list_,
  18738. ctm,
  18739. cs,
  18740. alpha,
  18741. clip,
  18742. seps,
  18743. ):
  18744. '''
  18745. Version of fz_new_pixmap_from_display_list (util.c) to also support
  18746. rendering of only the 'clip' part of the displaylist rectangle
  18747. '''
  18748. assert isinstance(list_, mupdf.FzDisplayList)
  18749. if seps is None:
  18750. seps = mupdf.FzSeparations()
  18751. assert seps is None or isinstance(seps, mupdf.FzSeparations), f'{type(seps)=}: {seps}'
  18752. rect = mupdf.fz_bound_display_list(list_)
  18753. matrix = JM_matrix_from_py(ctm)
  18754. rclip = JM_rect_from_py(clip)
  18755. rect = mupdf.fz_intersect_rect(rect, rclip) # no-op if clip is not given
  18756. rect = mupdf.fz_transform_rect(rect, matrix)
  18757. irect = mupdf.fz_round_rect(rect)
  18758. assert isinstance( cs, mupdf.FzColorspace)
  18759. pix = mupdf.fz_new_pixmap_with_bbox(cs, irect, seps, alpha)
  18760. if alpha:
  18761. mupdf.fz_clear_pixmap(pix)
  18762. else:
  18763. mupdf.fz_clear_pixmap_with_value(pix, 0xFF)
  18764. if not mupdf.fz_is_infinite_rect(rclip):
  18765. dev = mupdf.fz_new_draw_device_with_bbox(matrix, pix, irect)
  18766. mupdf.fz_run_display_list(list_, dev, mupdf.FzMatrix(), rclip, mupdf.FzCookie())
  18767. else:
  18768. dev = mupdf.fz_new_draw_device(matrix, pix)
  18769. mupdf.fz_run_display_list(list_, dev, mupdf.FzMatrix(), mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE), mupdf.FzCookie())
  18770. mupdf.fz_close_device(dev)
  18771. # Use special raw Pixmap constructor so we don't set alpha to true.
  18772. return Pixmap( 'raw', pix)
  18773. def JM_point_from_py(p):
  18774. '''
  18775. PySequence to fz_point. Default: (FZ_MIN_INF_RECT, FZ_MIN_INF_RECT)
  18776. '''
  18777. if isinstance(p, mupdf.FzPoint):
  18778. return p
  18779. if isinstance(p, Point):
  18780. return mupdf.FzPoint(p.x, p.y)
  18781. if g_use_extra:
  18782. return extra.JM_point_from_py( p)
  18783. p0 = mupdf.FzPoint(0, 0)
  18784. x = JM_FLOAT_ITEM(p, 0)
  18785. y = JM_FLOAT_ITEM(p, 1)
  18786. if x is None or y is None:
  18787. return p0
  18788. x = max( x, FZ_MIN_INF_RECT)
  18789. y = max( y, FZ_MIN_INF_RECT)
  18790. x = min( x, FZ_MAX_INF_RECT)
  18791. y = min( y, FZ_MAX_INF_RECT)
  18792. return mupdf.FzPoint(x, y)
  18793. def JM_print_stext_page_as_text(res, page):
  18794. '''
  18795. Plain text output. An identical copy of fz_print_stext_page_as_text,
  18796. but lines within a block are concatenated by space instead a new-line
  18797. character (which else leads to 2 new-lines).
  18798. '''
  18799. if 1 and g_use_extra:
  18800. return extra.JM_print_stext_page_as_text(res, page)
  18801. assert isinstance(res, mupdf.FzBuffer)
  18802. assert isinstance(page, mupdf.FzStextPage)
  18803. rect = mupdf.FzRect(page.m_internal.mediabox)
  18804. last_char = 0
  18805. n_blocks = 0
  18806. n_lines = 0
  18807. n_chars = 0
  18808. for n_blocks2, block in enumerate( page):
  18809. if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_TEXT:
  18810. for n_lines2, line in enumerate( block):
  18811. for n_chars2, ch in enumerate( line):
  18812. pass
  18813. n_chars += n_chars2
  18814. n_lines += n_lines2
  18815. n_blocks += n_blocks2
  18816. for block in page:
  18817. if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_TEXT:
  18818. for line in block:
  18819. last_char = 0
  18820. for ch in line:
  18821. chbbox = JM_char_bbox(line, ch)
  18822. if (mupdf.fz_is_infinite_rect(rect)
  18823. or JM_rects_overlap(rect, chbbox)
  18824. ):
  18825. #raw += chr(ch.m_internal.c)
  18826. last_char = ch.m_internal.c
  18827. #log( '{=last_char!r utf!r}')
  18828. JM_append_rune(res, last_char)
  18829. if last_char != 10 and last_char > 0:
  18830. mupdf.fz_append_string(res, "\n")
  18831. def JM_put_script(annot_obj, key1, key2, value):
  18832. '''
  18833. Create a JavaScript PDF action.
  18834. Usable for all object types which support PDF actions, even if the
  18835. argument name suggests annotations. Up to 2 key values can be specified, so
  18836. JavaScript actions can be stored for '/A' and '/AA/?' keys.
  18837. '''
  18838. key1_obj = mupdf.pdf_dict_get(annot_obj, key1)
  18839. pdf = mupdf.pdf_get_bound_document(annot_obj) # owning PDF
  18840. # if no new script given, just delete corresponding key
  18841. if not value:
  18842. if key2 is None or not key2.m_internal:
  18843. mupdf.pdf_dict_del(annot_obj, key1)
  18844. elif key1_obj.m_internal:
  18845. mupdf.pdf_dict_del(key1_obj, key2)
  18846. return
  18847. # read any existing script as a PyUnicode string
  18848. if not key2.m_internal or not key1_obj.m_internal:
  18849. script = JM_get_script(key1_obj)
  18850. else:
  18851. script = JM_get_script(mupdf.pdf_dict_get(key1_obj, key2))
  18852. # replace old script, if different from new one
  18853. if value != script:
  18854. newaction = JM_new_javascript(pdf, value)
  18855. if not key2.m_internal:
  18856. mupdf.pdf_dict_put(annot_obj, key1, newaction)
  18857. else:
  18858. mupdf.pdf_dict_putl(annot_obj, newaction, key1, key2)
  18859. def JM_py_from_irect(r):
  18860. return r.x0, r.y0, r.x1, r.y1
  18861. def JM_py_from_matrix(m):
  18862. return m.a, m.b, m.c, m.d, m.e, m.f
  18863. def JM_py_from_point(p):
  18864. return p.x, p.y
  18865. def JM_py_from_quad(q):
  18866. '''
  18867. PySequence from fz_quad.
  18868. '''
  18869. return (
  18870. (q.ul.x, q.ul.y),
  18871. (q.ur.x, q.ur.y),
  18872. (q.ll.x, q.ll.y),
  18873. (q.lr.x, q.lr.y),
  18874. )
  18875. def JM_py_from_rect(r):
  18876. return r.x0, r.y0, r.x1, r.y1
  18877. def JM_quad_from_py(r):
  18878. if isinstance(r, mupdf.FzQuad):
  18879. return r
  18880. # cover all cases of 4-float-sequences
  18881. if hasattr(r, "__getitem__") and len(r) == 4 and hasattr(r[0], "__float__"):
  18882. r = mupdf.FzRect(*tuple(r))
  18883. if isinstance( r, mupdf.FzRect):
  18884. return mupdf.fz_quad_from_rect( r)
  18885. if isinstance( r, Quad):
  18886. return mupdf.fz_make_quad(
  18887. r.ul.x, r.ul.y,
  18888. r.ur.x, r.ur.y,
  18889. r.ll.x, r.ll.y,
  18890. r.lr.x, r.lr.y,
  18891. )
  18892. q = mupdf.fz_make_quad(0, 0, 0, 0, 0, 0, 0, 0)
  18893. p = [0,0,0,0]
  18894. if not r or not isinstance(r, (tuple, list)) or len(r) != 4:
  18895. return q
  18896. if JM_FLOAT_ITEM(r, 0) is None:
  18897. return mupdf.fz_quad_from_rect(JM_rect_from_py(r))
  18898. for i in range(4):
  18899. if i >= len(r):
  18900. return q # invalid: cancel the rest
  18901. obj = r[i] # next point item
  18902. if not PySequence_Check(obj) or PySequence_Size(obj) != 2:
  18903. return q # invalid: cancel the rest
  18904. p[i].x = JM_FLOAT_ITEM(obj, 0)
  18905. p[i].y = JM_FLOAT_ITEM(obj, 1)
  18906. if p[i].x is None or p[i].y is None:
  18907. return q
  18908. p[i].x = max( p[i].x, FZ_MIN_INF_RECT)
  18909. p[i].y = max( p[i].y, FZ_MIN_INF_RECT)
  18910. p[i].x = min( p[i].x, FZ_MAX_INF_RECT)
  18911. p[i].y = min( p[i].y, FZ_MAX_INF_RECT)
  18912. q.ul = p[0]
  18913. q.ur = p[1]
  18914. q.ll = p[2]
  18915. q.lr = p[3]
  18916. return q
  18917. def JM_read_contents(pageref):
  18918. '''
  18919. Read and concatenate a PDF page's /Contents object(s) in a buffer
  18920. '''
  18921. assert isinstance(pageref, mupdf.PdfObj), f'{type(pageref)}'
  18922. contents = mupdf.pdf_dict_get(pageref, mupdf.PDF_ENUM_NAME_Contents)
  18923. if mupdf.pdf_is_array(contents):
  18924. res = mupdf.FzBuffer(1024)
  18925. for i in range(mupdf.pdf_array_len(contents)):
  18926. if i > 0:
  18927. mupdf.fz_append_byte(res, 32)
  18928. obj = mupdf.pdf_array_get(contents, i)
  18929. if mupdf.pdf_is_stream(obj):
  18930. nres = mupdf.pdf_load_stream(obj)
  18931. mupdf.fz_append_buffer(res, nres)
  18932. elif contents.m_internal:
  18933. res = mupdf.pdf_load_stream(contents)
  18934. else:
  18935. res = mupdf.FzBuffer(0)
  18936. return res
  18937. def JM_rect_from_py(r):
  18938. if isinstance(r, mupdf.FzRect):
  18939. return r
  18940. if isinstance(r, mupdf.FzIrect):
  18941. return mupdf.FzRect(r)
  18942. if isinstance(r, Rect):
  18943. return mupdf.fz_make_rect(r.x0, r.y0, r.x1, r.y1)
  18944. if isinstance(r, IRect):
  18945. return mupdf.fz_make_rect(r.x0, r.y0, r.x1, r.y1)
  18946. if not r or not PySequence_Check(r) or PySequence_Size(r) != 4:
  18947. return mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
  18948. f = [0, 0, 0, 0]
  18949. for i in range(4):
  18950. f[i] = JM_FLOAT_ITEM(r, i)
  18951. if f[i] is None:
  18952. return mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
  18953. if f[i] < FZ_MIN_INF_RECT:
  18954. f[i] = FZ_MIN_INF_RECT
  18955. if f[i] > FZ_MAX_INF_RECT:
  18956. f[i] = FZ_MAX_INF_RECT
  18957. return mupdf.fz_make_rect(f[0], f[1], f[2], f[3])
  18958. def JM_rects_overlap(a, b):
  18959. if (0
  18960. or a.x0 >= b.x1
  18961. or a.y0 >= b.y1
  18962. or a.x1 <= b.x0
  18963. or a.y1 <= b.y0
  18964. ):
  18965. return 0
  18966. return 1
  18967. def JM_refresh_links( page):
  18968. '''
  18969. refreshes the link and annotation tables of a page
  18970. '''
  18971. if page is None or not page.m_internal:
  18972. return
  18973. obj = mupdf.pdf_dict_get( page.obj(), PDF_NAME('Annots'))
  18974. if obj.m_internal:
  18975. pdf = page.doc()
  18976. number = mupdf.pdf_lookup_page_number( pdf, page.obj())
  18977. page_mediabox = mupdf.FzRect()
  18978. page_ctm = mupdf.FzMatrix()
  18979. mupdf.pdf_page_transform( page, page_mediabox, page_ctm)
  18980. link = mupdf.pdf_load_link_annots( pdf, page, obj, number, page_ctm)
  18981. page.m_internal.links = mupdf.ll_fz_keep_link( link.m_internal)
  18982. def JM_rotate_page_matrix(page):
  18983. '''
  18984. calculate page rotation matrices
  18985. '''
  18986. if not page.m_internal:
  18987. return mupdf.FzMatrix() # no valid pdf page given
  18988. rotation = JM_page_rotation(page)
  18989. #log( '{rotation=}')
  18990. if rotation == 0:
  18991. return mupdf.FzMatrix() # no rotation
  18992. cb_size = JM_cropbox_size(page.obj())
  18993. w = cb_size.x
  18994. h = cb_size.y
  18995. #log( '{=h w}')
  18996. if rotation == 90:
  18997. m = mupdf.fz_make_matrix(0, 1, -1, 0, h, 0)
  18998. elif rotation == 180:
  18999. m = mupdf.fz_make_matrix(-1, 0, 0, -1, w, h)
  19000. else:
  19001. m = mupdf.fz_make_matrix(0, -1, 1, 0, 0, w)
  19002. #log( 'returning {m=}')
  19003. return m
def JM_search_stext_page(page, needle):
    '''
    Search stext page 'page' for the string 'needle'.

    The condition '1 or g_use_extra' is always true, so every call is
    delegated to extra.JM_search_stext_page(); the Python implementation
    below is currently unreachable. It emulates C 'goto' statements with a
    'while 1' loop - the original labels are kept as comments.
    '''
    if 1 or g_use_extra:
        return extra.JM_search_stext_page(page.m_internal, needle)
    rect = mupdf.FzRect(page.m_internal.mediabox)
    if not needle:
        return
    quads = []
    # Plain attribute holder handed to on_highlight_char().
    class Hits:
        def __str__(self):
            return f'Hits(len={self.len} quads={self.quads} hfuzz={self.hfuzz} vfuzz={self.vfuzz}'
    hits = Hits()
    hits.len = 0
    hits.quads = quads
    hits.hfuzz = 0.2 # merge kerns but not large gaps
    hits.vfuzz = 0.1
    # The haystack is the page text produced by
    # JM_new_buffer_from_stext_page(); 'haystack' is the current offset
    # into that string.
    buffer_ = JM_new_buffer_from_stext_page(page)
    haystack_string = mupdf.fz_string_from_buffer(buffer_)
    haystack = 0
    begin, end = find_string(haystack_string[haystack:], needle)
    if begin is None:
        #goto no_more_matches;
        return quads
    # find_string() was given a slice: convert to absolute offsets
    begin += haystack
    end += haystack
    inside = 0
    i = 0
    for block in page:
        if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
            continue
        for line in block:
            for ch in line:
                i += 1
                if not mupdf.fz_is_infinite_rect(rect):
                    r = JM_char_bbox(line, ch)
                    if not JM_rects_overlap(rect, r):
                        #goto next_char;
                        continue
                while 1:
                    #try_new_match:
                    if not inside:
                        if haystack >= begin:
                            inside = 1
                    if inside:
                        if haystack < end:
                            # still within the current match
                            on_highlight_char(hits, line, ch)
                            break
                        else:
                            # current match is finished: look for the next
                            inside = 0
                            begin, end = find_string(haystack_string[haystack:], needle)
                            if begin is None:
                                #goto no_more_matches;
                                return quads
                            else:
                                #goto try_new_match;
                                begin += haystack
                                end += haystack
                                continue
                    break
                haystack += 1
                #next_char:;
            # the haystack has a newline after every line ...
            assert haystack_string[haystack] == '\n', \
                    f'{haystack=} {haystack_string[haystack]=}'
            haystack += 1
        # ... and one more after every text block
        assert haystack_string[haystack] == '\n', \
                f'{haystack=} {haystack_string[haystack]=}'
        haystack += 1
    #no_more_matches:;
    return quads
  19072. def JM_scan_resources(pdf, rsrc, liste, what, stream_xref, tracer):
  19073. '''
  19074. Step through /Resources, looking up image, xobject or font information
  19075. '''
  19076. if mupdf.pdf_mark_obj(rsrc):
  19077. mupdf.fz_warn('Circular dependencies! Consider page cleaning.')
  19078. return # Circular dependencies!
  19079. try:
  19080. xobj = mupdf.pdf_dict_get(rsrc, mupdf.PDF_ENUM_NAME_XObject)
  19081. if what == 1: # lookup fonts
  19082. font = mupdf.pdf_dict_get(rsrc, mupdf.PDF_ENUM_NAME_Font)
  19083. JM_gather_fonts(pdf, font, liste, stream_xref)
  19084. elif what == 2: # look up images
  19085. JM_gather_images(pdf, xobj, liste, stream_xref)
  19086. elif what == 3: # look up form xobjects
  19087. JM_gather_forms(pdf, xobj, liste, stream_xref)
  19088. else: # should never happen
  19089. return
  19090. # check if we need to recurse into Form XObjects
  19091. n = mupdf.pdf_dict_len(xobj)
  19092. for i in range(n):
  19093. obj = mupdf.pdf_dict_get_val(xobj, i)
  19094. if mupdf.pdf_is_stream(obj):
  19095. sxref = mupdf.pdf_to_num(obj)
  19096. else:
  19097. sxref = 0
  19098. subrsrc = mupdf.pdf_dict_get(obj, mupdf.PDF_ENUM_NAME_Resources)
  19099. if subrsrc.m_internal:
  19100. sxref_t = sxref
  19101. if sxref_t not in tracer:
  19102. tracer.append(sxref_t)
  19103. JM_scan_resources( pdf, subrsrc, liste, what, sxref, tracer)
  19104. else:
  19105. mupdf.fz_warn('Circular dependencies! Consider page cleaning.')
  19106. return
  19107. finally:
  19108. mupdf.pdf_unmark_obj(rsrc)
  19109. def JM_set_choice_options(annot, liste):
  19110. '''
  19111. set ListBox / ComboBox values
  19112. '''
  19113. if not liste:
  19114. return
  19115. assert isinstance( liste, (tuple, list))
  19116. n = len( liste)
  19117. if n == 0:
  19118. return
  19119. annot_obj = mupdf.pdf_annot_obj( annot)
  19120. pdf = mupdf.pdf_get_bound_document( annot_obj)
  19121. optarr = mupdf.pdf_new_array( pdf, n)
  19122. for i in range(n):
  19123. val = liste[i]
  19124. opt = val
  19125. if isinstance(opt, str):
  19126. mupdf.pdf_array_push_text_string( optarr, opt)
  19127. else:
  19128. assert isinstance( val, (tuple, list)) and len( val) == 2, 'bad choice field list'
  19129. opt1, opt2 = val
  19130. assert opt1 and opt2, 'bad choice field list'
  19131. optarrsub = mupdf.pdf_array_push_array( optarr, 2)
  19132. mupdf.pdf_array_push_text_string( optarrsub, opt1)
  19133. mupdf.pdf_array_push_text_string( optarrsub, opt2)
  19134. mupdf.pdf_dict_put( annot_obj, PDF_NAME('Opt'), optarr)
  19135. def JM_set_field_type(doc, obj, type):
  19136. '''
  19137. Set the field type
  19138. '''
  19139. setbits = 0
  19140. clearbits = 0
  19141. typename = None
  19142. if type == mupdf.PDF_WIDGET_TYPE_BUTTON:
  19143. typename = PDF_NAME('Btn')
  19144. setbits = mupdf.PDF_BTN_FIELD_IS_PUSHBUTTON
  19145. elif type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON:
  19146. typename = PDF_NAME('Btn')
  19147. clearbits = mupdf.PDF_BTN_FIELD_IS_PUSHBUTTON
  19148. setbits = mupdf.PDF_BTN_FIELD_IS_RADIO
  19149. elif type == mupdf.PDF_WIDGET_TYPE_CHECKBOX:
  19150. typename = PDF_NAME('Btn')
  19151. clearbits = (mupdf.PDF_BTN_FIELD_IS_PUSHBUTTON | mupdf.PDF_BTN_FIELD_IS_RADIO)
  19152. elif type == mupdf.PDF_WIDGET_TYPE_TEXT:
  19153. typename = PDF_NAME('Tx')
  19154. elif type == mupdf.PDF_WIDGET_TYPE_LISTBOX:
  19155. typename = PDF_NAME('Ch')
  19156. clearbits = mupdf.PDF_CH_FIELD_IS_COMBO
  19157. elif type == mupdf.PDF_WIDGET_TYPE_COMBOBOX:
  19158. typename = PDF_NAME('Ch')
  19159. setbits = mupdf.PDF_CH_FIELD_IS_COMBO
  19160. elif type == mupdf.PDF_WIDGET_TYPE_SIGNATURE:
  19161. typename = PDF_NAME('Sig')
  19162. if typename is not None and typename.m_internal:
  19163. mupdf.pdf_dict_put(obj, PDF_NAME('FT'), typename)
  19164. if setbits != 0 or clearbits != 0:
  19165. bits = mupdf.pdf_dict_get_int(obj, PDF_NAME('Ff'))
  19166. bits &= ~clearbits
  19167. bits |= setbits
  19168. mupdf.pdf_dict_put_int(obj, PDF_NAME('Ff'), bits)
  19169. def JM_set_object_value(obj, key, value):
  19170. '''
  19171. Set a PDF dict key to some value
  19172. '''
  19173. eyecatcher = "fitz: replace me!"
  19174. pdf = mupdf.pdf_get_bound_document(obj)
  19175. # split PDF key at path seps and take last key part
  19176. list_ = key.split('/')
  19177. len_ = len(list_)
  19178. i = len_ - 1
  19179. skey = list_[i]
  19180. del list_[i] # del the last sub-key
  19181. len_ = len(list_) # remaining length
  19182. testkey = mupdf.pdf_dict_getp(obj, key) # check if key already exists
  19183. if not testkey.m_internal:
  19184. #No, it will be created here. But we cannot allow this happening if
  19185. #indirect objects are referenced. So we check all higher level
  19186. #sub-paths for indirect references.
  19187. while len_ > 0:
  19188. t = '/'.join(list_) # next high level
  19189. if mupdf.pdf_is_indirect(mupdf.pdf_dict_getp(obj, JM_StrAsChar(t))):
  19190. raise Exception("path to '%s' has indirects", JM_StrAsChar(skey))
  19191. del list_[len_ - 1] # del last sub-key
  19192. len_ = len(list_) # remaining length
  19193. # Insert our eyecatcher. Will create all sub-paths in the chain, or
  19194. # respectively remove old value of key-path.
  19195. mupdf.pdf_dict_putp(obj, key, mupdf.pdf_new_text_string(eyecatcher))
  19196. testkey = mupdf.pdf_dict_getp(obj, key)
  19197. if not mupdf.pdf_is_string(testkey):
  19198. raise Exception("cannot insert value for '%s'", key)
  19199. temp = mupdf.pdf_to_text_string(testkey)
  19200. if temp != eyecatcher:
  19201. raise Exception("cannot insert value for '%s'", key)
  19202. # read the result as a string
  19203. res = JM_object_to_buffer(obj, 1, 0)
  19204. objstr = JM_EscapeStrFromBuffer(res)
  19205. # replace 'eyecatcher' by desired 'value'
  19206. nullval = "/%s(%s)" % ( skey, eyecatcher)
  19207. newval = "/%s %s" % (skey, value)
  19208. newstr = objstr.replace(nullval, newval, 1)
  19209. # make PDF object from resulting string
  19210. new_obj = JM_pdf_obj_from_str(pdf, newstr)
  19211. return new_obj
  19212. def JM_set_ocg_arrays(conf, basestate, on, off, rbgroups, locked):
  19213. if basestate:
  19214. mupdf.pdf_dict_put_name( conf, PDF_NAME('BaseState'), basestate)
  19215. if on is not None:
  19216. mupdf.pdf_dict_del( conf, PDF_NAME('ON'))
  19217. if on:
  19218. arr = mupdf.pdf_dict_put_array( conf, PDF_NAME('ON'), 1)
  19219. JM_set_ocg_arrays_imp( arr, on)
  19220. if off is not None:
  19221. mupdf.pdf_dict_del( conf, PDF_NAME('OFF'))
  19222. if off:
  19223. arr = mupdf.pdf_dict_put_array( conf, PDF_NAME('OFF'), 1)
  19224. JM_set_ocg_arrays_imp( arr, off)
  19225. if locked is not None:
  19226. mupdf.pdf_dict_del( conf, PDF_NAME('Locked'))
  19227. if locked:
  19228. arr = mupdf.pdf_dict_put_array( conf, PDF_NAME('Locked'), 1)
  19229. JM_set_ocg_arrays_imp( arr, locked)
  19230. if rbgroups is not None:
  19231. mupdf.pdf_dict_del( conf, PDF_NAME('RBGroups'))
  19232. if rbgroups:
  19233. arr = mupdf.pdf_dict_put_array( conf, PDF_NAME('RBGroups'), 1)
  19234. n =len(rbgroups)
  19235. for i in range(n):
  19236. item0 = rbgroups[i]
  19237. obj = mupdf.pdf_array_push_array( arr, 1)
  19238. JM_set_ocg_arrays_imp( obj, item0)
  19239. def JM_set_ocg_arrays_imp(arr, list_):
  19240. '''
  19241. Set OCG arrays from dict of Python lists
  19242. Works with dict like {"basestate":name, "on":list, "off":list, "rbg":list}
  19243. '''
  19244. pdf = mupdf.pdf_get_bound_document(arr)
  19245. for xref in list_:
  19246. obj = mupdf.pdf_new_indirect(pdf, xref, 0)
  19247. mupdf.pdf_array_push(arr, obj)
  19248. def JM_set_resource_property(ref, name, xref):
  19249. '''
  19250. Insert an item into Resources/Properties (used for Marked Content)
  19251. Arguments:
  19252. (1) e.g. page object, Form XObject
  19253. (2) marked content name
  19254. (3) xref of the referenced object (insert as indirect reference)
  19255. '''
  19256. pdf = mupdf.pdf_get_bound_document(ref)
  19257. ind = mupdf.pdf_new_indirect(pdf, xref, 0)
  19258. if not ind.m_internal:
  19259. RAISEPY(MSG_BAD_XREF, PyExc_ValueError)
  19260. resources = mupdf.pdf_dict_get(ref, PDF_NAME('Resources'))
  19261. if not resources.m_internal:
  19262. resources = mupdf.pdf_dict_put_dict(ref, PDF_NAME('Resources'), 1)
  19263. properties = mupdf.pdf_dict_get(resources, PDF_NAME('Properties'))
  19264. if not properties.m_internal:
  19265. properties = mupdf.pdf_dict_put_dict(resources, PDF_NAME('Properties'), 1)
  19266. mupdf.pdf_dict_put(properties, mupdf.pdf_new_name(name), ind)
def JM_set_widget_properties(annot, Widget):
    '''
    Update the PDF form field with the properties from a Python Widget object.
    Called by "Page.add_widget" and "Annot.update_widget".

    'annot' is an Annot or a mupdf.PdfAnnot which must be bound to a page.
    'Widget' is the object whose attributes are read; an attribute that
    does not exist is treated as None.
    '''
    if isinstance( annot, Annot):
        annot = annot.this
    assert isinstance( annot, mupdf.PdfAnnot), f'{type(annot)=} {type=}'
    page = _pdf_annot_page(annot)
    assert page.m_internal, 'Annot is not bound to a page'
    annot_obj = mupdf.pdf_annot_obj(annot)
    pdf = page.doc()
    # Read an attribute of the Widget, None if it does not exist.
    def GETATTR(name):
        return getattr(Widget, name, None)
    value = GETATTR("field_type")
    field_type = value
    # rectangle --------------------------------------------------------------
    value = GETATTR("rect")
    rect = JM_rect_from_py(value)
    # the widget rect is transformed with the page rotation matrix
    rot_mat = JM_rotate_page_matrix(page)
    rect = mupdf.fz_transform_rect(rect, rot_mat)
    mupdf.pdf_set_annot_rect(annot, rect)
    # fill color -------------------------------------------------------------
    value = GETATTR("fill_color")
    if value and PySequence_Check(value):
        n = len(value)
        fill_col = mupdf.pdf_new_array(pdf, n)
        col = 0
        for i in range(n):
            col = value[i]
            mupdf.pdf_array_push_real(fill_col, col)
        mupdf.pdf_field_set_fill_color(annot_obj, fill_col)
    # dashes -----------------------------------------------------------------
    value = GETATTR("border_dashes")
    if value and PySequence_Check(value):
        n = len(value)
        dashes = mupdf.pdf_new_array(pdf, n)
        for i in range(n):
            mupdf.pdf_array_push_int(dashes, value[i])
        mupdf.pdf_dict_putl(annot_obj, dashes, PDF_NAME('BS'), PDF_NAME('D'))
    # border color -----------------------------------------------------------
    value = GETATTR("border_color")
    if value and PySequence_Check(value):
        n = len(value)
        border_col = mupdf.pdf_new_array(pdf, n)
        col = 0
        for i in range(n):
            col = value[i]
            mupdf.pdf_array_push_real(border_col, col)
        mupdf.pdf_dict_putl(annot_obj, border_col, PDF_NAME('MK'), PDF_NAME('BC'))
    # entry ignored - may be used later
    #
    #int text_format = (int) PyInt_AsLong(GETATTR("text_format"));
    #
    # field label -----------------------------------------------------------
    value = GETATTR("field_label")
    if value is not None:
        label = JM_StrAsChar(value)
        mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('TU'), label)
    # field name -------------------------------------------------------------
    value = GETATTR("field_name")
    if value is not None:
        name = JM_StrAsChar(value)
        old_name = mupdf.pdf_load_field_name(annot_obj)
        # only write /T if the name has changed
        if name != old_name:
            mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('T'), name)
    # max text len -----------------------------------------------------------
    if field_type == mupdf.PDF_WIDGET_TYPE_TEXT:
        value = GETATTR("text_maxlen")
        text_maxlen = value
        if text_maxlen:
            mupdf.pdf_dict_put_int(annot_obj, PDF_NAME('MaxLen'), text_maxlen)
    # field display (set for every field type)
    value = GETATTR("field_display")
    d = value
    mupdf.pdf_field_set_display(annot_obj, d)
    # choice values ----------------------------------------------------------
    if field_type in (mupdf.PDF_WIDGET_TYPE_LISTBOX, mupdf.PDF_WIDGET_TYPE_COMBOBOX):
        value = GETATTR("choice_values")
        JM_set_choice_options(annot, value)
    # border style -----------------------------------------------------------
    value = GETATTR("border_style")
    val = JM_get_border_style(value)
    mupdf.pdf_dict_putl(annot_obj, val, PDF_NAME('BS'), PDF_NAME('S'))
    # border width -----------------------------------------------------------
    value = GETATTR("border_width")
    border_width = value
    mupdf.pdf_dict_putl(
            annot_obj,
            mupdf.pdf_new_real(border_width),
            PDF_NAME('BS'),
            PDF_NAME('W'),
            )
    # /DA string -------------------------------------------------------------
    value = GETATTR("_text_da")
    da = JM_StrAsChar(value)
    mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('DA'), da)
    mupdf.pdf_dict_del(annot_obj, PDF_NAME('DS')) # not supported by MuPDF
    mupdf.pdf_dict_del(annot_obj, PDF_NAME('RC')) # not supported by MuPDF
    # field flags ------------------------------------------------------------
    field_flags = GETATTR("field_flags")
    if field_flags is not None:
        # make sure the flag bit implied by the field type is set
        if field_type == mupdf.PDF_WIDGET_TYPE_COMBOBOX:
            field_flags |= mupdf.PDF_CH_FIELD_IS_COMBO
        elif field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON:
            field_flags |= mupdf.PDF_BTN_FIELD_IS_RADIO
        elif field_type == mupdf.PDF_WIDGET_TYPE_BUTTON:
            field_flags |= mupdf.PDF_BTN_FIELD_IS_PUSHBUTTON
        mupdf.pdf_dict_put_int( annot_obj, PDF_NAME('Ff'), field_flags)
    # button caption ---------------------------------------------------------
    value = GETATTR("button_caption")
    ca = JM_StrAsChar(value)
    if ca:
        mupdf.pdf_field_set_button_caption(annot_obj, ca)
    # script (/A) -------------------------------------------------------
    value = GETATTR("script")
    # an empty PdfObj as second key means: no sub-key
    JM_put_script(annot_obj, PDF_NAME('A'), mupdf.PdfObj(), value)
    # script (/AA/K) -------------------------------------------------------
    value = GETATTR("script_stroke")
    JM_put_script(annot_obj, PDF_NAME('AA'), PDF_NAME('K'), value)
    # script (/AA/F) -------------------------------------------------------
    value = GETATTR("script_format")
    JM_put_script(annot_obj, PDF_NAME('AA'), PDF_NAME('F'), value)
    # script (/AA/V) -------------------------------------------------------
    value = GETATTR("script_change")
    JM_put_script(annot_obj, PDF_NAME('AA'), PDF_NAME('V'), value)
    # script (/AA/C) -------------------------------------------------------
    value = GETATTR("script_calc")
    JM_put_script(annot_obj, PDF_NAME('AA'), PDF_NAME('C'), value)
    # script (/AA/Bl) -------------------------------------------------------
    value = GETATTR("script_blur")
    JM_put_script(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Bl'), value)
    # script (/AA/Fo) codespell:ignore --------------------------------------
    value = GETATTR("script_focus")
    JM_put_script(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Fo'), value)
    # field value ------------------------------------------------------------
    value = GETATTR("field_value") # field value
    text = JM_StrAsChar(value) # convert to text (may fail!)
    if field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON:
        if not value:
            mupdf.pdf_set_field_value(pdf, annot_obj, "Off", 1)
            mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), "Off")
        else:
            # TODO check if another button in the group is ON and if so set it Off
            onstate = mupdf.pdf_button_field_on_state(annot_obj)
            if onstate.m_internal:
                on = mupdf.pdf_to_name(onstate)
                mupdf.pdf_set_field_value(pdf, annot_obj, on, 1)
                mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), on)
            elif text:
                mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), text)
    elif field_type == mupdf.PDF_WIDGET_TYPE_CHECKBOX:
        onstate = mupdf.pdf_button_field_on_state(annot_obj)
        on = onstate.pdf_to_name()
        # "checked" is given as True, as the on-state name, or as 'Yes'
        if value in (True, on) or text == 'Yes':
            mupdf.pdf_set_field_value(pdf, annot_obj, on, 1)
            mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), on)
            mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('V'), on)
        else:
            mupdf.pdf_dict_put_name( annot_obj, PDF_NAME('AS'), 'Off')
            mupdf.pdf_dict_put_name( annot_obj, PDF_NAME('V'), 'Off')
    else:
        if text:
            mupdf.pdf_set_field_value(pdf, annot_obj, text, 1)
            if field_type in (mupdf.PDF_WIDGET_TYPE_COMBOBOX, mupdf.PDF_WIDGET_TYPE_LISTBOX):
                mupdf.pdf_dict_del(annot_obj, PDF_NAME('I'))
    # finally mark the annotation as changed and update it
    mupdf.pdf_dirty_annot(annot)
    mupdf.pdf_set_annot_hot(annot, 1)
    mupdf.pdf_set_annot_active(annot, 1)
    mupdf.pdf_update_annot(annot)
  19436. def JM_show_string_cs(
  19437. text,
  19438. user_font,
  19439. trm,
  19440. s,
  19441. wmode,
  19442. bidi_level,
  19443. markup_dir,
  19444. language,
  19445. ):
  19446. i = 0
  19447. while i < len(s):
  19448. l, ucs = mupdf.fz_chartorune(s[i:])
  19449. i += l
  19450. gid = mupdf.fz_encode_character_sc(user_font, ucs)
  19451. if gid == 0:
  19452. gid, font = mupdf.fz_encode_character_with_fallback(user_font, ucs, 0, language)
  19453. else:
  19454. font = user_font
  19455. mupdf.fz_show_glyph(text, font, trm, gid, ucs, wmode, bidi_level, markup_dir, language)
  19456. adv = mupdf.fz_advance_glyph(font, gid, wmode)
  19457. if wmode == 0:
  19458. trm = mupdf.fz_pre_translate(trm, adv, 0)
  19459. else:
  19460. trm = mupdf.fz_pre_translate(trm, 0, -adv)
  19461. return trm
  19462. def JM_UnicodeFromBuffer(buff):
  19463. buff_bytes = mupdf.fz_buffer_extract_copy(buff)
  19464. val = buff_bytes.decode(errors='replace')
  19465. z = val.find(chr(0))
  19466. if z >= 0:
  19467. val = val[:z]
  19468. return val
  19469. def message_warning(text):
  19470. '''
  19471. Generate a warning.
  19472. '''
  19473. message(f'warning: {text}')
  19474. def JM_update_stream(doc, obj, buffer_, compress):
  19475. '''
  19476. update a stream object
  19477. compress stream when beneficial
  19478. '''
  19479. if compress:
  19480. length, _ = mupdf.fz_buffer_storage(buffer_)
  19481. if length > 30: # ignore small stuff
  19482. buffer_compressed = JM_compress_buffer(buffer_)
  19483. assert isinstance(buffer_compressed, mupdf.FzBuffer)
  19484. if buffer_compressed.m_internal:
  19485. length_compressed, _ = mupdf.fz_buffer_storage(buffer_compressed)
  19486. if length_compressed < length: # was it worth the effort?
  19487. mupdf.pdf_dict_put(
  19488. obj,
  19489. mupdf.PDF_ENUM_NAME_Filter,
  19490. mupdf.PDF_ENUM_NAME_FlateDecode,
  19491. )
  19492. mupdf.pdf_update_stream(doc, obj, buffer_compressed, 1)
  19493. return
  19494. mupdf.pdf_update_stream(doc, obj, buffer_, 0)
  19495. def JM_xobject_from_page(pdfout, fsrcpage, xref, gmap):
  19496. '''
  19497. Make an XObject from a PDF page
  19498. For a positive xref assume that its object can be used instead
  19499. '''
  19500. assert isinstance(gmap, mupdf.PdfGraftMap), f'{type(gmap)=}'
  19501. if xref > 0:
  19502. xobj1 = mupdf.pdf_new_indirect(pdfout, xref, 0)
  19503. else:
  19504. srcpage = _as_pdf_page(fsrcpage.this)
  19505. spageref = srcpage.obj()
  19506. mediabox = mupdf.pdf_to_rect(mupdf.pdf_dict_get_inheritable(spageref, PDF_NAME('MediaBox')))
  19507. # Deep-copy resources object of source page
  19508. o = mupdf.pdf_dict_get_inheritable(spageref, PDF_NAME('Resources'))
  19509. if gmap.m_internal:
  19510. # use graftmap when possible
  19511. resources = mupdf.pdf_graft_mapped_object(gmap, o)
  19512. else:
  19513. resources = mupdf.pdf_graft_object(pdfout, o)
  19514. # get spgage contents source
  19515. res = JM_read_contents(spageref)
  19516. #-------------------------------------------------------------
  19517. # create XObject representing the source page
  19518. #-------------------------------------------------------------
  19519. xobj1 = mupdf.pdf_new_xobject(pdfout, mediabox, mupdf.FzMatrix(), mupdf.PdfObj(0), res)
  19520. # store spage contents
  19521. JM_update_stream(pdfout, xobj1, res, 1)
  19522. # store spage resources
  19523. mupdf.pdf_dict_put(xobj1, PDF_NAME('Resources'), resources)
  19524. return xobj1
  19525. def PySequence_Check(s):
  19526. return isinstance(s, (tuple, list))
  19527. def PySequence_Size(s):
  19528. return len(s)
  19529. # constants: error messages. These are also in extra.i.
  19530. #
  19531. MSG_BAD_ANNOT_TYPE = "bad annot type"
  19532. MSG_BAD_APN = "bad or missing annot AP/N"
  19533. MSG_BAD_ARG_INK_ANNOT = "arg must be seq of seq of float pairs"
  19534. MSG_BAD_ARG_POINTS = "bad seq of points"
  19535. MSG_BAD_BUFFER = "bad type: 'buffer'"
  19536. MSG_BAD_COLOR_SEQ = "bad color sequence"
  19537. MSG_BAD_DOCUMENT = "cannot open broken document"
  19538. MSG_BAD_FILETYPE = "bad filetype"
  19539. MSG_BAD_LOCATION = "bad location"
  19540. MSG_BAD_OC_CONFIG = "bad config number"
  19541. MSG_BAD_OC_LAYER = "bad layer number"
  19542. MSG_BAD_OC_REF = "bad 'oc' reference"
  19543. MSG_BAD_PAGEID = "bad page id"
  19544. MSG_BAD_PAGENO = "bad page number(s)"
  19545. MSG_BAD_PDFROOT = "PDF has no root"
  19546. MSG_BAD_RECT = "rect is infinite or empty"
  19547. MSG_BAD_TEXT = "bad type: 'text'"
  19548. MSG_BAD_XREF = "bad xref"
  19549. MSG_COLOR_COUNT_FAILED = "color count failed"
  19550. MSG_FILE_OR_BUFFER = "need font file or buffer"
  19551. MSG_FONT_FAILED = "cannot create font"
  19552. MSG_IS_NO_ANNOT = "is no annotation"
  19553. MSG_IS_NO_IMAGE = "is no image"
  19554. MSG_IS_NO_PDF = "is no PDF"
  19555. MSG_IS_NO_DICT = "object is no PDF dict"
  19556. MSG_PIX_NOALPHA = "source pixmap has no alpha"
  19557. MSG_PIXEL_OUTSIDE = "pixel(s) outside image"
  19558. JM_Exc_FileDataError = 'FileDataError'
  19559. PyExc_ValueError = 'ValueError'
  19560. def RAISEPY( msg, exc):
  19561. #JM_Exc_CurrentException=exc
  19562. #fz_throw(context, FZ_ERROR_GENERIC, msg)
  19563. raise Exception( msg)
  19564. def PyUnicode_DecodeRawUnicodeEscape(s, errors='strict'):
  19565. # FIXED: handle raw unicode escape sequences
  19566. if not s:
  19567. return ""
  19568. if isinstance(s, str):
  19569. rc = s.encode("utf8", errors=errors)
  19570. elif isinstance(s, bytes):
  19571. rc = s[:]
  19572. ret = rc.decode('raw_unicode_escape', errors=errors)
  19573. return ret
  19574. def CheckColor(c: OptSeq):
  19575. if c:
  19576. if (
  19577. type(c) not in (list, tuple)
  19578. or len(c) not in (1, 3, 4)
  19579. or min(c) < 0
  19580. or max(c) > 1
  19581. ):
  19582. raise ValueError("need 1, 3 or 4 color components in range 0 to 1")
  19583. def CheckFont(page: Page, fontname: str) -> tuple:
  19584. """Return an entry in the page's font list if reference name matches.
  19585. """
  19586. for f in page.get_fonts():
  19587. if f[4] == fontname:
  19588. return f
  19589. def CheckFontInfo(doc: Document, xref: int) -> list:
  19590. """Return a font info if present in the document.
  19591. """
  19592. for f in doc.FontInfos:
  19593. if xref == f[0]:
  19594. return f
  19595. def CheckMarkerArg(quads: typing.Any) -> tuple:
  19596. if CheckRect(quads):
  19597. r = Rect(quads)
  19598. return (r.quad,)
  19599. if CheckQuad(quads):
  19600. return (quads,)
  19601. for q in quads:
  19602. if not (CheckRect(q) or CheckQuad(q)):
  19603. raise ValueError("bad quads entry")
  19604. return quads
  19605. def CheckMorph(o: typing.Any) -> bool:
  19606. if not bool(o):
  19607. return False
  19608. if not (type(o) in (list, tuple) and len(o) == 2):
  19609. raise ValueError("morph must be a sequence of length 2")
  19610. if not (len(o[0]) == 2 and len(o[1]) == 6):
  19611. raise ValueError("invalid morph param 0")
  19612. if not o[1][4] == o[1][5] == 0:
  19613. raise ValueError("invalid morph param 1")
  19614. return True
  19615. def CheckParent(o: typing.Any):
  19616. return
  19617. if not hasattr(o, "parent") or o.parent is None:
  19618. raise ValueError(f"orphaned object {type(o)=}: parent is None")
  19619. def CheckQuad(q: typing.Any) -> bool:
  19620. """Check whether an object is convex, not empty quad-like.
  19621. It must be a sequence of 4 number pairs.
  19622. """
  19623. try:
  19624. q0 = Quad(q)
  19625. except Exception:
  19626. if g_exceptions_verbose > 1: exception_info()
  19627. return False
  19628. return q0.is_convex
  19629. def CheckRect(r: typing.Any) -> bool:
  19630. """Check whether an object is non-degenerate rect-like.
  19631. It must be a sequence of 4 numbers.
  19632. """
  19633. try:
  19634. r = Rect(r)
  19635. except Exception:
  19636. if g_exceptions_verbose > 1: exception_info()
  19637. return False
  19638. return not (r.is_empty or r.is_infinite)
  19639. def ColorCode(c: typing.Union[list, tuple, float, None], f: str) -> str:
  19640. if not c:
  19641. return ""
  19642. if hasattr(c, "__float__"):
  19643. c = (c,)
  19644. CheckColor(c)
  19645. if len(c) == 1:
  19646. s = _format_g(c[0]) + " "
  19647. return s + "G " if f == "c" else s + "g "
  19648. if len(c) == 3:
  19649. s = _format_g(tuple(c)) + " "
  19650. return s + "RG " if f == "c" else s + "rg "
  19651. s = _format_g(tuple(c)) + " "
  19652. return s + "K " if f == "c" else s + "k "
  19653. def Page__add_text_marker(self, quads, annot_type):
  19654. pdfpage = self._pdf_page()
  19655. rotation = JM_page_rotation(pdfpage)
  19656. def final():
  19657. if rotation != 0:
  19658. mupdf.pdf_dict_put_int(pdfpage.obj(), PDF_NAME('Rotate'), rotation)
  19659. try:
  19660. if rotation != 0:
  19661. mupdf.pdf_dict_put_int(pdfpage.obj(), PDF_NAME('Rotate'), 0)
  19662. annot = mupdf.pdf_create_annot(pdfpage, annot_type)
  19663. for item in quads:
  19664. q = JM_quad_from_py(item)
  19665. mupdf.pdf_add_annot_quad_point(annot, q)
  19666. mupdf.pdf_update_annot(annot)
  19667. JM_add_annot_id(annot, "A")
  19668. final()
  19669. except Exception:
  19670. if g_exceptions_verbose: exception_info()
  19671. final()
  19672. return
  19673. return Annot(annot)
  19674. def PDF_NAME(x):
  19675. assert isinstance(x, str)
  19676. ret = getattr(mupdf, f'PDF_ENUM_NAME_{x}')
  19677. # Note that we return a (swig proxy for) pdf_obj*, not a mupdf.PdfObj. In
  19678. # the C++ API, the constructor PdfObj::PdfObj(pdf_obj*) is marked as
  19679. # explicit, but this seems to be ignored by SWIG. If SWIG started to
  19680. # generate code that respected `explicit`, we would need to do `return
  19681. # mupdf.PdfObj(ret)`.
  19682. #
  19683. # [Compare with extra.i, where we define our own PDF_NAME2() macro that
  19684. # returns a mupdf::PdfObj.]
  19685. return ret
  19686. def UpdateFontInfo(doc: Document, info: typing.Sequence):
  19687. xref = info[0]
  19688. found = False
  19689. for i, fi in enumerate(doc.FontInfos):
  19690. if fi[0] == xref:
  19691. found = True
  19692. break
  19693. if found:
  19694. doc.FontInfos[i] = info
  19695. else:
  19696. doc.FontInfos.append(info)
  19697. def args_match(args, *types):
  19698. '''
  19699. Returns true if <args> matches <types>.
  19700. Each item in <types> is a type or tuple of types. Any of these types will
  19701. match an item in <args>. `None` will match anything in <args>. `type(None)`
  19702. will match an arg whose value is `None`.
  19703. '''
  19704. j = 0
  19705. for i in range(len(types)):
  19706. type_ = types[i]
  19707. if j >= len(args):
  19708. if isinstance(type_, tuple) and None in type_:
  19709. # arg is missing but has default value.
  19710. continue
  19711. else:
  19712. return False
  19713. if type_ is not None and not isinstance(args[j], type_):
  19714. return False
  19715. j += 1
  19716. if j != len(args):
  19717. return False
  19718. return True
  19719. def calc_image_matrix(width, height, tr, rotate, keep):
  19720. '''
  19721. # compute image insertion matrix
  19722. '''
  19723. trect = JM_rect_from_py(tr)
  19724. rot = mupdf.fz_rotate(rotate)
  19725. trw = trect.x1 - trect.x0
  19726. trh = trect.y1 - trect.y0
  19727. w = trw
  19728. h = trh
  19729. if keep:
  19730. large = max(width, height)
  19731. fw = width / large
  19732. fh = height / large
  19733. else:
  19734. fw = fh = 1
  19735. small = min(fw, fh)
  19736. if rotate != 0 and rotate != 180:
  19737. f = fw
  19738. fw = fh
  19739. fh = f
  19740. if fw < 1:
  19741. if trw / fw > trh / fh:
  19742. w = trh * small
  19743. h = trh
  19744. else:
  19745. w = trw
  19746. h = trw / small
  19747. elif fw != fh:
  19748. if trw / fw > trh / fh:
  19749. w = trh / small
  19750. h = trh
  19751. else:
  19752. w = trw
  19753. h = trw * small
  19754. else:
  19755. w = trw
  19756. h = trh
  19757. tmp = mupdf.fz_make_point(
  19758. (trect.x0 + trect.x1) / 2,
  19759. (trect.y0 + trect.y1) / 2,
  19760. )
  19761. mat = mupdf.fz_make_matrix(1, 0, 0, 1, -0.5, -0.5)
  19762. mat = mupdf.fz_concat(mat, rot)
  19763. mat = mupdf.fz_concat(mat, mupdf.fz_scale(w, h))
  19764. mat = mupdf.fz_concat(mat, mupdf.fz_translate(tmp.x, tmp.y))
  19765. return mat
  19766. def detect_super_script(line, ch):
  19767. if line.m_internal.wmode == 0 and line.m_internal.dir.x == 1 and line.m_internal.dir.y == 0:
  19768. return ch.m_internal.origin.y < line.m_internal.first_char.origin.y - ch.m_internal.size * 0.1
  19769. return 0
  19770. def dir_str(x):
  19771. ret = f'{x} {type(x)} ({len(dir(x))}):\n'
  19772. for i in dir(x):
  19773. ret += f' {i}\n'
  19774. return ret
  19775. def getTJstr(text: str, glyphs: typing.Union[list, tuple, None], simple: bool, ordering: int) -> str:
  19776. """ Return a PDF string enclosed in [] brackets, suitable for the PDF TJ
  19777. operator.
  19778. Notes:
  19779. The input string is converted to either 2 or 4 hex digits per character.
  19780. Args:
  19781. simple: no glyphs: 2-chars, use char codes as the glyph
  19782. glyphs: 2-chars, use glyphs instead of char codes (Symbol,
  19783. ZapfDingbats)
  19784. not simple: ordering < 0: 4-chars, use glyphs not char codes
  19785. ordering >=0: a CJK font! 4 chars, use char codes as glyphs
  19786. """
  19787. if text.startswith("[<") and text.endswith(">]"): # already done
  19788. return text
  19789. if not bool(text):
  19790. return "[<>]"
  19791. if simple: # each char or its glyph is coded as a 2-byte hex
  19792. if glyphs is None: # not Symbol, not ZapfDingbats: use char code
  19793. otxt = "".join(["%02x" % ord(c) if ord(c) < 256 else "b7" for c in text])
  19794. else: # Symbol or ZapfDingbats: use glyphs
  19795. otxt = "".join(
  19796. ["%02x" % glyphs[ord(c)][0] if ord(c) < 256 else "b7" for c in text]
  19797. )
  19798. return "[<" + otxt + ">]"
  19799. # non-simple fonts: each char or its glyph is coded as 4-byte hex
  19800. if ordering < 0: # not a CJK font: use the glyphs
  19801. otxt = "".join(["%04x" % glyphs[ord(c)][0] for c in text])
  19802. else: # CJK: use the char codes
  19803. otxt = "".join(["%04x" % ord(c) for c in text])
  19804. return "[<" + otxt + ">]"
  19805. def get_pdf_str(s: str) -> str:
  19806. """ Return a PDF string depending on its coding.
  19807. Notes:
  19808. Returns a string bracketed with either "()" or "<>" for hex values.
  19809. If only ascii then "(original)" is returned, else if only 8 bit chars
  19810. then "(original)" with interspersed octal strings \nnn is returned,
  19811. else a string "<FEFF[hexstring]>" is returned, where [hexstring] is the
  19812. UTF-16BE encoding of the original.
  19813. """
  19814. if not bool(s):
  19815. return "()"
  19816. def make_utf16be(s):
  19817. r = bytearray([254, 255]) + bytearray(s, "UTF-16BE")
  19818. return "<" + r.hex() + ">" # brackets indicate hex
  19819. # The following either returns the original string with mixed-in
  19820. # octal numbers \nnn for chars outside the ASCII range, or returns
  19821. # the UTF-16BE BOM version of the string.
  19822. r = ""
  19823. for c in s:
  19824. oc = ord(c)
  19825. if oc > 255: # shortcut if beyond 8-bit code range
  19826. return make_utf16be(s)
  19827. if oc > 31 and oc < 127: # in ASCII range
  19828. if c in ("(", ")", "\\"): # these need to be escaped
  19829. r += "\\"
  19830. r += c
  19831. continue
  19832. if oc > 127: # beyond ASCII
  19833. r += "\\%03o" % oc
  19834. continue
  19835. # now the white spaces
  19836. if oc == 8: # backspace
  19837. r += "\\b"
  19838. elif oc == 9: # tab
  19839. r += "\\t"
  19840. elif oc == 10: # line feed
  19841. r += "\\n"
  19842. elif oc == 12: # form feed
  19843. r += "\\f"
  19844. elif oc == 13: # carriage return
  19845. r += "\\r"
  19846. else:
  19847. r += "\\267" # unsupported: replace by 0xB7
  19848. return "(" + r + ")"
  19849. def get_tessdata(tessdata=None):
  19850. """Detect Tesseract language support folder.
  19851. This function is used to enable OCR via Tesseract even if the language
  19852. support folder is not specified directly or in environment variable
  19853. TESSDATA_PREFIX.
  19854. * If <tessdata> is set we return it directly.
  19855. * Otherwise we return `os.environ['TESSDATA_PREFIX']` if set.
  19856. * Otherwise we search for a Tesseract installation and return its language
  19857. support folder.
  19858. * Otherwise we raise an exception.
  19859. """
  19860. if tessdata:
  19861. return tessdata
  19862. tessdata = os.getenv("TESSDATA_PREFIX")
  19863. if tessdata: # use environment variable if set
  19864. return tessdata
  19865. # Try to locate the tesseract-ocr installation.
  19866. import subprocess
  19867. cp = subprocess.run('tesseract --list-langs', shell=1, capture_output=1, check=0, text=True)
  19868. if cp.returncode == 0:
  19869. m = re.search('List of available languages in "(.+)"', cp.stdout)
  19870. if m:
  19871. tessdata = m.group(1)
  19872. return tessdata
  19873. # Windows systems:
  19874. if sys.platform == "win32":
  19875. cp = subprocess.run("where tesseract", shell=1, capture_output=1, check=0, text=True)
  19876. response = cp.stdout.strip()
  19877. if cp.returncode or not response:
  19878. raise RuntimeError("No tessdata specified and Tesseract is not installed")
  19879. dirname = os.path.dirname(response) # path of tesseract.exe
  19880. tessdata = os.path.join(dirname, "tessdata") # language support
  19881. if os.path.exists(tessdata): # all ok?
  19882. return tessdata
  19883. else: # should not happen!
  19884. raise RuntimeError("No tessdata specified and Tesseract installation has no {tessdata} folder")
  19885. # Unix-like systems:
  19886. attempts = list()
  19887. for path in 'tesseract-ocr', 'tesseract':
  19888. cp = subprocess.run(f'whereis {path}', shell=1, capture_output=1, check=0, text=True)
  19889. if cp.returncode == 0:
  19890. response = cp.stdout.strip().split()
  19891. if len(response) == 2:
  19892. # search tessdata in folder structure
  19893. dirname = response[1] # contains tesseract-ocr installation folder
  19894. pattern = f"{dirname}/*/tessdata"
  19895. attempts.append(pattern)
  19896. tessdatas = glob.glob(pattern)
  19897. tessdatas.sort()
  19898. if tessdatas:
  19899. return tessdatas[-1]
  19900. if attempts:
  19901. text = 'No tessdata specified and no match for:\n'
  19902. for attempt in attempts:
  19903. text += f' {attempt}'
  19904. raise RuntimeError(text)
  19905. else:
  19906. raise RuntimeError('No tessdata specified and Tesseract is not installed')
  19907. def css_for_pymupdf_font(
  19908. fontcode: str, *, CSS: OptStr = None, archive: AnyType = None, name: OptStr = None
  19909. ) -> str:
  19910. """Create @font-face items for the given fontcode of pymupdf-fonts.
  19911. Adds @font-face support for fonts contained in package pymupdf-fonts.
  19912. Creates a CSS font-family for all fonts starting with string 'fontcode'.
  19913. Note:
  19914. The font naming convention in package pymupdf-fonts is "fontcode<sf>",
  19915. where the suffix "sf" is either empty or one of "it", "bo" or "bi".
  19916. These suffixes thus represent the regular, italic, bold or bold-italic
  19917. variants of a font. For example, font code "notos" refers to fonts
  19918. "notos" - "Noto Sans Regular"
  19919. "notosit" - "Noto Sans Italic"
  19920. "notosbo" - "Noto Sans Bold"
  19921. "notosbi" - "Noto Sans Bold Italic"
  19922. This function creates four CSS @font-face definitions and collectively
  19923. assigns the font-family name "notos" to them (or the "name" value).
  19924. All fitting font buffers of the pymupdf-fonts package are placed / added
  19925. to the archive provided as parameter.
  19926. To use the font in pymupdf.Story, execute 'set_font(fontcode)'. The correct
  19927. font weight (bold) or style (italic) will automatically be selected.
  19928. Expects and returns the CSS source, with the new CSS definitions appended.
  19929. Args:
  19930. fontcode: (str) font code for naming the font variants to include.
  19931. E.g. "fig" adds notos, notosi, notosb, notosbi fonts.
  19932. A maximum of 4 font variants is accepted.
  19933. CSS: (str) CSS string to add @font-face definitions to.
  19934. archive: (Archive, mandatory) where to place the font buffers.
  19935. name: (str) use this as family-name instead of 'fontcode'.
  19936. Returns:
  19937. Modified CSS, with appended @font-face statements for each font variant
  19938. of fontcode.
  19939. Fontbuffers associated with "fontcode" will be added to 'archive'.
  19940. """
  19941. # @font-face template string
  19942. CSSFONT = "\n@font-face {font-family: %s; src: url(%s);%s%s}\n"
  19943. if not type(archive) is Archive:
  19944. raise ValueError("'archive' must be an Archive")
  19945. if CSS is None:
  19946. CSS = ""
  19947. # select font codes starting with the pass-in string
  19948. font_keys = [k for k in fitz_fontdescriptors.keys() if k.startswith(fontcode)]
  19949. if font_keys == []:
  19950. raise ValueError(f"No font code '{fontcode}' found in pymupdf-fonts.")
  19951. if len(font_keys) > 4:
  19952. raise ValueError("fontcode too short")
  19953. if name is None: # use this name for font-family
  19954. name = fontcode
  19955. for fkey in font_keys:
  19956. font = fitz_fontdescriptors[fkey]
  19957. bold = font["bold"] # determine font property
  19958. italic = font["italic"] # determine font property
  19959. fbuff = font["loader"]() # load the fontbuffer
  19960. archive.add(fbuff, fkey) # update the archive
  19961. bold_text = "font-weight: bold;" if bold else ""
  19962. italic_text = "font-style: italic;" if italic else ""
  19963. CSS += CSSFONT % (name, fkey, bold_text, italic_text)
  19964. return CSS
  19965. def get_text_length(text: str, fontname: str ="helv", fontsize: float =11, encoding: int =0) -> float:
  19966. """Calculate length of a string for a built-in font.
  19967. Args:
  19968. fontname: name of the font.
  19969. fontsize: font size points.
  19970. encoding: encoding to use, 0=Latin (default), 1=Greek, 2=Cyrillic.
  19971. Returns:
  19972. (float) length of text.
  19973. """
  19974. fontname = fontname.lower()
  19975. basename = Base14_fontdict.get(fontname, None)
  19976. glyphs = None
  19977. if basename == "Symbol":
  19978. glyphs = symbol_glyphs
  19979. if basename == "ZapfDingbats":
  19980. glyphs = zapf_glyphs
  19981. if glyphs is not None:
  19982. w = sum([glyphs[ord(c)][1] if ord(c) < 256 else glyphs[183][1] for c in text])
  19983. return w * fontsize
  19984. if fontname in Base14_fontdict.keys():
  19985. return util_measure_string(
  19986. text, Base14_fontdict[fontname], fontsize, encoding
  19987. )
  19988. if fontname in (
  19989. "china-t",
  19990. "china-s",
  19991. "china-ts",
  19992. "china-ss",
  19993. "japan",
  19994. "japan-s",
  19995. "korea",
  19996. "korea-s",
  19997. ):
  19998. return len(text) * fontsize
  19999. raise ValueError("Font '%s' is unsupported" % fontname)
  20000. def image_profile(img: ByteString) -> dict:
  20001. """ Return basic properties of an image.
  20002. Args:
  20003. img: bytes, bytearray, io.BytesIO object or an opened image file.
  20004. Returns:
  20005. A dictionary with keys width, height, colorspace.n, bpc, type, ext and size,
  20006. where 'type' is the MuPDF image type (0 to 14) and 'ext' the suitable
  20007. file extension.
  20008. """
  20009. if type(img) is io.BytesIO:
  20010. stream = img.getvalue()
  20011. elif hasattr(img, "read"):
  20012. stream = img.read()
  20013. elif type(img) in (bytes, bytearray):
  20014. stream = img
  20015. else:
  20016. raise ValueError("bad argument 'img'")
  20017. return TOOLS.image_profile(stream)
  20018. def jm_append_merge(dev):
  20019. '''
  20020. Append current path to list or merge into last path of the list.
  20021. (1) Append if first path, different item lists or not a 'stroke' version
  20022. of previous path
  20023. (2) If new path has the same items, merge its content into previous path
  20024. and change path["type"] to "fs".
  20025. (3) If "out" is callable, skip the previous and pass dictionary to it.
  20026. '''
  20027. #log(f'{getattr(dev, "pathdict", None)=}')
  20028. assert isinstance(dev.out, list)
  20029. #log( f'{dev.out=}')
  20030. if callable(dev.method) or dev.method: # function or method
  20031. # callback.
  20032. if dev.method is None:
  20033. # fixme, this surely cannot happen?
  20034. assert 0
  20035. #resp = PyObject_CallFunctionObjArgs(out, dev.pathdict, NULL)
  20036. else:
  20037. #log(f'calling {dev.out=} {dev.method=} {dev.pathdict=}')
  20038. resp = getattr(dev.out, dev.method)(dev.pathdict)
  20039. if not resp:
  20040. message("calling cdrawings callback function/method failed!")
  20041. dev.pathdict = None
  20042. return
  20043. def append():
  20044. #log(f'jm_append_merge(): clearing dev.pathdict')
  20045. dev.out.append(dev.pathdict.copy())
  20046. dev.pathdict.clear()
  20047. assert isinstance(dev.out, list)
  20048. len_ = len(dev.out) # len of output list so far
  20049. #log('{len_=}')
  20050. if len_ == 0: # always append first path
  20051. return append()
  20052. #log(f'{getattr(dev, "pathdict", None)=}')
  20053. thistype = dev.pathdict[ dictkey_type]
  20054. #log(f'{thistype=}')
  20055. if thistype != 's': # if not stroke, then append
  20056. return append()
  20057. prev = dev.out[ len_-1] # get prev path
  20058. #log( f'{prev=}')
  20059. prevtype = prev[ dictkey_type]
  20060. #log( f'{prevtype=}')
  20061. if prevtype != 'f': # if previous not fill, append
  20062. return append()
  20063. # last check: there must be the same list of items for "f" and "s".
  20064. previtems = prev[ dictkey_items]
  20065. thisitems = dev.pathdict[ dictkey_items]
  20066. if previtems != thisitems:
  20067. return append()
  20068. #rc = PyDict_Merge(prev, dev.pathdict, 0); // merge with no override
  20069. try:
  20070. for k, v in dev.pathdict.items():
  20071. if k not in prev:
  20072. prev[k] = v
  20073. rc = 0
  20074. except Exception:
  20075. if g_exceptions_verbose: exception_info()
  20076. #raise
  20077. rc = -1
  20078. if rc == 0:
  20079. prev[ dictkey_type] = 'fs'
  20080. dev.pathdict.clear()
  20081. else:
  20082. message("could not merge stroke and fill path")
  20083. append()
  20084. def jm_bbox_add_rect( dev, ctx, rect, code):
  20085. if not dev.layers:
  20086. dev.result.append( (code, JM_py_from_rect(rect)))
  20087. else:
  20088. dev.result.append( (code, JM_py_from_rect(rect), dev.layer_name))
  20089. def jm_bbox_fill_image( dev, ctx, image, ctm, alpha, color_params):
  20090. r = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
  20091. r = mupdf.ll_fz_transform_rect( r.internal(), ctm)
  20092. jm_bbox_add_rect( dev, ctx, r, "fill-image")
  20093. def jm_bbox_fill_image_mask( dev, ctx, image, ctm, colorspace, color, alpha, color_params):
  20094. try:
  20095. jm_bbox_add_rect( dev, ctx, mupdf.ll_fz_transform_rect(mupdf.fz_unit_rect, ctm), "fill-imgmask")
  20096. except Exception:
  20097. if g_exceptions_verbose: exception_info()
  20098. raise
  20099. def jm_bbox_fill_path( dev, ctx, path, even_odd, ctm, colorspace, color, alpha, color_params):
  20100. even_odd = True if even_odd else False
  20101. try:
  20102. jm_bbox_add_rect( dev, ctx, mupdf.ll_fz_bound_path(path, None, ctm), "fill-path")
  20103. except Exception:
  20104. if g_exceptions_verbose: exception_info()
  20105. raise
  20106. def jm_bbox_fill_shade( dev, ctx, shade, ctm, alpha, color_params):
  20107. try:
  20108. jm_bbox_add_rect( dev, ctx, mupdf.ll_fz_bound_shade( shade, ctm), "fill-shade")
  20109. except Exception:
  20110. if g_exceptions_verbose: exception_info()
  20111. raise
  20112. def jm_bbox_stroke_text( dev, ctx, text, stroke, ctm, *args):
  20113. try:
  20114. jm_bbox_add_rect( dev, ctx, mupdf.ll_fz_bound_text( text, stroke, ctm), "stroke-text")
  20115. except Exception:
  20116. if g_exceptions_verbose: exception_info()
  20117. raise
  20118. def jm_bbox_fill_text( dev, ctx, text, ctm, *args):
  20119. try:
  20120. jm_bbox_add_rect( dev, ctx, mupdf.ll_fz_bound_text( text, None, ctm), "fill-text")
  20121. except Exception:
  20122. if g_exceptions_verbose: exception_info()
  20123. raise
  20124. def jm_bbox_ignore_text( dev, ctx, text, ctm):
  20125. jm_bbox_add_rect( dev, ctx, mupdf.ll_fz_bound_text(text, None, ctm), "ignore-text")
  20126. def jm_bbox_stroke_path( dev, ctx, path, stroke, ctm, colorspace, color, alpha, color_params):
  20127. try:
  20128. jm_bbox_add_rect( dev, ctx, mupdf.ll_fz_bound_path( path, stroke, ctm), "stroke-path")
  20129. except Exception:
  20130. if g_exceptions_verbose: exception_info()
  20131. raise
  20132. def jm_checkquad(dev):
  20133. '''
  20134. Check whether the last 4 lines represent a quad.
  20135. Because of how we count, the lines are a polyline already, i.e. last point
  20136. of a line equals 1st point of next line.
  20137. So we check for a polygon (last line's end point equals start point).
  20138. If not true we return 0.
  20139. '''
  20140. #log(f'{getattr(dev, "pathdict", None)=}')
  20141. items = dev.pathdict[ dictkey_items]
  20142. len_ = len(items)
  20143. f = [0] * 8 # coordinates of the 4 corners
  20144. # fill the 8 floats in f, start from items[-4:]
  20145. for i in range( 4): # store line start points
  20146. line = items[ len_ - 4 + i]
  20147. temp = JM_point_from_py( line[1])
  20148. f[i * 2] = temp.x
  20149. f[i * 2 + 1] = temp.y
  20150. lp = JM_point_from_py( line[ 2])
  20151. if lp.x != f[0] or lp.y != f[1]:
  20152. # not a polygon!
  20153. #dev.linecount -= 1
  20154. return 0
  20155. # we have detected a quad
  20156. dev.linecount = 0 # reset this
  20157. # a quad item is ("qu", (ul, ur, ll, lr)), where the tuple items
  20158. # are pairs of floats representing a quad corner each.
  20159. # relationship of float array to quad points:
  20160. # (0, 1) = ul, (2, 3) = ll, (6, 7) = ur, (4, 5) = lr
  20161. q = mupdf.fz_make_quad(f[0], f[1], f[6], f[7], f[2], f[3], f[4], f[5])
  20162. rect = ('qu', JM_py_from_quad(q))
  20163. items[ len_ - 4] = rect # replace item -4 by rect
  20164. del items[ len_ - 3 : len_] # delete remaining 3 items
  20165. return 1
  20166. def jm_checkrect(dev):
  20167. '''
  20168. Check whether the last 3 path items represent a rectangle.
  20169. Returns 1 if we have modified the path, otherwise 0.
  20170. '''
  20171. #log(f'{getattr(dev, "pathdict", None)=}')
  20172. dev.linecount = 0 # reset line count
  20173. orientation = 0 # area orientation of rectangle
  20174. items = dev.pathdict[ dictkey_items]
  20175. len_ = len(items)
  20176. line0 = items[ len_ - 3]
  20177. ll = JM_point_from_py( line0[ 1])
  20178. lr = JM_point_from_py( line0[ 2])
  20179. # no need to extract "line1"!
  20180. line2 = items[ len_ - 1]
  20181. ur = JM_point_from_py( line2[ 1])
  20182. ul = JM_point_from_py( line2[ 2])
  20183. # Assumption:
  20184. # When decomposing rects, MuPDF always starts with a horizontal line,
  20185. # followed by a vertical line, followed by a horizontal line.
  20186. # First line: (ll, lr), third line: (ul, ur).
  20187. # If 1st line is below 3rd line, we record anti-clockwise (+1), else
  20188. # clockwise (-1) orientation.
  20189. if (0
  20190. or ll.y != lr.y
  20191. or ll.x != ul.x
  20192. or ur.y != ul.y
  20193. or ur.x != lr.x
  20194. ):
  20195. return 0 # not a rectangle
  20196. # we have a rect, replace last 3 "l" items by one "re" item.
  20197. if ul.y < lr.y:
  20198. r = mupdf.fz_make_rect(ul.x, ul.y, lr.x, lr.y)
  20199. orientation = 1
  20200. else:
  20201. r = mupdf.fz_make_rect(ll.x, ll.y, ur.x, ur.y)
  20202. orientation = -1
  20203. rect = ( 're', JM_py_from_rect(r), orientation)
  20204. items[ len_ - 3] = rect # replace item -3 by rect
  20205. del items[ len_ - 2 : len_] # delete remaining 2 items
  20206. return 1
  20207. def jm_trace_text( dev, text, type_, ctm, colorspace, color, alpha, seqno):
  20208. span = text.head
  20209. while 1:
  20210. if not span:
  20211. break
  20212. jm_trace_text_span( dev, span, type_, ctm, colorspace, color, alpha, seqno)
  20213. span = span.next
def jm_trace_text_span(dev, span, type_, ctm, colorspace, color, alpha, seqno):
    '''
    Build a dictionary describing one text span and append it to dev.out.

    Python counterpart of the C function
    jm_trace_text_span(fz_context *ctx, PyObject *out, fz_text_span *span, int type, fz_matrix ctm, fz_colorspace *colorspace, const float *color, float alpha, size_t seqno)

    Args:
        dev: device object; dev.linewidth and dev.layer_name are read and
            the resulting dictionary is appended to dev.out.
        span: a mupdf.fz_text_span.
        type_: stored unchanged under key 'type'.
        ctm: a mupdf.fz_matrix; combined with the span's own matrix.
        colorspace: if falsy, the span color is reported as (0, 0, 0).
        color: color components in `colorspace`; converted to RGB.
        alpha: stored unchanged under key 'opacity'.
        seqno: stored unchanged under key 'seqno'.
    '''
    out_font = None
    assert isinstance( span, mupdf.fz_text_span)
    span = mupdf.FzTextSpan( span)
    assert isinstance( ctm, mupdf.fz_matrix)
    ctm = mupdf.FzMatrix( ctm)
    fontname = JM_font_name( span.font())
    #float rgb[3];
    #PyObject *chars = PyTuple_New(span->len);

    mat = mupdf.fz_concat(span.trm(), ctm)  # text transformation matrix
    dir = mupdf.fz_transform_vector(mupdf.fz_make_point(1, 0), mat) # writing direction
    fsize = math.sqrt(dir.x * dir.x + dir.y * dir.y)    # font size
    dir = mupdf.fz_normalize_vector(dir)

    space_adv = 0   # advance of a space glyph; stays 0 until one is seen
    asc = JM_font_ascender( span.font())
    dsc = JM_font_descender( span.font())
    if asc < 1e-3:  # probably Tesseract font
        dsc = -0.1
        asc = 0.9

    # compute effective ascender / descender
    # (both scaled so that ascsize - dscsize == fsize)
    ascsize = asc * fsize / (asc - dsc)
    dscsize = dsc * fsize / (asc - dsc)
    fflags = 0  # font flags
    mono = mupdf.fz_font_is_monospaced( span.font())
    fflags += mono * TEXT_FONT_MONOSPACED
    fflags += mupdf.fz_font_is_italic( span.font()) * TEXT_FONT_ITALIC
    fflags += mupdf.fz_font_is_serif( span.font()) * TEXT_FONT_SERIFED
    fflags += mupdf.fz_font_is_bold( span.font()) * TEXT_FONT_BOLD

    last_adv = 0    # advance of the most recently processed character

    # walk through characters of span
    span_bbox = mupdf.FzRect()
    # matrix built from the normalized writing direction
    rot = mupdf.fz_make_matrix(dir.x, dir.y, -dir.y, dir.x, 0, 0)
    if dir.x == -1: # left-right flip
        rot.d = 1

    chars = []
    for i in range( span.m_internal.len):
        adv = 0
        if span.items(i).gid >= 0:
            adv = mupdf.fz_advance_glyph( span.font(), span.items(i).gid, span.m_internal.wmode)
        adv *= fsize
        last_adv = adv
        if span.items(i).ucs == 32:
            space_adv = adv
        char_orig = mupdf.fz_make_point(span.items(i).x, span.items(i).y)
        char_orig = mupdf.fz_transform_point(char_orig, ctm)
        # m1: move char origin to (0, 0), apply `rot`, then move back
        m1 = mupdf.fz_make_matrix(1, 0, 0, 1, -char_orig.x, -char_orig.y)
        m1 = mupdf.fz_concat(m1, rot)
        m1 = mupdf.fz_concat(m1, mupdf.FzMatrix(1, 0, 0, 1, char_orig.x, char_orig.y))
        x0 = char_orig.x
        x1 = x0 + adv
        if (
                (mat.d > 0 and (dir.x == 1 or dir.x == -1))
                or
                (mat.b != 0 and mat.b == -mat.c)
                ):  # up-down flip
            y0 = char_orig.y + dscsize
            y1 = char_orig.y + ascsize
        else:
            y0 = char_orig.y - ascsize
            y1 = char_orig.y - dscsize
        char_bbox = mupdf.fz_make_rect(x0, y0, x1, y1)
        char_bbox = mupdf.fz_transform_rect(char_bbox, m1)
        # one entry per character: (ucs, gid, origin, bbox)
        chars.append(
                (
                    span.items(i).ucs,
                    span.items(i).gid,
                    (
                        char_orig.x,
                        char_orig.y,
                    ),
                    (
                        char_bbox.x0,
                        char_bbox.y0,
                        char_bbox.x1,
                        char_bbox.y1,
                    ),
                )
                )
        if i > 0:
            span_bbox = mupdf.fz_union_rect(span_bbox, char_bbox)
        else:
            span_bbox = char_bbox
    chars = tuple(chars)

    # No space character in the span: derive a space width from the font,
    # falling back to the advance of the last character.
    if not space_adv:
        if not (fflags & TEXT_FONT_MONOSPACED):
            c, out_font = mupdf.fz_encode_character_with_fallback( span.font(), 32, 0, 0)
            space_adv = mupdf.fz_advance_glyph(
                    span.font(),
                    c,
                    span.m_internal.wmode,
                    )
            space_adv *= fsize
            if not space_adv:
                space_adv = last_adv
        else:
            space_adv = last_adv    # for mono, any char width suffices

    # make the span dictionary
    span_dict = dict()
    span_dict[ 'dir'] = JM_py_from_point(dir)
    span_dict[ 'font'] = JM_EscapeStrFromStr(fontname)
    span_dict[ 'wmode'] = span.m_internal.wmode
    span_dict[ 'flags'] =fflags
    span_dict[ "bidi_lvl"] =span.m_internal.bidi_level
    span_dict[ "bidi_dir"] = span.m_internal.markup_dir
    span_dict[ 'ascender'] = asc
    span_dict[ 'descender'] = dsc
    span_dict[ 'colorspace'] = 3    # color below is always reported as 3 components

    if colorspace:
        rgb = mupdf.fz_convert_color(
                mupdf.FzColorspace( mupdf.ll_fz_keep_colorspace( colorspace)),
                color,
                mupdf.fz_device_rgb(),
                mupdf.FzColorspace(),
                mupdf.FzColorParams(),
                )
        rgb = rgb[:3]   # mupdf.fz_convert_color() always returns 4 items.
    else:
        rgb = (0, 0, 0)

    if dev.linewidth > 0:   # width of character border
        linewidth = dev.linewidth
    else:
        linewidth = fsize * 0.05    # default: 5% of font size
    #log(f'{dev.linewidth=:.4f} {fsize=:.4f} {linewidth=:.4f}')

    span_dict[ 'color'] = rgb
    span_dict[ 'size'] = fsize
    span_dict[ "opacity"] = alpha
    span_dict[ "linewidth"] = linewidth
    span_dict[ "spacewidth"] = space_adv
    span_dict[ 'type'] = type_
    span_dict[ 'bbox'] = JM_py_from_rect(span_bbox)
    span_dict[ 'layer'] = dev.layer_name
    span_dict[ "seqno"] = seqno
    span_dict[ 'chars'] = chars
    #log(f'{span_dict=}')
    dev.out.append( span_dict)
  20352. def jm_lineart_color(colorspace, color):
  20353. #log(f' ')
  20354. if colorspace:
  20355. try:
  20356. # Need to be careful to use a named Python object to ensure
  20357. # that the `params` we pass to mupdf.ll_fz_convert_color() is
  20358. # valid. E.g. doing:
  20359. #
  20360. # rgb = mupdf.ll_fz_convert_color(..., mupdf.FzColorParams().internal())
  20361. #
  20362. # - seems to end up with a corrupted `params`.
  20363. #
  20364. cs = mupdf.FzColorspace( mupdf.FzColorspace.Fixed_RGB)
  20365. cp = mupdf.FzColorParams()
  20366. rgb = mupdf.ll_fz_convert_color(
  20367. colorspace,
  20368. color,
  20369. cs.m_internal,
  20370. None,
  20371. cp.internal(),
  20372. )
  20373. except Exception:
  20374. if g_exceptions_verbose: exception_info()
  20375. raise
  20376. return rgb[:3]
  20377. return ()
  20378. def jm_lineart_drop_device(dev, ctx):
  20379. if isinstance(dev.out, list):
  20380. dev.out = []
  20381. dev.scissors = []
  20382. def jm_lineart_fill_path( dev, ctx, path, even_odd, ctm, colorspace, color, alpha, color_params):
  20383. #log(f'{getattr(dev, "pathdict", None)=}')
  20384. #log(f'jm_lineart_fill_path(): {dev.seqno=}')
  20385. even_odd = True if even_odd else False
  20386. try:
  20387. assert isinstance( ctm, mupdf.fz_matrix)
  20388. dev.ctm = mupdf.FzMatrix( ctm) # fz_concat(ctm, dev_ptm);
  20389. dev.path_type = trace_device_FILL_PATH
  20390. jm_lineart_path( dev, ctx, path)
  20391. if dev.pathdict is None:
  20392. return
  20393. #item_count = len(dev.pathdict[ dictkey_items])
  20394. #if item_count == 0:
  20395. # return
  20396. dev.pathdict[ dictkey_type] ="f"
  20397. dev.pathdict[ "even_odd"] = even_odd
  20398. dev.pathdict[ "fill_opacity"] = alpha
  20399. #log(f'setting dev.pathdict[ "closePath"] to false')
  20400. #dev.pathdict[ "closePath"] = False
  20401. dev.pathdict[ "fill"] = jm_lineart_color( colorspace, color)
  20402. dev.pathdict[ dictkey_rect] = JM_py_from_rect(dev.pathrect)
  20403. dev.pathdict[ "seqno"] = dev.seqno
  20404. #jm_append_merge(dev)
  20405. dev.pathdict[ 'layer'] = dev.layer_name
  20406. if dev.clips:
  20407. dev.pathdict[ 'level'] = dev.depth
  20408. jm_append_merge(dev)
  20409. dev.seqno += 1
  20410. #log(f'jm_lineart_fill_path() end: {getattr(dev, "pathdict", None)=}')
  20411. except Exception:
  20412. if g_exceptions_verbose: exception_info()
  20413. raise
  20414. # There are 3 text trace types:
  20415. # 0 - fill text (PDF Tr 0)
  20416. # 1 - stroke text (PDF Tr 1)
  20417. # 3 - ignore text (PDF Tr 3)
  20418. def jm_lineart_fill_text( dev, ctx, text, ctm, colorspace, color, alpha, color_params):
  20419. if 0:
  20420. log(f'{type(ctx)=} {ctx=}')
  20421. log(f'{type(dev)=} {dev=}')
  20422. log(f'{type(text)=} {text=}')
  20423. log(f'{type(ctm)=} {ctm=}')
  20424. log(f'{type(colorspace)=} {colorspace=}')
  20425. log(f'{type(color)=} {color=}')
  20426. log(f'{type(alpha)=} {alpha=}')
  20427. log(f'{type(color_params)=} {color_params=}')
  20428. jm_trace_text(dev, text, 0, ctm, colorspace, color, alpha, dev.seqno)
  20429. dev.seqno += 1
def jm_lineart_ignore_text(dev, text, ctm):
    '''
    Device callback for ignored text: trace all spans of `text` with
    trace type 3, no colorspace / color and opacity 1, then increment
    dev.seqno.
    '''
    #log(f'{getattr(dev, "pathdict", None)=}')
    jm_trace_text(dev, text, 3, ctm, None, None, 1, dev.seqno)
    dev.seqno += 1
class Walker(mupdf.FzPathWalker2):
    '''
    Path walker that records the segments of a path as items in
    dev.pathdict[dictkey_items].

    All state (last / first point, bounding rect, consecutive-line count,
    the path dictionary itself) lives on the device object passed in.
    '''

    def __init__(self, dev):
        '''Enable the four virtual callbacks and remember the device.'''
        super().__init__()
        self.use_virtual_moveto()
        self.use_virtual_lineto()
        self.use_virtual_curveto()
        self.use_virtual_closepath()
        self.dev = dev

    def closepath(self, ctx):    # trace_close().
        '''
        Close the current sub-path.

        If exactly three consecutive lines precede the close and they form
        a rectangle, jm_checkrect() replaces them and we are done.
        Otherwise, if a moveto is pending, a closing line back to the first
        point is appended when needed and "closePath" is set to False;
        without a pending moveto, "closePath" is set to True.
        '''
        #log(f'Walker(): {self.dev.pathdict=}')
        try:
            if self.dev.linecount == 3:
                if jm_checkrect(self.dev):
                    #log(f'end1: {self.dev.pathdict=}')
                    return
            self.dev.linecount = 0   # reset # of consec. lines

            if self.dev.havemove:
                if self.dev.lastpoint != self.dev.firstpoint:
                    # make the closing segment an explicit line item
                    item = ("l", JM_py_from_point(self.dev.lastpoint),
                                 JM_py_from_point(self.dev.firstpoint))
                    self.dev.pathdict[dictkey_items].append(item)
                    self.dev.lastpoint = self.dev.firstpoint
                self.dev.pathdict["closePath"] = False

            else:
                #log('setting self.dev.pathdict[ "closePath"] to true')
                self.dev.pathdict[ "closePath"] = True
            #log(f'end2: {self.dev.pathdict=}')

            self.dev.havemove = 0

        except Exception:
            if g_exceptions_verbose:    exception_info()
            raise

    def curveto(self, ctx, x1, y1, x2, y2, x3, y3):   # trace_curveto().
        '''
        Append a "c" item: (start point, control point 1, control point 2,
        end point), all transformed by dev.ctm, and extend dev.pathrect by
        the three new points.
        '''
        #log(f'Walker(): {self.dev.pathdict=}')
        try:
            self.dev.linecount = 0  # reset # of consec. lines
            p1 = mupdf.fz_make_point(x1, y1)
            p2 = mupdf.fz_make_point(x2, y2)
            p3 = mupdf.fz_make_point(x3, y3)
            p1 = mupdf.fz_transform_point(p1, self.dev.ctm)
            p2 = mupdf.fz_transform_point(p2, self.dev.ctm)
            p3 = mupdf.fz_transform_point(p3, self.dev.ctm)
            self.dev.pathrect = mupdf.fz_include_point_in_rect(self.dev.pathrect, p1)
            self.dev.pathrect = mupdf.fz_include_point_in_rect(self.dev.pathrect, p2)
            self.dev.pathrect = mupdf.fz_include_point_in_rect(self.dev.pathrect, p3)

            list_ = (
                    "c",
                    JM_py_from_point(self.dev.lastpoint),
                    JM_py_from_point(p1),
                    JM_py_from_point(p2),
                    JM_py_from_point(p3),
                    )
            self.dev.lastpoint = p3
            self.dev.pathdict[ dictkey_items].append( list_)
        except Exception:
            if g_exceptions_verbose:    exception_info()
            raise

    def lineto(self, ctx, x, y):   # trace_lineto().
        '''
        Append an "l" item from the last point to (x, y) transformed by
        dev.ctm, extend dev.pathrect and count the line. After the fourth
        consecutive line of a non-fill path, jm_checkquad() is invoked.
        '''
        #log(f'Walker(): {self.dev.pathdict=}')
        try:
            p1 = mupdf.fz_transform_point( mupdf.fz_make_point(x, y), self.dev.ctm)
            self.dev.pathrect = mupdf.fz_include_point_in_rect( self.dev.pathrect, p1)
            list_ = (
                    'l',
                    JM_py_from_point( self.dev.lastpoint),
                    JM_py_from_point(p1),
                    )
            self.dev.lastpoint = p1
            items = self.dev.pathdict[ dictkey_items]
            items.append( list_)
            self.dev.linecount += 1 # counts consecutive lines
            if self.dev.linecount == 4 and self.dev.path_type != trace_device_FILL_PATH:
                # shrink to "re" or "qu" item
                jm_checkquad(self.dev)
        except Exception:
            if g_exceptions_verbose:    exception_info()
            raise

    def moveto(self, ctx, x, y):   # trace_moveto().
        '''
        Start a new sub-path at (x, y) transformed by dev.ctm: this becomes
        both the first and the last point, a moveto is flagged as pending
        and the consecutive-line count is reset. If dev.pathrect is still
        infinite, it is initialized to this single point.
        '''
        # debugging aid, permanently disabled by the leading `0 and`
        if 0 and isinstance(self.dev.pathdict, dict):
            log(f'self.dev.pathdict:')
            for n, v in self.dev.pathdict.items():
                log( ' {type(n)=} {len(n)=} {n!r} {n}: {v!r}: {v}')

        #log(f'Walker(): {type(self.dev.pathdict)=} {self.dev.pathdict=}')

        try:
            #log( '{=dev.ctm type(dev.ctm)}')
            self.dev.lastpoint = mupdf.fz_transform_point(
                    mupdf.fz_make_point(x, y),
                    self.dev.ctm,
                    )
            if mupdf.fz_is_infinite_rect( self.dev.pathrect):
                self.dev.pathrect = mupdf.fz_make_rect(
                        self.dev.lastpoint.x,
                        self.dev.lastpoint.y,
                        self.dev.lastpoint.x,
                        self.dev.lastpoint.y,
                        )
            self.dev.firstpoint = self.dev.lastpoint
            self.dev.havemove = 1
            self.dev.linecount = 0  # reset # of consec. lines
        except Exception:
            if g_exceptions_verbose:    exception_info()
            raise
  20535. def jm_lineart_path(dev, ctx, path):
  20536. '''
  20537. Create the "items" list of the path dictionary
  20538. * either create or empty the path dictionary
  20539. * reset the end point of the path
  20540. * reset count of consecutive lines
  20541. * invoke fz_walk_path(), which create the single items
  20542. * if no items detected, empty path dict again
  20543. '''
  20544. #log(f'{getattr(dev, "pathdict", None)=}')
  20545. try:
  20546. dev.pathrect = mupdf.FzRect( mupdf.FzRect.Fixed_INFINITE)
  20547. dev.linecount = 0
  20548. dev.lastpoint = mupdf.FzPoint( 0, 0)
  20549. dev.pathdict = dict()
  20550. dev.pathdict[ dictkey_items] = []
  20551. # First time we create a Walker instance is slow, e.g. 0.3s, then later
  20552. # times run in around 0.01ms. If Walker is defined locally instead of
  20553. # globally, each time takes 0.3s.
  20554. #
  20555. walker = Walker(dev)
  20556. # Unlike fz_run_page(), fz_path_walker callbacks are not passed
  20557. # a pointer to the struct, instead they get an arbitrary
  20558. # void*. The underlying C++ Director callbacks use this void* to
  20559. # identify the fz_path_walker instance so in turn we need to pass
  20560. # arg=walker.m_internal.
  20561. mupdf.fz_walk_path( mupdf.FzPath(mupdf.ll_fz_keep_path(path)), walker, walker.m_internal)
  20562. # Check if any items were added ...
  20563. if not dev.pathdict[ dictkey_items]:
  20564. dev.pathdict = None
  20565. except Exception:
  20566. if g_exceptions_verbose: exception_info()
  20567. raise
  20568. def jm_lineart_stroke_path( dev, ctx, path, stroke, ctm, colorspace, color, alpha, color_params):
  20569. #log(f'{dev.pathdict=} {dev.clips=}')
  20570. try:
  20571. assert isinstance( ctm, mupdf.fz_matrix)
  20572. dev.pathfactor = 1
  20573. if ctm.a != 0 and abs(ctm.a) == abs(ctm.d):
  20574. dev.pathfactor = abs(ctm.a)
  20575. elif ctm.b != 0 and abs(ctm.b) == abs(ctm.c):
  20576. dev.pathfactor = abs(ctm.b)
  20577. dev.ctm = mupdf.FzMatrix( ctm) # fz_concat(ctm, dev_ptm);
  20578. dev.path_type = trace_device_STROKE_PATH
  20579. jm_lineart_path( dev, ctx, path)
  20580. if dev.pathdict is None:
  20581. return
  20582. dev.pathdict[ dictkey_type] = 's'
  20583. dev.pathdict[ 'stroke_opacity'] = alpha
  20584. dev.pathdict[ 'color'] = jm_lineart_color( colorspace, color)
  20585. dev.pathdict[ dictkey_width] = dev.pathfactor * stroke.linewidth
  20586. dev.pathdict[ 'lineCap'] = (
  20587. stroke.start_cap,
  20588. stroke.dash_cap,
  20589. stroke.end_cap,
  20590. )
  20591. dev.pathdict[ 'lineJoin'] = dev.pathfactor * stroke.linejoin
  20592. if 'closePath' not in dev.pathdict:
  20593. #log('setting dev.pathdict["closePath"] to false')
  20594. dev.pathdict['closePath'] = False
  20595. # output the "dashes" string
  20596. if stroke.dash_len:
  20597. buff = mupdf.fz_new_buffer( 256)
  20598. mupdf.fz_append_string( buff, "[ ") # left bracket
  20599. for i in range( stroke.dash_len):
  20600. # We use mupdf python's SWIG-generated floats_getitem() fn to
  20601. # access float *stroke.dash_list[].
  20602. value = mupdf.floats_getitem( stroke.dash_list, i) # stroke.dash_list[i].
  20603. mupdf.fz_append_string( buff, f'{_format_g(dev.pathfactor * value)} ')
  20604. mupdf.fz_append_string( buff, f'] {_format_g(dev.pathfactor * stroke.dash_phase)}')
  20605. dev.pathdict[ 'dashes'] = buff
  20606. else:
  20607. dev.pathdict[ 'dashes'] = '[] 0'
  20608. dev.pathdict[ dictkey_rect] = JM_py_from_rect(dev.pathrect)
  20609. dev.pathdict['layer'] = dev.layer_name
  20610. dev.pathdict[ 'seqno'] = dev.seqno
  20611. if dev.clips:
  20612. dev.pathdict[ 'level'] = dev.depth
  20613. jm_append_merge(dev)
  20614. dev.seqno += 1
  20615. except Exception:
  20616. if g_exceptions_verbose: exception_info()
  20617. raise
  20618. def jm_lineart_clip_path(dev, ctx, path, even_odd, ctm, scissor):
  20619. if not dev.clips:
  20620. return
  20621. dev.ctm = mupdf.FzMatrix(ctm) # fz_concat(ctm, trace_device_ptm);
  20622. dev.path_type = trace_device_CLIP_PATH
  20623. jm_lineart_path(dev, ctx, path)
  20624. if dev.pathdict is None:
  20625. return
  20626. dev.pathdict[ dictkey_type] = 'clip'
  20627. dev.pathdict[ 'even_odd'] = bool(even_odd)
  20628. if 'closePath' not in dev.pathdict:
  20629. #log(f'setting dev.pathdict["closePath"] to False')
  20630. dev.pathdict['closePath'] = False
  20631. dev.pathdict['scissor'] = JM_py_from_rect(compute_scissor(dev))
  20632. dev.pathdict['level'] = dev.depth
  20633. dev.pathdict['layer'] = dev.layer_name
  20634. jm_append_merge(dev)
  20635. dev.depth += 1
  20636. def jm_lineart_clip_stroke_path(dev, ctx, path, stroke, ctm, scissor):
  20637. if not dev.clips:
  20638. return
  20639. dev.ctm = mupdf.FzMatrix(ctm) # fz_concat(ctm, trace_device_ptm);
  20640. dev.path_type = trace_device_CLIP_STROKE_PATH
  20641. jm_lineart_path(dev, ctx, path)
  20642. if dev.pathdict is None:
  20643. return
  20644. dev.pathdict['dictkey_type'] = 'clip'
  20645. dev.pathdict['even_odd'] = None
  20646. if 'closePath' not in dev.pathdict:
  20647. #log(f'setting dev.pathdict["closePath"] to False')
  20648. dev.pathdict['closePath'] = False
  20649. dev.pathdict['scissor'] = JM_py_from_rect(compute_scissor(dev))
  20650. dev.pathdict['level'] = dev.depth
  20651. dev.pathdict['layer'] = dev.layer_name
  20652. jm_append_merge(dev)
  20653. dev.depth += 1
  20654. def jm_lineart_clip_stroke_text(dev, ctx, text, stroke, ctm, scissor):
  20655. if not dev.clips:
  20656. return
  20657. compute_scissor(dev)
  20658. dev.depth += 1
  20659. def jm_lineart_clip_text(dev, ctx, text, ctm, scissor):
  20660. if not dev.clips:
  20661. return
  20662. compute_scissor(dev)
  20663. dev.depth += 1
  20664. def jm_lineart_clip_image_mask( dev, ctx, image, ctm, scissor):
  20665. if not dev.clips:
  20666. return
  20667. compute_scissor(dev)
  20668. dev.depth += 1
  20669. def jm_lineart_pop_clip(dev, ctx):
  20670. if not dev.clips or not dev.scissors:
  20671. return
  20672. len_ = len(dev.scissors)
  20673. if len_ < 1:
  20674. return
  20675. del dev.scissors[-1]
  20676. dev.depth -= 1
  20677. def jm_lineart_begin_layer(dev, ctx, name):
  20678. if name:
  20679. dev.layer_name = name
  20680. else:
  20681. dev.layer_name = ""
def jm_lineart_end_layer(dev, ctx):
    '''
    Device callback ending a layer: reset the device's layer name to the
    empty string.
    '''
    dev.layer_name = ""
  20684. def jm_lineart_begin_group(dev, ctx, bbox, cs, isolated, knockout, blendmode, alpha):
  20685. #log(f'{dev.pathdict=} {dev.clips=}')
  20686. if not dev.clips:
  20687. return
  20688. dev.pathdict = { # Py_BuildValue("{s:s,s:N,s:N,s:N,s:s,s:f,s:i,s:N}",
  20689. "type": "group",
  20690. "rect": JM_py_from_rect(bbox),
  20691. "isolated": bool(isolated),
  20692. "knockout": bool(knockout),
  20693. "blendmode": mupdf.fz_blendmode_name(blendmode),
  20694. "opacity": alpha,
  20695. "level": dev.depth,
  20696. "layer": dev.layer_name
  20697. }
  20698. jm_append_merge(dev)
  20699. dev.depth += 1
  20700. def jm_lineart_end_group(dev, ctx):
  20701. #log(f'{dev.pathdict=} {dev.clips=}')
  20702. if not dev.clips:
  20703. return
  20704. dev.depth -= 1
def jm_lineart_stroke_text(dev, ctx, text, stroke, ctm, colorspace, color, alpha, color_params):
    '''
    Device callback for stroked text: trace all spans of `text` with
    trace type 1, then increment dev.seqno.
    '''
    jm_trace_text(dev, text, 1, ctm, colorspace, color, alpha, dev.seqno)
    dev.seqno += 1
def jm_dev_linewidth( dev, ctx, path, stroke, matrix, colorspace, color, alpha, color_params):
    '''
    Stroke-path callback that only records the stroke's line width on the
    device (jm_trace_text_span() reads dev.linewidth) and then increments
    dev.seqno.
    '''
    dev.linewidth = stroke.linewidth
    jm_increase_seqno( dev, ctx)
  20711. def jm_increase_seqno( dev, ctx, *vargs):
  20712. try:
  20713. dev.seqno += 1
  20714. except Exception:
  20715. if g_exceptions_verbose: exception_info()
  20716. raise
  20717. def planish_line(p1: point_like, p2: point_like) -> Matrix:
  20718. """Compute matrix which maps line from p1 to p2 to the x-axis, such that it
  20719. maintains its length and p1 * matrix = Point(0, 0).
  20720. Args:
  20721. p1, p2: point_like
  20722. Returns:
  20723. Matrix which maps p1 to Point(0, 0) and p2 to a point on the x axis at
  20724. the same distance to Point(0,0). Will always combine a rotation and a
  20725. transformation.
  20726. """
  20727. p1 = Point(p1)
  20728. p2 = Point(p2)
  20729. return Matrix(util_hor_matrix(p1, p2))
class JM_image_reporter_Filter(mupdf.PdfFilterOptions2):
    '''
    PDF filter options whose image filter callback forwards every image
    to JM_image_filter().
    '''
    def __init__(self):
        super().__init__()
        self.use_virtual_image_filter()

    def image_filter( self, ctx, ctm, name, image):
        '''
        Forward the image to JM_image_filter(), passing the matrix as a
        mupdf.FzMatrix. Returns None, except under cppyy where 0 is
        returned.
        '''
        assert isinstance(ctm, mupdf.fz_matrix)
        JM_image_filter(self, mupdf.FzMatrix(ctm), name, image)
        if mupdf_cppyy:
            # cppyy doesn't appear to treat returned None as nullptr,
            # resulting in obscure 'python exception' exception.
            return 0
class JM_new_bbox_device_Device(mupdf.FzDevice2):
    '''
    Device that routes path, text, shade and image callbacks to the
    jm_bbox_*() functions and tracks the current layer name.
    '''
    def __init__(self, result, layers):
        super().__init__()
        # `result` and `layers` are only stored here; they are consumed by
        # the jm_bbox_*() callbacks defined elsewhere in this file.
        self.result = result
        self.layers = layers
        self.layer_name = ""    # maintained by begin_layer / end_layer

        # enable exactly the virtual callbacks assigned below
        self.use_virtual_fill_path()
        self.use_virtual_stroke_path()
        self.use_virtual_fill_text()
        self.use_virtual_stroke_text()
        self.use_virtual_ignore_text()
        self.use_virtual_fill_shade()
        self.use_virtual_fill_image()
        self.use_virtual_fill_image_mask()

        self.use_virtual_begin_layer()
        self.use_virtual_end_layer()

    # Callback implementations: plain functions used as methods, so the
    # device instance arrives as their first argument.
    begin_layer = jm_lineart_begin_layer
    end_layer = jm_lineart_end_layer

    fill_path = jm_bbox_fill_path
    stroke_path = jm_bbox_stroke_path
    fill_text = jm_bbox_fill_text
    stroke_text = jm_bbox_stroke_text
    ignore_text = jm_bbox_ignore_text
    fill_shade = jm_bbox_fill_shade
    fill_image = jm_bbox_fill_image
    fill_image_mask = jm_bbox_fill_image_mask
  20767. class JM_new_output_fileptr_Output(mupdf.FzOutput2):
  20768. def __init__(self, bio):
  20769. super().__init__()
  20770. self.bio = bio
  20771. self.use_virtual_write()
  20772. self.use_virtual_seek()
  20773. self.use_virtual_tell()
  20774. self.use_virtual_truncate()
  20775. def seek( self, ctx, offset, whence):
  20776. return self.bio.seek( offset, whence)
  20777. def tell( self, ctx):
  20778. ret = self.bio.tell()
  20779. return ret
  20780. def truncate( self, ctx):
  20781. return self.bio.truncate()
  20782. def write(self, ctx, data_raw, data_length):
  20783. data = mupdf.raw_to_python_bytes(data_raw, data_length)
  20784. return self.bio.write(data)
  20785. def compute_scissor(dev):
  20786. '''
  20787. Every scissor of a clip is a sub rectangle of the preceding clip scissor
  20788. if the clip level is larger.
  20789. '''
  20790. if dev.scissors is None:
  20791. dev.scissors = list()
  20792. num_scissors = len(dev.scissors)
  20793. if num_scissors > 0:
  20794. last_scissor = dev.scissors[num_scissors-1]
  20795. scissor = JM_rect_from_py(last_scissor)
  20796. scissor = mupdf.fz_intersect_rect(scissor, dev.pathrect)
  20797. else:
  20798. scissor = dev.pathrect
  20799. dev.scissors.append(JM_py_from_rect(scissor))
  20800. return scissor
  20801. class JM_new_lineart_device_Device(mupdf.FzDevice2):
  20802. '''
  20803. LINEART device for Python method Page.get_cdrawings()
  20804. '''
  20805. #log(f'JM_new_lineart_device_Device()')
  20806. def __init__(self, out, clips, method):
  20807. #log(f'JM_new_lineart_device_Device.__init__()')
  20808. super().__init__()
  20809. # fixme: this results in "Unexpected call of unimplemented virtual_fnptrs fn FzDevice2::drop_device().".
  20810. #self.use_virtual_drop_device()
  20811. self.use_virtual_fill_path()
  20812. self.use_virtual_stroke_path()
  20813. self.use_virtual_clip_path()
  20814. self.use_virtual_clip_image_mask()
  20815. self.use_virtual_clip_stroke_path()
  20816. self.use_virtual_clip_stroke_text()
  20817. self.use_virtual_clip_text()
  20818. self.use_virtual_fill_text
  20819. self.use_virtual_stroke_text
  20820. self.use_virtual_ignore_text
  20821. self.use_virtual_fill_shade()
  20822. self.use_virtual_fill_image()
  20823. self.use_virtual_fill_image_mask()
  20824. self.use_virtual_pop_clip()
  20825. self.use_virtual_begin_group()
  20826. self.use_virtual_end_group()
  20827. self.use_virtual_begin_layer()
  20828. self.use_virtual_end_layer()
  20829. self.out = out
  20830. self.seqno = 0
  20831. self.depth = 0
  20832. self.clips = clips
  20833. self.method = method
  20834. self.scissors = None
  20835. self.layer_name = "" # optional content name
  20836. self.pathrect = None
  20837. self.linewidth = 0
  20838. self.ptm = mupdf.FzMatrix()
  20839. self.ctm = mupdf.FzMatrix()
  20840. self.rot = mupdf.FzMatrix()
  20841. self.lastpoint = mupdf.FzPoint()
  20842. self.firstpoint = mupdf.FzPoint()
  20843. self.havemove = 0
  20844. self.pathrect = mupdf.FzRect()
  20845. self.pathfactor = 0
  20846. self.linecount = 0
  20847. self.path_type = 0
  20848. #drop_device = jm_lineart_drop_device
  20849. fill_path = jm_lineart_fill_path
  20850. stroke_path = jm_lineart_stroke_path
  20851. clip_image_mask = jm_lineart_clip_image_mask
  20852. clip_path = jm_lineart_clip_path
  20853. clip_stroke_path = jm_lineart_clip_stroke_path
  20854. clip_text = jm_lineart_clip_text
  20855. clip_stroke_text = jm_lineart_clip_stroke_text
  20856. fill_text = jm_increase_seqno
  20857. stroke_text = jm_increase_seqno
  20858. ignore_text = jm_increase_seqno
  20859. fill_shade = jm_increase_seqno
  20860. fill_image = jm_increase_seqno
  20861. fill_image_mask = jm_increase_seqno
  20862. pop_clip = jm_lineart_pop_clip
  20863. begin_group = jm_lineart_begin_group
  20864. end_group = jm_lineart_end_group
  20865. begin_layer = jm_lineart_begin_layer
  20866. end_layer = jm_lineart_end_layer
  20867. class JM_new_texttrace_device(mupdf.FzDevice2):
  20868. '''
  20869. Trace TEXT device for Python method Page.get_texttrace()
  20870. '''
  20871. def __init__(self, out):
  20872. super().__init__()
  20873. self.use_virtual_fill_path()
  20874. self.use_virtual_stroke_path()
  20875. self.use_virtual_fill_text()
  20876. self.use_virtual_stroke_text()
  20877. self.use_virtual_ignore_text()
  20878. self.use_virtual_fill_shade()
  20879. self.use_virtual_fill_image()
  20880. self.use_virtual_fill_image_mask()
  20881. self.use_virtual_begin_layer()
  20882. self.use_virtual_end_layer()
  20883. self.out = out
  20884. self.seqno = 0
  20885. self.depth = 0
  20886. self.clips = 0
  20887. self.method = None
  20888. self.seqno = 0
  20889. self.pathdict = dict()
  20890. self.scissors = list()
  20891. self.linewidth = 0
  20892. self.ptm = mupdf.FzMatrix()
  20893. self.ctm = mupdf.FzMatrix()
  20894. self.rot = mupdf.FzMatrix()
  20895. self.lastpoint = mupdf.FzPoint()
  20896. self.pathrect = mupdf.FzRect()
  20897. self.pathfactor = 0
  20898. self.linecount = 0
  20899. self.path_type = 0
  20900. self.layer_name = ""
  20901. fill_path = jm_increase_seqno
  20902. stroke_path = jm_dev_linewidth
  20903. fill_text = jm_lineart_fill_text
  20904. stroke_text = jm_lineart_stroke_text
  20905. ignore_text = jm_lineart_ignore_text
  20906. fill_shade = jm_increase_seqno
  20907. fill_image = jm_increase_seqno
  20908. fill_image_mask = jm_increase_seqno
  20909. begin_layer = jm_lineart_begin_layer
  20910. end_layer = jm_lineart_end_layer
  20911. def ConversionHeader(i: str, filename: OptStr ="unknown"):
  20912. t = i.lower()
  20913. import textwrap
  20914. html = textwrap.dedent("""
  20915. <!DOCTYPE html>
  20916. <html>
  20917. <head>
  20918. <style>
  20919. body{background-color:gray}
  20920. div{position:relative;background-color:white;margin:1em auto}
  20921. p{position:absolute;margin:0}
  20922. img{position:absolute}
  20923. </style>
  20924. </head>
  20925. <body>
  20926. """)
  20927. xml = textwrap.dedent("""
  20928. <?xml version="1.0"?>
  20929. <document name="%s">
  20930. """
  20931. % filename
  20932. )
  20933. xhtml = textwrap.dedent("""
  20934. <?xml version="1.0"?>
  20935. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  20936. <html xmlns="http://www.w3.org/1999/xhtml">
  20937. <head>
  20938. <style>
  20939. body{background-color:gray}
  20940. div{background-color:white;margin:1em;padding:1em}
  20941. p{white-space:pre-wrap}
  20942. </style>
  20943. </head>
  20944. <body>
  20945. """)
  20946. text = ""
  20947. json = '{"document": "%s", "pages": [\n' % filename
  20948. if t == "html":
  20949. r = html
  20950. elif t == "json":
  20951. r = json
  20952. elif t == "xml":
  20953. r = xml
  20954. elif t == "xhtml":
  20955. r = xhtml
  20956. else:
  20957. r = text
  20958. return r
  20959. def ConversionTrailer(i: str):
  20960. t = i.lower()
  20961. text = ""
  20962. json = "]\n}"
  20963. html = "</body>\n</html>\n"
  20964. xml = "</document>\n"
  20965. xhtml = html
  20966. if t == "html":
  20967. r = html
  20968. elif t == "json":
  20969. r = json
  20970. elif t == "xml":
  20971. r = xml
  20972. elif t == "xhtml":
  20973. r = xhtml
  20974. else:
  20975. r = text
  20976. return r
  20977. def annot_preprocess(page: "Page") -> int:
  20978. """Prepare for annotation insertion on the page.
  20979. Returns:
  20980. Old page rotation value. Temporarily sets rotation to 0 when required.
  20981. """
  20982. CheckParent(page)
  20983. if not page.parent.is_pdf:
  20984. raise ValueError("is no PDF")
  20985. old_rotation = page.rotation
  20986. if old_rotation != 0:
  20987. page.set_rotation(0)
  20988. return old_rotation
  20989. def annot_postprocess(page: "Page", annot: "Annot") -> None:
  20990. """Clean up after annotation insertion.
  20991. Set ownership flag and store annotation in page annotation dictionary.
  20992. """
  20993. #annot.parent = weakref.proxy(page)
  20994. assert isinstance( page, Page)
  20995. assert isinstance( annot, Annot)
  20996. annot.parent = page
  20997. page._annot_refs[id(annot)] = annot
  20998. annot.thisown = True
  20999. def canon(c):
  21000. assert isinstance(c, int)
  21001. # TODO: proper unicode case folding
  21002. # TODO: character equivalence (a matches ä, etc)
  21003. if c == 0xA0 or c == 0x2028 or c == 0x2029:
  21004. return ord(' ')
  21005. if c == ord('\r') or c == ord('\n') or c == ord('\t'):
  21006. return ord(' ')
  21007. if c >= ord('A') and c <= ord('Z'):
  21008. return c - ord('A') + ord('a')
  21009. return c
  21010. def chartocanon(s):
  21011. assert isinstance(s, str)
  21012. n, c = mupdf.fz_chartorune(s)
  21013. c = canon(c)
  21014. return n, c
  21015. def dest_is_valid(o, page_count, page_object_nums, names_list):
  21016. p = mupdf.pdf_dict_get( o, PDF_NAME('A'))
  21017. if (
  21018. mupdf.pdf_name_eq(
  21019. mupdf.pdf_dict_get( p, PDF_NAME('S')),
  21020. PDF_NAME('GoTo')
  21021. )
  21022. and not string_in_names_list(
  21023. mupdf.pdf_dict_get( p, PDF_NAME('D')),
  21024. names_list
  21025. )
  21026. ):
  21027. return 0
  21028. p = mupdf.pdf_dict_get( o, PDF_NAME('Dest'))
  21029. if not p.m_internal:
  21030. pass
  21031. elif mupdf.pdf_is_string( p):
  21032. return string_in_names_list( p, names_list)
  21033. elif not dest_is_valid_page(
  21034. mupdf.pdf_array_get( p, 0),
  21035. page_object_nums,
  21036. page_count,
  21037. ):
  21038. return 0
  21039. return 1
  21040. def dest_is_valid_page(obj, page_object_nums, pagecount):
  21041. num = mupdf.pdf_to_num(obj)
  21042. if num == 0:
  21043. return 0
  21044. for i in range(pagecount):
  21045. if page_object_nums[i] == num:
  21046. return 1
  21047. return 0
  21048. def find_string(s, needle):
  21049. assert isinstance(s, str)
  21050. for i in range(len(s)):
  21051. end = match_string(s[i:], needle)
  21052. if end is not None:
  21053. end += i
  21054. return i, end
  21055. return None, None
  21056. def get_pdf_now() -> str:
  21057. '''
  21058. "Now" timestamp in PDF Format
  21059. '''
  21060. import time
  21061. tz = "%s'%s'" % (
  21062. str(abs(time.altzone // 3600)).rjust(2, "0"),
  21063. str((abs(time.altzone // 60) % 60)).rjust(2, "0"),
  21064. )
  21065. tstamp = time.strftime("D:%Y%m%d%H%M%S", time.localtime())
  21066. if time.altzone > 0:
  21067. tstamp += "-" + tz
  21068. elif time.altzone < 0:
  21069. tstamp += "+" + tz
  21070. else:
  21071. pass
  21072. return tstamp
  21073. class ElementPosition(object):
  21074. """Convert a dictionary with element position information to an object."""
  21075. def __init__(self):
  21076. pass
  21077. def make_story_elpos():
  21078. return ElementPosition()
  21079. def get_highlight_selection(page, start: point_like =None, stop: point_like =None, clip: rect_like =None) -> list:
  21080. """Return rectangles of text lines between two points.
  21081. Notes:
  21082. The default of 'start' is top-left of 'clip'. The default of 'stop'
  21083. is bottom-reight of 'clip'.
  21084. Args:
  21085. start: start point_like
  21086. stop: end point_like, must be 'below' start
  21087. clip: consider this rect_like only, default is page rectangle
  21088. Returns:
  21089. List of line bbox intersections with the area established by the
  21090. parameters.
  21091. """
  21092. # validate and normalize arguments
  21093. if clip is None:
  21094. clip = page.rect
  21095. clip = Rect(clip)
  21096. if start is None:
  21097. start = clip.tl
  21098. if stop is None:
  21099. stop = clip.br
  21100. clip.y0 = start.y
  21101. clip.y1 = stop.y
  21102. if clip.is_empty or clip.is_infinite:
  21103. return []
  21104. # extract text of page, clip only, no images, expand ligatures
  21105. blocks = page.get_text(
  21106. "dict", flags=0, clip=clip,
  21107. )["blocks"]
  21108. lines = [] # will return this list of rectangles
  21109. for b in blocks:
  21110. bbox = Rect(b["bbox"])
  21111. if bbox.is_infinite or bbox.is_empty:
  21112. continue
  21113. for line in b["lines"]:
  21114. bbox = Rect(line["bbox"])
  21115. if bbox.is_infinite or bbox.is_empty:
  21116. continue
  21117. lines.append(bbox)
  21118. if lines == []: # did not select anything
  21119. return lines
  21120. lines.sort(key=lambda bbox: bbox.y1) # sort by vertical positions
  21121. # cut off prefix from first line if start point is close to its top
  21122. bboxf = lines.pop(0)
  21123. if bboxf.y0 - start.y <= 0.1 * bboxf.height: # close enough?
  21124. r = Rect(start.x, bboxf.y0, bboxf.br) # intersection rectangle
  21125. if not (r.is_empty or r.is_infinite):
  21126. lines.insert(0, r) # insert again if not empty
  21127. else:
  21128. lines.insert(0, bboxf) # insert again
  21129. if lines == []: # the list might have been emptied
  21130. return lines
  21131. # cut off suffix from last line if stop point is close to its bottom
  21132. bboxl = lines.pop()
  21133. if stop.y - bboxl.y1 <= 0.1 * bboxl.height: # close enough?
  21134. r = Rect(bboxl.tl, stop.x, bboxl.y1) # intersection rectangle
  21135. if not (r.is_empty or r.is_infinite):
  21136. lines.append(r) # append if not empty
  21137. else:
  21138. lines.append(bboxl) # append again
  21139. return lines
  21140. def glyph_name_to_unicode(name: str) -> int:
  21141. """Convenience function accessing unicodedata."""
  21142. import unicodedata
  21143. try:
  21144. unc = ord(unicodedata.lookup(name))
  21145. except Exception:
  21146. unc = 65533
  21147. return unc
  21148. def hdist(dir, a, b):
  21149. dx = b.x - a.x
  21150. dy = b.y - a.y
  21151. return mupdf.fz_abs(dx * dir.x + dy * dir.y)
  21152. def make_table(rect: rect_like =(0, 0, 1, 1), cols: int =1, rows: int =1) -> list:
  21153. """Return a list of (rows x cols) equal sized rectangles.
  21154. Notes:
  21155. A utility to fill a given area with table cells of equal size.
  21156. Args:
  21157. rect: rect_like to use as the table area
  21158. rows: number of rows
  21159. cols: number of columns
  21160. Returns:
  21161. A list with <rows> items, where each item is a list of <cols>
  21162. PyMuPDF Rect objects of equal sizes.
  21163. """
  21164. rect = Rect(rect) # ensure this is a Rect
  21165. if rect.is_empty or rect.is_infinite:
  21166. raise ValueError("rect must be finite and not empty")
  21167. tl = rect.tl
  21168. height = rect.height / rows # height of one table cell
  21169. width = rect.width / cols # width of one table cell
  21170. delta_h = (width, 0, width, 0) # diff to next right rect
  21171. delta_v = (0, height, 0, height) # diff to next lower rect
  21172. r = Rect(tl, tl.x + width, tl.y + height) # first rectangle
  21173. # make the first row
  21174. row = [r]
  21175. for i in range(1, cols):
  21176. r += delta_h # build next rect to the right
  21177. row.append(r)
  21178. # make result, starts with first row
  21179. rects = [row]
  21180. for i in range(1, rows):
  21181. row = rects[i - 1] # take previously appended row
  21182. nrow = [] # the new row to append
  21183. for r in row: # for each previous cell add its downward copy
  21184. nrow.append(r + delta_v)
  21185. rects.append(nrow) # append new row to result
  21186. return rects
  21187. def util_ensure_widget_calc(annot):
  21188. '''
  21189. Ensure that widgets with /AA/C JavaScript are in array AcroForm/CO
  21190. '''
  21191. annot_obj = mupdf.pdf_annot_obj(annot.this)
  21192. pdf = mupdf.pdf_get_bound_document(annot_obj)
  21193. PDFNAME_CO = mupdf.pdf_new_name("CO") # = PDF_NAME(CO)
  21194. acro = mupdf.pdf_dict_getl( # get AcroForm dict
  21195. mupdf.pdf_trailer(pdf),
  21196. PDF_NAME('Root'),
  21197. PDF_NAME('AcroForm'),
  21198. )
  21199. CO = mupdf.pdf_dict_get(acro, PDFNAME_CO) # = AcroForm/CO
  21200. if not mupdf.pdf_is_array(CO):
  21201. CO = mupdf.pdf_dict_put_array(acro, PDFNAME_CO, 2)
  21202. n = mupdf.pdf_array_len(CO)
  21203. found = 0
  21204. xref = mupdf.pdf_to_num(annot_obj)
  21205. for i in range(n):
  21206. nxref = mupdf.pdf_to_num(mupdf.pdf_array_get(CO, i))
  21207. if xref == nxref:
  21208. found = 1
  21209. break
  21210. if not found:
  21211. mupdf.pdf_array_push(CO, mupdf.pdf_new_indirect(pdf, xref, 0))
  21212. def util_make_rect( *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
  21213. '''
  21214. Helper for initialising rectangle classes.
  21215. 2022-09-02: This is quite different from PyMuPDF's util_make_rect(), which
  21216. uses `goto` in ways that don't easily translate to Python.
  21217. Returns (x0, y0, x1, y1) derived from <args>, then override with p0, p1,
  21218. x0, y0, x1, y1 if they are not None.
  21219. Accepts following forms for <args>:
  21220. () returns all zeros.
  21221. (top-left, bottom-right)
  21222. (top-left, x1, y1)
  21223. (x0, y0, bottom-right)
  21224. (x0, y0, x1, y1)
  21225. (rect)
  21226. Where top-left and bottom-right are (x, y) or something with .x, .y
  21227. members; rect is something with .x0, .y0, .x1, and .y1 members.
  21228. 2023-11-18: we now override with p0, p1, x0, y0, x1, y1 if not None.
  21229. '''
  21230. def get_xy( arg):
  21231. if isinstance( arg, (list, tuple)) and len( arg) == 2:
  21232. return arg[0], arg[1]
  21233. if isinstance( arg, (Point, mupdf.FzPoint, mupdf.fz_point)):
  21234. return arg.x, arg.y
  21235. return None, None
  21236. def make_tuple( a):
  21237. if isinstance( a, tuple):
  21238. return a
  21239. if isinstance( a, Point):
  21240. return a.x, a.y
  21241. elif isinstance( a, (Rect, IRect, mupdf.FzRect, mupdf.fz_rect)):
  21242. return a.x0, a.y0, a.x1, a.y1
  21243. if not isinstance( a, (list, tuple)):
  21244. a = a,
  21245. return a
  21246. def handle_args():
  21247. if len(args) == 0:
  21248. return 0, 0, 0, 0
  21249. elif len(args) == 1:
  21250. arg = args[0]
  21251. if isinstance( arg, (list, tuple)) and len( arg) == 2:
  21252. p1, p2 = arg
  21253. ret = *p1, *p2
  21254. assert len(ret) == 4
  21255. return ret
  21256. if isinstance( arg, (list, tuple)) and len( arg) == 3:
  21257. a, b, c = arg
  21258. a = make_tuple(a)
  21259. b = make_tuple(b)
  21260. c = make_tuple(c)
  21261. ret = *a, *b, *c
  21262. assert len(ret) == 4
  21263. return ret
  21264. ret = make_tuple( arg)
  21265. assert len(ret) == 4, f'{arg=} {ret=}'
  21266. return ret
  21267. elif len(args) == 2:
  21268. ret = get_xy( args[0]) + get_xy( args[1])
  21269. assert len(ret) == 4
  21270. return ret
  21271. elif len(args) == 3:
  21272. x0, y0 = get_xy( args[0])
  21273. if (x0, y0) != (None, None):
  21274. return x0, y0, args[1], args[2]
  21275. x1, y1 = get_xy( args[2])
  21276. if (x1, y1) != (None, None):
  21277. return args[0], args[1], x1, y1
  21278. elif len(args) == 4:
  21279. return args[0], args[1], args[2], args[3]
  21280. raise Exception( f'Unrecognised args: {args}')
  21281. ret_x0, ret_y0, ret_x1, ret_y1 = handle_args()
  21282. if p0 is not None: ret_x0, ret_y0 = get_xy(p0)
  21283. if p1 is not None: ret_x1, ret_y1 = get_xy(p1)
  21284. if x0 is not None: ret_x0 = x0
  21285. if y0 is not None: ret_y0 = y0
  21286. if x1 is not None: ret_x1 = x1
  21287. if y1 is not None: ret_y1 = y1
  21288. return ret_x0, ret_y0, ret_x1, ret_y1
  21289. def util_make_irect( *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
  21290. a, b, c, d = util_make_rect( *args, p0=p0, p1=p1, x0=x0, y0=y0, x1=x1, y1=y1)
  21291. def convert(x, ceil):
  21292. if ceil:
  21293. return int(math.ceil(x))
  21294. else:
  21295. return int(math.floor(x))
  21296. a = convert(a, False)
  21297. b = convert(b, False)
  21298. c = convert(c, True)
  21299. d = convert(d, True)
  21300. return a, b, c, d
  21301. def util_round_rect( rect):
  21302. return JM_py_from_irect(mupdf.fz_round_rect(JM_rect_from_py(rect)))
  21303. def util_transform_rect( rect, matrix):
  21304. if g_use_extra:
  21305. return extra.util_transform_rect( rect, matrix)
  21306. return JM_py_from_rect(mupdf.fz_transform_rect(JM_rect_from_py(rect), JM_matrix_from_py(matrix)))
  21307. def util_intersect_rect( r1, r2):
  21308. return JM_py_from_rect(
  21309. mupdf.fz_intersect_rect(
  21310. JM_rect_from_py(r1),
  21311. JM_rect_from_py(r2),
  21312. )
  21313. )
  21314. def util_is_point_in_rect( p, r):
  21315. return mupdf.fz_is_point_inside_rect(
  21316. JM_point_from_py(p),
  21317. JM_rect_from_py(r),
  21318. )
  21319. def util_include_point_in_rect( r, p):
  21320. return JM_py_from_rect(
  21321. mupdf.fz_include_point_in_rect(
  21322. JM_rect_from_py(r),
  21323. JM_point_from_py(p),
  21324. )
  21325. )
  21326. def util_point_in_quad( P, Q):
  21327. p = JM_point_from_py(P)
  21328. q = JM_quad_from_py(Q)
  21329. return mupdf.fz_is_point_inside_quad(p, q)
  21330. def util_transform_point( point, matrix):
  21331. return JM_py_from_point(
  21332. mupdf.fz_transform_point(
  21333. JM_point_from_py(point),
  21334. JM_matrix_from_py(matrix),
  21335. )
  21336. )
  21337. def util_union_rect( r1, r2):
  21338. return JM_py_from_rect(
  21339. mupdf.fz_union_rect(
  21340. JM_rect_from_py(r1),
  21341. JM_rect_from_py(r2),
  21342. )
  21343. )
  21344. def util_concat_matrix( m1, m2):
  21345. return JM_py_from_matrix(
  21346. mupdf.fz_concat(
  21347. JM_matrix_from_py(m1),
  21348. JM_matrix_from_py(m2),
  21349. )
  21350. )
  21351. def util_invert_matrix(matrix):
  21352. if 0:
  21353. # Use MuPDF's fz_invert_matrix().
  21354. if isinstance( matrix, (tuple, list)):
  21355. matrix = mupdf.FzMatrix( *matrix)
  21356. elif isinstance( matrix, mupdf.fz_matrix):
  21357. matrix = mupdf.FzMatrix( matrix)
  21358. elif isinstance( matrix, Matrix):
  21359. matrix = mupdf.FzMatrix( matrix.a, matrix.b, matrix.c, matrix.d, matrix.e, matrix.f)
  21360. assert isinstance( matrix, mupdf.FzMatrix), f'{type(matrix)=}: {matrix}'
  21361. ret = mupdf.fz_invert_matrix( matrix)
  21362. if ret == matrix and (0
  21363. or abs( matrix.a - 1) >= sys.float_info.epsilon
  21364. or abs( matrix.b - 0) >= sys.float_info.epsilon
  21365. or abs( matrix.c - 0) >= sys.float_info.epsilon
  21366. or abs( matrix.d - 1) >= sys.float_info.epsilon
  21367. ):
  21368. # Inversion not possible.
  21369. return 1, ()
  21370. return 0, (ret.a, ret.b, ret.c, ret.d, ret.e, ret.f)
  21371. # Do inversion in python.
  21372. src = JM_matrix_from_py(matrix)
  21373. a = src.a
  21374. det = a * src.d - src.b * src.c
  21375. if det < -sys.float_info.epsilon or det > sys.float_info.epsilon:
  21376. dst = mupdf.FzMatrix()
  21377. rdet = 1 / det
  21378. dst.a = src.d * rdet
  21379. dst.b = -src.b * rdet
  21380. dst.c = -src.c * rdet
  21381. dst.d = a * rdet
  21382. a = -src.e * dst.a - src.f * dst.c
  21383. dst.f = -src.e * dst.b - src.f * dst.d
  21384. dst.e = a
  21385. return 0, (dst.a, dst.b, dst.c, dst.d, dst.e, dst.f)
  21386. return 1, ()
  21387. def util_measure_string( text, fontname, fontsize, encoding):
  21388. font = mupdf.fz_new_base14_font(fontname)
  21389. w = 0
  21390. pos = 0
  21391. while pos < len(text):
  21392. t, c = mupdf.fz_chartorune(text[pos:])
  21393. pos += t
  21394. if encoding == mupdf.PDF_SIMPLE_ENCODING_GREEK:
  21395. c = mupdf.fz_iso8859_7_from_unicode(c)
  21396. elif encoding == mupdf.PDF_SIMPLE_ENCODING_CYRILLIC:
  21397. c = mupdf.fz_windows_1251_from_unicode(c)
  21398. else:
  21399. c = mupdf.fz_windows_1252_from_unicode(c)
  21400. if c < 0:
  21401. c = 0xB7
  21402. g = mupdf.fz_encode_character(font, c)
  21403. dw = mupdf.fz_advance_glyph(font, g, 0)
  21404. w += dw
  21405. ret = w * fontsize
  21406. return ret
  21407. def util_sine_between(C, P, Q):
  21408. # for points C, P, Q compute the sine between lines CP and QP
  21409. c = JM_point_from_py(C)
  21410. p = JM_point_from_py(P)
  21411. q = JM_point_from_py(Q)
  21412. s = mupdf.fz_normalize_vector(mupdf.fz_make_point(q.x - p.x, q.y - p.y))
  21413. m1 = mupdf.fz_make_matrix(1, 0, 0, 1, -p.x, -p.y)
  21414. m2 = mupdf.fz_make_matrix(s.x, -s.y, s.y, s.x, 0, 0)
  21415. m1 = mupdf.fz_concat(m1, m2)
  21416. c = mupdf.fz_transform_point(c, m1)
  21417. c = mupdf.fz_normalize_vector(c)
  21418. return c.y
  21419. def util_hor_matrix(C, P):
  21420. '''
  21421. Return the matrix that maps two points C, P to the x-axis such that
  21422. C -> (0,0) and the image of P have the same distance.
  21423. '''
  21424. c = JM_point_from_py(C)
  21425. p = JM_point_from_py(P)
  21426. # compute (cosine, sine) of vector P-C with double precision:
  21427. s = mupdf.fz_normalize_vector(mupdf.fz_make_point(p.x - c.x, p.y - c.y))
  21428. m1 = mupdf.fz_make_matrix(1, 0, 0, 1, -c.x, -c.y)
  21429. m2 = mupdf.fz_make_matrix(s.x, -s.y, s.y, s.x, 0, 0)
  21430. return JM_py_from_matrix(mupdf.fz_concat(m1, m2))
  21431. def match_string(h0, n0):
  21432. h = 0
  21433. n = 0
  21434. e = h
  21435. delta_h, hc = chartocanon(h0[h:])
  21436. h += delta_h
  21437. delta_n, nc = chartocanon(n0[n:])
  21438. n += delta_n
  21439. while hc == nc:
  21440. e = h
  21441. if hc == ord(' '):
  21442. while 1:
  21443. delta_h, hc = chartocanon(h0[h:])
  21444. h += delta_h
  21445. if hc != ord(' '):
  21446. break
  21447. else:
  21448. delta_h, hc = chartocanon(h0[h:])
  21449. h += delta_h
  21450. if nc == ord(' '):
  21451. while 1:
  21452. delta_n, nc = chartocanon(n0[n:])
  21453. n += delta_n
  21454. if nc != ord(' '):
  21455. break
  21456. else:
  21457. delta_n, nc = chartocanon(n0[n:])
  21458. n += delta_n
  21459. return None if nc != 0 else e
  21460. def on_highlight_char(hits, line, ch):
  21461. assert hits
  21462. assert isinstance(line, mupdf.FzStextLine)
  21463. assert isinstance(ch, mupdf.FzStextChar)
  21464. vfuzz = ch.m_internal.size * hits.vfuzz
  21465. hfuzz = ch.m_internal.size * hits.hfuzz
  21466. ch_quad = JM_char_quad(line, ch)
  21467. if hits.len > 0:
  21468. # fixme: end = hits.quads[-1]
  21469. quad = hits.quads[hits.len - 1]
  21470. end = JM_quad_from_py(quad)
  21471. if ( 1
  21472. and hdist(line.m_internal.dir, end.lr, ch_quad.ll) < hfuzz
  21473. and vdist(line.m_internal.dir, end.lr, ch_quad.ll) < vfuzz
  21474. and hdist(line.m_internal.dir, end.ur, ch_quad.ul) < hfuzz
  21475. and vdist(line.m_internal.dir, end.ur, ch_quad.ul) < vfuzz
  21476. ):
  21477. end.ur = ch_quad.ur
  21478. end.lr = ch_quad.lr
  21479. assert hits.quads[-1] == end
  21480. return
  21481. hits.quads.append(ch_quad)
  21482. hits.len += 1
  21483. def page_merge(doc_des, doc_src, page_from, page_to, rotate, links, copy_annots, graft_map):
  21484. '''
  21485. Deep-copies a source page to the target.
  21486. Modified version of function of pdfmerge.c: we also copy annotations, but
  21487. we skip some subtypes. In addition we rotate output.
  21488. '''
  21489. if g_use_extra:
  21490. #log( 'Calling C++ extra.page_merge()')
  21491. return extra.page_merge( doc_des, doc_src, page_from, page_to, rotate, links, copy_annots, graft_map)
  21492. # list of object types (per page) we want to copy
  21493. known_page_objs = [
  21494. PDF_NAME('Contents'),
  21495. PDF_NAME('Resources'),
  21496. PDF_NAME('MediaBox'),
  21497. PDF_NAME('CropBox'),
  21498. PDF_NAME('BleedBox'),
  21499. PDF_NAME('TrimBox'),
  21500. PDF_NAME('ArtBox'),
  21501. PDF_NAME('Rotate'),
  21502. PDF_NAME('UserUnit'),
  21503. ]
  21504. page_ref = mupdf.pdf_lookup_page_obj(doc_src, page_from)
  21505. # make new page dict in dest doc
  21506. page_dict = mupdf.pdf_new_dict(doc_des, 4)
  21507. mupdf.pdf_dict_put(page_dict, PDF_NAME('Type'), PDF_NAME('Page'))
  21508. # copy objects of source page into it
  21509. for i in range( len(known_page_objs)):
  21510. obj = mupdf.pdf_dict_get_inheritable( page_ref, known_page_objs[i])
  21511. if obj.m_internal:
  21512. #log( '{=type(graft_map) type(graft_map.this)}')
  21513. mupdf.pdf_dict_put( page_dict, known_page_objs[i], mupdf.pdf_graft_mapped_object(graft_map.this, obj))
  21514. # Copy annotations, but skip Link, Popup, IRT, Widget types
  21515. # If selected, remove dict keys P (parent) and Popup
  21516. if copy_annots:
  21517. old_annots = mupdf.pdf_dict_get( page_ref, PDF_NAME('Annots'))
  21518. n = mupdf.pdf_array_len( old_annots)
  21519. if n > 0:
  21520. new_annots = mupdf.pdf_dict_put_array( page_dict, PDF_NAME('Annots'), n)
  21521. for i in range(n):
  21522. o = mupdf.pdf_array_get( old_annots, i)
  21523. if not o.m_internal or not mupdf.pdf_is_dict(o):
  21524. continue # skip non-dict items
  21525. if mupdf.pdf_dict_gets( o, "IRT").m_internal:
  21526. continue
  21527. subtype = mupdf.pdf_dict_get( o, PDF_NAME('Subtype'))
  21528. if mupdf.pdf_name_eq( subtype, PDF_NAME('Link')):
  21529. continue
  21530. if mupdf.pdf_name_eq( subtype, PDF_NAME('Popup')):
  21531. continue
  21532. if mupdf.pdf_name_eq(subtype, PDF_NAME('Widget')):
  21533. continue
  21534. mupdf.pdf_dict_del( o, PDF_NAME('Popup'))
  21535. mupdf.pdf_dict_del( o, PDF_NAME('P'))
  21536. copy_o = mupdf.pdf_graft_mapped_object( graft_map.this, o)
  21537. annot = mupdf.pdf_new_indirect( doc_des, mupdf.pdf_to_num( copy_o), 0)
  21538. mupdf.pdf_array_push( new_annots, annot)
  21539. # rotate the page
  21540. if rotate != -1:
  21541. mupdf.pdf_dict_put_int( page_dict, PDF_NAME('Rotate'), rotate)
  21542. # Now add the page dictionary to dest PDF
  21543. ref = mupdf.pdf_add_object( doc_des, page_dict)
  21544. # Insert new page at specified location
  21545. mupdf.pdf_insert_page( doc_des, page_to, ref)
  21546. def paper_rect(s: str) -> Rect:
  21547. """Return a Rect for the paper size indicated in string 's'. Must conform to the argument of method 'PaperSize', which will be invoked.
  21548. """
  21549. width, height = paper_size(s)
  21550. return Rect(0.0, 0.0, width, height)
  21551. def paper_size(s: str) -> tuple:
  21552. """Return a tuple (width, height) for a given paper format string.
  21553. Notes:
  21554. 'A4-L' will return (842, 595), the values for A4 landscape.
  21555. Suffix '-P' and no suffix return the portrait tuple.
  21556. """
  21557. size = s.lower()
  21558. f = "p"
  21559. if size.endswith("-l"):
  21560. f = "l"
  21561. size = size[:-2]
  21562. if size.endswith("-p"):
  21563. size = size[:-2]
  21564. rc = paper_sizes().get(size, (-1, -1))
  21565. if f == "p":
  21566. return rc
  21567. return (rc[1], rc[0])
  21568. def paper_sizes():
  21569. """Known paper formats @ 72 dpi as a dictionary. Key is the format string
  21570. like "a4" for ISO-A4. Value is the tuple (width, height).
  21571. Information taken from the following web sites:
  21572. www.din-formate.de
  21573. www.din-formate.info/amerikanische-formate.html
  21574. www.directtools.de/wissen/normen/iso.htm
  21575. """
  21576. return {
  21577. "a0": (2384, 3370),
  21578. "a1": (1684, 2384),
  21579. "a10": (74, 105),
  21580. "a2": (1191, 1684),
  21581. "a3": (842, 1191),
  21582. "a4": (595, 842),
  21583. "a5": (420, 595),
  21584. "a6": (298, 420),
  21585. "a7": (210, 298),
  21586. "a8": (147, 210),
  21587. "a9": (105, 147),
  21588. "b0": (2835, 4008),
  21589. "b1": (2004, 2835),
  21590. "b10": (88, 125),
  21591. "b2": (1417, 2004),
  21592. "b3": (1001, 1417),
  21593. "b4": (709, 1001),
  21594. "b5": (499, 709),
  21595. "b6": (354, 499),
  21596. "b7": (249, 354),
  21597. "b8": (176, 249),
  21598. "b9": (125, 176),
  21599. "c0": (2599, 3677),
  21600. "c1": (1837, 2599),
  21601. "c10": (79, 113),
  21602. "c2": (1298, 1837),
  21603. "c3": (918, 1298),
  21604. "c4": (649, 918),
  21605. "c5": (459, 649),
  21606. "c6": (323, 459),
  21607. "c7": (230, 323),
  21608. "c8": (162, 230),
  21609. "c9": (113, 162),
  21610. "card-4x6": (288, 432),
  21611. "card-5x7": (360, 504),
  21612. "commercial": (297, 684),
  21613. "executive": (522, 756),
  21614. "invoice": (396, 612),
  21615. "ledger": (792, 1224),
  21616. "legal": (612, 1008),
  21617. "legal-13": (612, 936),
  21618. "letter": (612, 792),
  21619. "monarch": (279, 540),
  21620. "tabloid-extra": (864, 1296),
  21621. }
  21622. def pdf_lookup_page_loc(doc, needle):
  21623. return mupdf.pdf_lookup_page_loc(doc, needle)
  21624. def pdfobj_string(o, prefix=''):
  21625. '''
  21626. Returns description of mupdf.PdfObj (wrapper for pdf_obj) <o>.
  21627. '''
  21628. assert 0, 'use mupdf.pdf_debug_obj() ?'
  21629. ret = ''
  21630. if mupdf.pdf_is_array(o):
  21631. l = mupdf.pdf_array_len(o)
  21632. ret += f'array {l}\n'
  21633. for i in range(l):
  21634. oo = mupdf.pdf_array_get(o, i)
  21635. ret += pdfobj_string(oo, prefix + ' ')
  21636. ret += '\n'
  21637. elif mupdf.pdf_is_bool(o):
  21638. ret += f'bool: {o.array_get_bool()}\n'
  21639. elif mupdf.pdf_is_dict(o):
  21640. l = mupdf.pdf_dict_len(o)
  21641. ret += f'dict {l}\n'
  21642. for i in range(l):
  21643. key = mupdf.pdf_dict_get_key(o, i)
  21644. value = mupdf.pdf_dict_get( o, key)
  21645. ret += f'{prefix} {key}: '
  21646. ret += pdfobj_string( value, prefix + ' ')
  21647. ret += '\n'
  21648. elif mupdf.pdf_is_embedded_file(o):
  21649. ret += f'embedded_file: {o.embedded_file_name()}\n'
  21650. elif mupdf.pdf_is_indirect(o):
  21651. ret += f'indirect: ...\n'
  21652. elif mupdf.pdf_is_int(o):
  21653. ret += f'int: {mupdf.pdf_to_int(o)}\n'
  21654. elif mupdf.pdf_is_jpx_image(o):
  21655. ret += f'jpx_image:\n'
  21656. elif mupdf.pdf_is_name(o):
  21657. ret += f'name: {mupdf.pdf_to_name(o)}\n'
  21658. elif o.pdf_is_null:
  21659. ret += f'null\n'
  21660. #elif o.pdf_is_number:
  21661. # ret += f'number\n'
  21662. elif o.pdf_is_real:
  21663. ret += f'real: {o.pdf_to_real()}\n'
  21664. elif mupdf.pdf_is_stream(o):
  21665. ret += f'stream\n'
  21666. elif mupdf.pdf_is_string(o):
  21667. ret += f'string: {mupdf.pdf_to_string(o)}\n'
  21668. else:
  21669. ret += '<>\n'
  21670. return ret
  21671. def repair_mono_font(page: "Page", font: "Font") -> None:
  21672. """Repair character spacing for mono fonts.
  21673. Notes:
  21674. Some mono-spaced fonts are displayed with a too large character
  21675. distance, e.g. "a b c" instead of "abc". This utility adds an entry
  21676. "/W[0 65535 w]" to the descendent font(s) of font. The float w is
  21677. taken to be the width of 0x20 (space).
  21678. This should enforce viewers to use 'w' as the character width.
  21679. Args:
  21680. page: pymupdf.Page object.
  21681. font: pymupdf.Font object.
  21682. """
  21683. if not font.flags["mono"]: # font not flagged as monospaced
  21684. return None
  21685. doc = page.parent # the document
  21686. fontlist = page.get_fonts() # list of fonts on page
  21687. xrefs = [ # list of objects referring to font
  21688. f[0]
  21689. for f in fontlist
  21690. if (f[3] == font.name and f[4].startswith("F") and f[5].startswith("Identity"))
  21691. ]
  21692. if xrefs == []: # our font does not occur
  21693. return
  21694. xrefs = set(xrefs) # drop any double counts
  21695. width = int(round((font.glyph_advance(32) * 1000)))
  21696. for xref in xrefs:
  21697. if not TOOLS.set_font_width(doc, xref, width):
  21698. log("Cannot set width for '%s' in xref %i" % (font.name, xref))
  21699. def sRGB_to_pdf(srgb: int) -> tuple:
  21700. """Convert sRGB color code to a PDF color triple.
  21701. There is **no error checking** for performance reasons!
  21702. Args:
  21703. srgb: (int) RRGGBB (red, green, blue), each color in range(255).
  21704. Returns:
  21705. Tuple (red, green, blue) each item in interval 0 <= item <= 1.
  21706. """
  21707. t = sRGB_to_rgb(srgb)
  21708. return t[0] / 255.0, t[1] / 255.0, t[2] / 255.0
  21709. def sRGB_to_rgb(srgb: int) -> tuple:
  21710. """Convert sRGB color code to an RGB color triple.
  21711. There is **no error checking** for performance reasons!
  21712. Args:
  21713. srgb: (int) SSRRGGBB (red, green, blue), each color in range(255).
  21714. With MuPDF < 1.26, `s` is always 0.
  21715. Returns:
  21716. Tuple (red, green, blue) each item in interval 0 <= item <= 255.
  21717. """
  21718. srgb &= 0xffffff
  21719. r = srgb >> 16
  21720. g = (srgb - (r << 16)) >> 8
  21721. b = srgb - (r << 16) - (g << 8)
  21722. return (r, g, b)
  21723. def string_in_names_list(p, names_list):
  21724. n = mupdf.pdf_array_len( names_list) if names_list else 0
  21725. str_ = mupdf.pdf_to_text_string( p)
  21726. for i in range(0, n, 2):
  21727. if mupdf.pdf_to_text_string( mupdf.pdf_array_get( names_list, i)) == str_:
  21728. return 1
  21729. return 0
  21730. def strip_outline(doc, outlines, page_count, page_object_nums, names_list):
  21731. '''
  21732. Returns (count, first, prev).
  21733. '''
  21734. first = None
  21735. count = 0
  21736. current = outlines
  21737. prev = None
  21738. while current.m_internal:
  21739. # Strip any children to start with. This takes care of
  21740. # First / Last / Count for us.
  21741. nc = strip_outlines(doc, current, page_count, page_object_nums, names_list)
  21742. if not dest_is_valid(current, page_count, page_object_nums, names_list):
  21743. if nc == 0:
  21744. # Outline with invalid dest and no children. Drop it by
  21745. # pulling the next one in here.
  21746. next = mupdf.pdf_dict_get(current, PDF_NAME('Next'))
  21747. if not next.m_internal:
  21748. # There is no next one to pull in
  21749. if prev.m_internal:
  21750. mupdf.pdf_dict_del(prev, PDF_NAME('Next'))
  21751. elif prev.m_internal:
  21752. mupdf.pdf_dict_put(prev, PDF_NAME('Next'), next)
  21753. mupdf.pdf_dict_put(next, PDF_NAME('Prev'), prev)
  21754. else:
  21755. mupdf.pdf_dict_del(next, PDF_NAME('Prev'))
  21756. current = next
  21757. else:
  21758. # Outline with invalid dest, but children. Just drop the dest.
  21759. mupdf.pdf_dict_del(current, PDF_NAME('Dest'))
  21760. mupdf.pdf_dict_del(current, PDF_NAME('A'))
  21761. current = mupdf.pdf_dict_get(current, PDF_NAME('Next'))
  21762. else:
  21763. # Keep this one
  21764. if not first or not first.m_internal:
  21765. first = current
  21766. prev = current
  21767. current = mupdf.pdf_dict_get(current, PDF_NAME('Next'))
  21768. count += 1
  21769. return count, first, prev
  21770. def strip_outlines(doc, outlines, page_count, page_object_nums, names_list):
  21771. if not outlines.m_internal:
  21772. return 0
  21773. first = mupdf.pdf_dict_get(outlines, PDF_NAME('First'))
  21774. if not first.m_internal:
  21775. nc = 0
  21776. else:
  21777. nc, first, last = strip_outline(doc, first, page_count, page_object_nums, names_list)
  21778. if nc == 0:
  21779. mupdf.pdf_dict_del(outlines, PDF_NAME('First'))
  21780. mupdf.pdf_dict_del(outlines, PDF_NAME('Last'))
  21781. mupdf.pdf_dict_del(outlines, PDF_NAME('Count'))
  21782. else:
  21783. old_count = mupdf.pdf_to_int(mupdf.pdf_dict_get(outlines, PDF_NAME('Count')))
  21784. mupdf.pdf_dict_put(outlines, PDF_NAME('First'), first)
  21785. mupdf.pdf_dict_put(outlines, PDF_NAME('Last'), last)
  21786. mupdf.pdf_dict_put(outlines, PDF_NAME('Count'), mupdf.pdf_new_int(nc if old_count > 0 else -nc))
  21787. return nc
  21788. trace_device_FILL_PATH = 1
  21789. trace_device_STROKE_PATH = 2
  21790. trace_device_CLIP_PATH = 3
  21791. trace_device_CLIP_STROKE_PATH = 4
  21792. def unicode_to_glyph_name(ch: int) -> str:
  21793. """
  21794. Convenience function accessing unicodedata.
  21795. """
  21796. import unicodedata
  21797. try:
  21798. name = unicodedata.name(chr(ch))
  21799. except ValueError:
  21800. name = ".notdef"
  21801. return name
  21802. def vdist(dir, a, b):
  21803. dx = b.x - a.x
  21804. dy = b.y - a.y
  21805. return mupdf.fz_abs(dx * dir.y + dy * dir.x)
  21806. def apply_pages(
  21807. path,
  21808. pagefn,
  21809. *,
  21810. pagefn_args=(),
  21811. pagefn_kwargs=dict(),
  21812. initfn=None,
  21813. initfn_args=(),
  21814. initfn_kwargs=dict(),
  21815. pages=None,
  21816. method='single',
  21817. concurrency=None,
  21818. _stats=False,
  21819. ):
  21820. '''
  21821. Returns list of results from `pagefn()`, optionally using concurrency for
  21822. speed.
  21823. Args:
  21824. path:
  21825. Path of document.
  21826. pagefn:
  21827. Function to call for each page; is passed (page, *pagefn_args,
  21828. **pagefn_kwargs). Return value is added to list that we return. If
  21829. `method` is not 'single', must be a top-level function - nested
  21830. functions don't work with concurrency.
  21831. pagefn_args
  21832. pagefn_kwargs:
  21833. Additional args to pass to `pagefn`. Must be picklable.
  21834. initfn:
  21835. If true, called once in each worker process; is passed
  21836. (*initfn_args, **initfn_kwargs).
  21837. initfn_args
  21838. initfn_kwargs:
  21839. Args to pass to initfn. Must be picklable.
  21840. pages:
  21841. List of page numbers to process, or None to include all pages.
  21842. method:
  21843. 'single'
  21844. Do not use concurrency.
  21845. 'mp'
  21846. Operate concurrently using Python's `multiprocessing` module.
  21847. 'fork'
  21848. Operate concurrently using custom implementation with
  21849. `os.fork()`. Does not work on Windows.
  21850. concurrency:
  21851. Number of worker processes to use when operating concurrently. If
  21852. None, we use the number of available CPUs.
  21853. _stats:
  21854. Internal, may change or be removed. If true, we output simple
  21855. timing diagnostics.
  21856. Note: We require a file path rather than a Document, because Document
  21857. instances do not work properly after a fork - internal file descriptor
  21858. offsets are shared between the parent and child processes.
  21859. '''
  21860. if _stats:
  21861. t0 = time.time()
  21862. if method == 'single':
  21863. if initfn:
  21864. initfn(*initfn_args, **initfn_kwargs)
  21865. ret = list()
  21866. document = Document(path)
  21867. if pages is None:
  21868. pages = range(len(document))
  21869. for pno in pages:
  21870. page = document[pno]
  21871. r = pagefn(page, *pagefn_args, **initfn_kwargs)
  21872. ret.append(r)
  21873. else:
  21874. # Use concurrency.
  21875. #
  21876. from . import _apply_pages
  21877. if pages is None:
  21878. if _stats:
  21879. t = time.time()
  21880. with Document(path) as document:
  21881. num_pages = len(document)
  21882. pages = list(range(num_pages))
  21883. if _stats:
  21884. t = time.time() - t
  21885. log(f'{t:.2f}s: count pages.')
  21886. if _stats:
  21887. t = time.time()
  21888. if method == 'mp':
  21889. ret = _apply_pages._multiprocessing(
  21890. path,
  21891. pages,
  21892. pagefn,
  21893. pagefn_args,
  21894. pagefn_kwargs,
  21895. initfn,
  21896. initfn_args,
  21897. initfn_kwargs,
  21898. concurrency,
  21899. _stats,
  21900. )
  21901. elif method == 'fork':
  21902. ret = _apply_pages._fork(
  21903. path,
  21904. pages,
  21905. pagefn,
  21906. pagefn_args,
  21907. pagefn_kwargs,
  21908. initfn,
  21909. initfn_args,
  21910. initfn_kwargs,
  21911. concurrency,
  21912. _stats,
  21913. )
  21914. else:
  21915. assert 0, f'Unrecognised {method=}.'
  21916. if _stats:
  21917. t = time.time() - t
  21918. log(f'{t:.2f}s: work.')
  21919. if _stats:
  21920. t = time.time() - t0
  21921. log(f'{t:.2f}s: total.')
  21922. return ret
  21923. def get_text(
  21924. path,
  21925. *,
  21926. pages=None,
  21927. method='single',
  21928. concurrency=None,
  21929. option='text',
  21930. clip=None,
  21931. flags=None,
  21932. textpage=None,
  21933. sort=False,
  21934. delimiters=None,
  21935. _stats=False,
  21936. ):
  21937. '''
  21938. Returns list of results from `Page.get_text()`, optionally using
  21939. concurrency for speed.
  21940. Args:
  21941. path:
  21942. Path of document.
  21943. pages:
  21944. List of page numbers to process, or None to include all pages.
  21945. method:
  21946. 'single'
  21947. Do not use concurrency.
  21948. 'mp'
  21949. Operate concurrently using Python's `multiprocessing` module.
  21950. 'fork'
  21951. Operate concurrently using custom implementation with
  21952. `os.fork`. Does not work on Windows.
  21953. concurrency:
  21954. Number of worker processes to use when operating concurrently. If
  21955. None, we use the number of available CPUs.
  21956. option
  21957. clip
  21958. flags
  21959. textpage
  21960. sort
  21961. delimiters:
  21962. Passed to internal calls to `Page.get_text()`.
  21963. '''
  21964. args_dict = dict(
  21965. option=option,
  21966. clip=clip,
  21967. flags=flags,
  21968. textpage=textpage,
  21969. sort=sort,
  21970. delimiters=delimiters,
  21971. )
  21972. return apply_pages(
  21973. path,
  21974. Page.get_text,
  21975. pagefn_kwargs=args_dict,
  21976. pages=pages,
  21977. method=method,
  21978. concurrency=concurrency,
  21979. _stats=_stats,
  21980. )
  21981. class TOOLS:
  21982. '''
  21983. We use @staticmethod to avoid the need to create an instance of this class.
  21984. '''
  21985. def _derotate_matrix(page):
  21986. if isinstance(page, mupdf.PdfPage):
  21987. return JM_py_from_matrix(JM_derotate_page_matrix(page))
  21988. else:
  21989. return JM_py_from_matrix(mupdf.FzMatrix())
  21990. @staticmethod
  21991. def _fill_widget(annot, widget):
  21992. val = JM_get_widget_properties(annot, widget)
  21993. widget.rect = Rect(annot.rect)
  21994. widget.xref = annot.xref
  21995. widget.parent = annot.parent
  21996. widget._annot = annot # backpointer to annot object
  21997. if not widget.script:
  21998. widget.script = None
  21999. if not widget.script_stroke:
  22000. widget.script_stroke = None
  22001. if not widget.script_format:
  22002. widget.script_format = None
  22003. if not widget.script_change:
  22004. widget.script_change = None
  22005. if not widget.script_calc:
  22006. widget.script_calc = None
  22007. if not widget.script_blur:
  22008. widget.script_blur = None
  22009. if not widget.script_focus:
  22010. widget.script_focus = None
  22011. return val
  22012. @staticmethod
  22013. def _get_all_contents(page):
  22014. page = _as_pdf_page(page.this)
  22015. res = JM_read_contents(page.obj())
  22016. result = JM_BinFromBuffer( res)
  22017. return result
  22018. @staticmethod
  22019. def _insert_contents(page, newcont, overlay=1):
  22020. """Add bytes as a new /Contents object for a page, and return its xref."""
  22021. pdfpage = _as_pdf_page(page, required=1)
  22022. contbuf = JM_BufferFromBytes(newcont)
  22023. xref = JM_insert_contents(pdfpage.doc(), pdfpage.obj(), contbuf, overlay)
  22024. #fixme: pdfpage->doc->dirty = 1;
  22025. return xref
  22026. @staticmethod
  22027. def _le_annot_parms(annot, p1, p2, fill_color):
  22028. """Get common parameters for making annot line end symbols.
  22029. Returns:
  22030. m: matrix that maps p1, p2 to points L, P on the x-axis
  22031. im: its inverse
  22032. L, P: transformed p1, p2
  22033. w: line width
  22034. scol: stroke color string
  22035. fcol: fill color store_shrink
  22036. opacity: opacity string (gs command)
  22037. """
  22038. w = annot.border["width"] # line width
  22039. sc = annot.colors["stroke"] # stroke color
  22040. if not sc: # black if missing
  22041. sc = (0,0,0)
  22042. scol = " ".join(map(str, sc)) + " RG\n"
  22043. if fill_color:
  22044. fc = fill_color
  22045. else:
  22046. fc = annot.colors["fill"] # fill color
  22047. if not fc:
  22048. fc = (1,1,1) # white if missing
  22049. fcol = " ".join(map(str, fc)) + " rg\n"
  22050. # nr = annot.rect
  22051. np1 = p1 # point coord relative to annot rect
  22052. np2 = p2 # point coord relative to annot rect
  22053. m = Matrix(util_hor_matrix(np1, np2)) # matrix makes the line horizontal
  22054. im = ~m # inverted matrix
  22055. L = np1 * m # converted start (left) point
  22056. R = np2 * m # converted end (right) point
  22057. if 0 <= annot.opacity < 1:
  22058. opacity = "/H gs\n"
  22059. else:
  22060. opacity = ""
  22061. return m, im, L, R, w, scol, fcol, opacity
  22062. @staticmethod
  22063. def _le_butt(annot, p1, p2, lr, fill_color):
  22064. """Make stream commands for butt line end symbol. "lr" denotes left (False) or right point.
  22065. """
  22066. m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
  22067. shift = 3
  22068. d = shift * max(1, w)
  22069. M = R if lr else L
  22070. top = (M + (0, -d/2.)) * im
  22071. bot = (M + (0, d/2.)) * im
  22072. ap = "\nq\n%s%f %f m\n" % (opacity, top.x, top.y)
  22073. ap += "%f %f l\n" % (bot.x, bot.y)
  22074. ap += _format_g(w) + " w\n"
  22075. ap += scol + "s\nQ\n"
  22076. return ap
  22077. @staticmethod
  22078. def _le_circle(annot, p1, p2, lr, fill_color):
  22079. """Make stream commands for circle line end symbol. "lr" denotes left (False) or right point.
  22080. """
  22081. m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
  22082. shift = 2.5 # 2*shift*width = length of square edge
  22083. d = shift * max(1, w)
  22084. M = R - (d/2., 0) if lr else L + (d/2., 0)
  22085. r = Rect(M, M) + (-d, -d, d, d) # the square
  22086. ap = "q\n" + opacity + TOOLS._oval_string(r.tl * im, r.tr * im, r.br * im, r.bl * im)
  22087. ap += _format_g(w) + " w\n"
  22088. ap += scol + fcol + "b\nQ\n"
  22089. return ap
  22090. @staticmethod
  22091. def _le_closedarrow(annot, p1, p2, lr, fill_color):
  22092. """Make stream commands for closed arrow line end symbol. "lr" denotes left (False) or right point.
  22093. """
  22094. m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
  22095. shift = 2.5
  22096. d = shift * max(1, w)
  22097. p2 = R + (d/2., 0) if lr else L - (d/2., 0)
  22098. p1 = p2 + (-2*d, -d) if lr else p2 + (2*d, -d)
  22099. p3 = p2 + (-2*d, d) if lr else p2 + (2*d, d)
  22100. p1 *= im
  22101. p2 *= im
  22102. p3 *= im
  22103. ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y)
  22104. ap += "%f %f l\n" % (p2.x, p2.y)
  22105. ap += "%f %f l\n" % (p3.x, p3.y)
  22106. ap += _format_g(w) + " w\n"
  22107. ap += scol + fcol + "b\nQ\n"
  22108. return ap
  22109. @staticmethod
  22110. def _le_diamond(annot, p1, p2, lr, fill_color):
  22111. """Make stream commands for diamond line end symbol. "lr" denotes left (False) or right point.
  22112. """
  22113. m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
  22114. shift = 2.5 # 2*shift*width = length of square edge
  22115. d = shift * max(1, w)
  22116. M = R - (d/2., 0) if lr else L + (d/2., 0)
  22117. r = Rect(M, M) + (-d, -d, d, d) # the square
  22118. # the square makes line longer by (2*shift - 1)*width
  22119. p = (r.tl + (r.bl - r.tl) * 0.5) * im
  22120. ap = "q\n%s%f %f m\n" % (opacity, p.x, p.y)
  22121. p = (r.tl + (r.tr - r.tl) * 0.5) * im
  22122. ap += "%f %f l\n" % (p.x, p.y)
  22123. p = (r.tr + (r.br - r.tr) * 0.5) * im
  22124. ap += "%f %f l\n" % (p.x, p.y)
  22125. p = (r.br + (r.bl - r.br) * 0.5) * im
  22126. ap += "%f %f l\n" % (p.x, p.y)
  22127. ap += _format_g(w) + " w\n"
  22128. ap += scol + fcol + "b\nQ\n"
  22129. return ap
  22130. @staticmethod
  22131. def _le_openarrow(annot, p1, p2, lr, fill_color):
  22132. """Make stream commands for open arrow line end symbol. "lr" denotes left (False) or right point.
  22133. """
  22134. m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
  22135. shift = 2.5
  22136. d = shift * max(1, w)
  22137. p2 = R + (d/2., 0) if lr else L - (d/2., 0)
  22138. p1 = p2 + (-2*d, -d) if lr else p2 + (2*d, -d)
  22139. p3 = p2 + (-2*d, d) if lr else p2 + (2*d, d)
  22140. p1 *= im
  22141. p2 *= im
  22142. p3 *= im
  22143. ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y)
  22144. ap += "%f %f l\n" % (p2.x, p2.y)
  22145. ap += "%f %f l\n" % (p3.x, p3.y)
  22146. ap += _format_g(w) + " w\n"
  22147. ap += scol + "S\nQ\n"
  22148. return ap
  22149. @staticmethod
  22150. def _le_rclosedarrow(annot, p1, p2, lr, fill_color):
  22151. """Make stream commands for right closed arrow line end symbol. "lr" denotes left (False) or right point.
  22152. """
  22153. m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
  22154. shift = 2.5
  22155. d = shift * max(1, w)
  22156. p2 = R - (2*d, 0) if lr else L + (2*d, 0)
  22157. p1 = p2 + (2*d, -d) if lr else p2 + (-2*d, -d)
  22158. p3 = p2 + (2*d, d) if lr else p2 + (-2*d, d)
  22159. p1 *= im
  22160. p2 *= im
  22161. p3 *= im
  22162. ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y)
  22163. ap += "%f %f l\n" % (p2.x, p2.y)
  22164. ap += "%f %f l\n" % (p3.x, p3.y)
  22165. ap += _format_g(w) + " w\n"
  22166. ap += scol + fcol + "b\nQ\n"
  22167. return ap
  22168. @staticmethod
  22169. def _le_ropenarrow(annot, p1, p2, lr, fill_color):
  22170. """Make stream commands for right open arrow line end symbol. "lr" denotes left (False) or right point.
  22171. """
  22172. m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
  22173. shift = 2.5
  22174. d = shift * max(1, w)
  22175. p2 = R - (d/3., 0) if lr else L + (d/3., 0)
  22176. p1 = p2 + (2*d, -d) if lr else p2 + (-2*d, -d)
  22177. p3 = p2 + (2*d, d) if lr else p2 + (-2*d, d)
  22178. p1 *= im
  22179. p2 *= im
  22180. p3 *= im
  22181. ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y)
  22182. ap += "%f %f l\n" % (p2.x, p2.y)
  22183. ap += "%f %f l\n" % (p3.x, p3.y)
  22184. ap += _format_g(w) + " w\n"
  22185. ap += scol + fcol + "S\nQ\n"
  22186. return ap
  22187. @staticmethod
  22188. def _le_slash(annot, p1, p2, lr, fill_color):
  22189. """Make stream commands for slash line end symbol. "lr" denotes left (False) or right point.
  22190. """
  22191. m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
  22192. rw = 1.1547 * max(1, w) * 1.0 # makes rect diagonal a 30 deg inclination
  22193. M = R if lr else L
  22194. r = Rect(M.x - rw, M.y - 2 * w, M.x + rw, M.y + 2 * w)
  22195. top = r.tl * im
  22196. bot = r.br * im
  22197. ap = "\nq\n%s%f %f m\n" % (opacity, top.x, top.y)
  22198. ap += "%f %f l\n" % (bot.x, bot.y)
  22199. ap += _format_g(w) + " w\n"
  22200. ap += scol + "s\nQ\n"
  22201. return ap
  22202. @staticmethod
  22203. def _le_square(annot, p1, p2, lr, fill_color):
  22204. """Make stream commands for square line end symbol. "lr" denotes left (False) or right point.
  22205. """
  22206. m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
  22207. shift = 2.5 # 2*shift*width = length of square edge
  22208. d = shift * max(1, w)
  22209. M = R - (d/2., 0) if lr else L + (d/2., 0)
  22210. r = Rect(M, M) + (-d, -d, d, d) # the square
  22211. # the square makes line longer by (2*shift - 1)*width
  22212. p = r.tl * im
  22213. ap = "q\n%s%f %f m\n" % (opacity, p.x, p.y)
  22214. p = r.tr * im
  22215. ap += "%f %f l\n" % (p.x, p.y)
  22216. p = r.br * im
  22217. ap += "%f %f l\n" % (p.x, p.y)
  22218. p = r.bl * im
  22219. ap += "%f %f l\n" % (p.x, p.y)
  22220. ap += _format_g(w) + " w\n"
  22221. ap += scol + fcol + "b\nQ\n"
  22222. return ap
  22223. @staticmethod
  22224. def _oval_string(p1, p2, p3, p4):
  22225. """Return /AP string defining an oval within a 4-polygon provided as points
  22226. """
  22227. def bezier(p, q, r):
  22228. f = "%f %f %f %f %f %f c\n"
  22229. return f % (p.x, p.y, q.x, q.y, r.x, r.y)
  22230. kappa = 0.55228474983 # magic number
  22231. ml = p1 + (p4 - p1) * 0.5 # middle points ...
  22232. mo = p1 + (p2 - p1) * 0.5 # for each ...
  22233. mr = p2 + (p3 - p2) * 0.5 # polygon ...
  22234. mu = p4 + (p3 - p4) * 0.5 # side
  22235. ol1 = ml + (p1 - ml) * kappa # the 8 bezier
  22236. ol2 = mo + (p1 - mo) * kappa # helper points
  22237. or1 = mo + (p2 - mo) * kappa
  22238. or2 = mr + (p2 - mr) * kappa
  22239. ur1 = mr + (p3 - mr) * kappa
  22240. ur2 = mu + (p3 - mu) * kappa
  22241. ul1 = mu + (p4 - mu) * kappa
  22242. ul2 = ml + (p4 - ml) * kappa
  22243. # now draw, starting from middle point of left side
  22244. ap = "%f %f m\n" % (ml.x, ml.y)
  22245. ap += bezier(ol1, ol2, mo)
  22246. ap += bezier(or1, or2, mr)
  22247. ap += bezier(ur1, ur2, mu)
  22248. ap += bezier(ul1, ul2, ml)
  22249. return ap
  22250. @staticmethod
  22251. def _parse_da(annot):
  22252. if g_use_extra:
  22253. val = extra.Tools_parse_da( annot.this)
  22254. else:
  22255. def Tools__parse_da(annot):
  22256. this_annot = annot.this
  22257. assert isinstance(this_annot, mupdf.PdfAnnot)
  22258. this_annot_obj = mupdf.pdf_annot_obj( this_annot)
  22259. pdf = mupdf.pdf_get_bound_document( this_annot_obj)
  22260. try:
  22261. da = mupdf.pdf_dict_get_inheritable( this_annot_obj, PDF_NAME('DA'))
  22262. if not da.m_internal:
  22263. trailer = mupdf.pdf_trailer(pdf)
  22264. da = mupdf.pdf_dict_getl(trailer,
  22265. PDF_NAME('Root'),
  22266. PDF_NAME('AcroForm'),
  22267. PDF_NAME('DA'),
  22268. )
  22269. da_str = mupdf.pdf_to_text_string(da)
  22270. except Exception:
  22271. if g_exceptions_verbose: exception_info()
  22272. return
  22273. return da_str
  22274. val = Tools__parse_da(annot)
  22275. if not val:
  22276. return ((0,), "", 0)
  22277. font = "Helv"
  22278. fsize = 12
  22279. col = (0, 0, 0)
  22280. dat = val.split() # split on any whitespace
  22281. for i, item in enumerate(dat):
  22282. if item == "Tf":
  22283. font = dat[i - 2][1:]
  22284. fsize = float(dat[i - 1])
  22285. dat[i] = dat[i-1] = dat[i-2] = ""
  22286. continue
  22287. if item == "g": # unicolor text
  22288. col = [(float(dat[i - 1]))]
  22289. dat[i] = dat[i-1] = ""
  22290. continue
  22291. if item == "rg": # RGB colored text
  22292. col = [float(f) for f in dat[i - 3:i]]
  22293. dat[i] = dat[i-1] = dat[i-2] = dat[i-3] = ""
  22294. continue
  22295. if item == "k": # CMYK colored text
  22296. col = [float(f) for f in dat[i - 4:i]]
  22297. dat[i] = dat[i-1] = dat[i-2] = dat[i-3] = dat[i-4] = ""
  22298. continue
  22299. val = (col, font, fsize)
  22300. return val
  22301. @staticmethod
  22302. def _reset_widget(annot):
  22303. this_annot = annot
  22304. this_annot_obj = mupdf.pdf_annot_obj(this_annot)
  22305. pdf = mupdf.pdf_get_bound_document(this_annot_obj)
  22306. mupdf.pdf_field_reset(pdf, this_annot_obj)
  22307. @staticmethod
  22308. def _rotate_matrix(page):
  22309. pdfpage = page._pdf_page(required=False)
  22310. if not pdfpage.m_internal:
  22311. return JM_py_from_matrix(mupdf.FzMatrix())
  22312. return JM_py_from_matrix(JM_rotate_page_matrix(pdfpage))
  22313. @staticmethod
  22314. def _save_widget(annot, widget):
  22315. JM_set_widget_properties(annot, widget)
  22316. def _update_da(annot, da_str):
  22317. if g_use_extra:
  22318. extra.Tools_update_da( annot.this, da_str)
  22319. else:
  22320. try:
  22321. this_annot = annot.this
  22322. assert isinstance(this_annot, mupdf.PdfAnnot)
  22323. mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(this_annot), PDF_NAME('DA'), da_str)
  22324. mupdf.pdf_dict_del(mupdf.pdf_annot_obj(this_annot), PDF_NAME('DS')) # /* not supported */
  22325. mupdf.pdf_dict_del(mupdf.pdf_annot_obj(this_annot), PDF_NAME('RC')) # /* not supported */
  22326. except Exception:
  22327. if g_exceptions_verbose: exception_info()
  22328. return
  22329. return
  22330. @staticmethod
  22331. def gen_id():
  22332. global TOOLS_JM_UNIQUE_ID
  22333. TOOLS_JM_UNIQUE_ID += 1
  22334. return TOOLS_JM_UNIQUE_ID
  22335. @staticmethod
  22336. def glyph_cache_empty():
  22337. '''
  22338. Empty the glyph cache.
  22339. '''
  22340. mupdf.fz_purge_glyph_cache()
  22341. @staticmethod
  22342. def image_profile(stream, keep_image=0):
  22343. '''
  22344. Metadata of an image binary stream.
  22345. '''
  22346. return JM_image_profile(stream, keep_image)
  22347. @staticmethod
  22348. def mupdf_display_errors(on=None):
  22349. '''
  22350. Set MuPDF error display to True or False.
  22351. '''
  22352. global JM_mupdf_show_errors
  22353. if on is not None:
  22354. JM_mupdf_show_errors = bool(on)
  22355. return JM_mupdf_show_errors
  22356. @staticmethod
  22357. def mupdf_display_warnings(on=None):
  22358. '''
  22359. Set MuPDF warnings display to True or False.
  22360. '''
  22361. global JM_mupdf_show_warnings
  22362. if on is not None:
  22363. JM_mupdf_show_warnings = bool(on)
  22364. return JM_mupdf_show_warnings
  22365. @staticmethod
  22366. def mupdf_version():
  22367. '''Get version of MuPDF binary build.'''
  22368. return mupdf.FZ_VERSION
  22369. @staticmethod
  22370. def mupdf_warnings(reset=1):
  22371. '''
  22372. Get the MuPDF warnings/errors with optional reset (default).
  22373. '''
  22374. # Get any trailing `... repeated <N> times...` message.
  22375. mupdf.fz_flush_warnings()
  22376. ret = '\n'.join( JM_mupdf_warnings_store)
  22377. if reset:
  22378. TOOLS.reset_mupdf_warnings()
  22379. return ret
  22380. @staticmethod
  22381. def reset_mupdf_warnings():
  22382. global JM_mupdf_warnings_store
  22383. JM_mupdf_warnings_store = list()
  22384. @staticmethod
  22385. def set_aa_level(level):
  22386. '''
  22387. Set anti-aliasing level.
  22388. '''
  22389. mupdf.fz_set_aa_level(level)
  22390. @staticmethod
  22391. def set_annot_stem( stem=None):
  22392. global JM_annot_id_stem
  22393. if stem is None:
  22394. return JM_annot_id_stem
  22395. len_ = len(stem) + 1
  22396. if len_ > 50:
  22397. len_ = 50
  22398. JM_annot_id_stem = stem[:50]
  22399. return JM_annot_id_stem
  22400. @staticmethod
  22401. def set_font_width(doc, xref, width):
  22402. pdf = _as_pdf_document(doc, required=0)
  22403. if not pdf.m_internal:
  22404. return False
  22405. font = mupdf.pdf_load_object(pdf, xref)
  22406. dfonts = mupdf.pdf_dict_get(font, PDF_NAME('DescendantFonts'))
  22407. if mupdf.pdf_is_array(dfonts):
  22408. n = mupdf.pdf_array_len(dfonts)
  22409. for i in range(n):
  22410. dfont = mupdf.pdf_array_get(dfonts, i)
  22411. warray = mupdf.pdf_new_array(pdf, 3)
  22412. mupdf.pdf_array_push(warray, mupdf.pdf_new_int(0))
  22413. mupdf.pdf_array_push(warray, mupdf.pdf_new_int(65535))
  22414. mupdf.pdf_array_push(warray, mupdf.pdf_new_int(width))
  22415. mupdf.pdf_dict_put(dfont, PDF_NAME('W'), warray)
  22416. return True
  22417. @staticmethod
  22418. def set_graphics_min_line_width(min_line_width):
  22419. '''
  22420. Set the graphics minimum line width.
  22421. '''
  22422. mupdf.fz_set_graphics_min_line_width(min_line_width)
  22423. @staticmethod
  22424. def set_icc( on=0):
  22425. """Set ICC color handling on or off."""
  22426. if on:
  22427. if mupdf.FZ_ENABLE_ICC:
  22428. mupdf.fz_enable_icc()
  22429. else:
  22430. RAISEPY( "MuPDF built w/o ICC support",PyExc_ValueError)
  22431. elif mupdf.FZ_ENABLE_ICC:
  22432. mupdf.fz_disable_icc()
  22433. @staticmethod
  22434. def set_low_memory( on=None):
  22435. """Set / unset MuPDF device caching."""
  22436. if on is not None:
  22437. _globals.no_device_caching = bool(on)
  22438. return _globals.no_device_caching
  22439. @staticmethod
  22440. def set_small_glyph_heights(on=None):
  22441. """Set / unset small glyph heights."""
  22442. if on is not None:
  22443. _globals.small_glyph_heights = bool(on)
  22444. if g_use_extra:
  22445. extra.set_small_glyph_heights(_globals.small_glyph_heights)
  22446. return _globals.small_glyph_heights
  22447. @staticmethod
  22448. def set_subset_fontnames(on=None):
  22449. '''
  22450. Set / unset returning fontnames with their subset prefix.
  22451. '''
  22452. if on is not None:
  22453. _globals.subset_fontnames = bool(on)
  22454. if g_use_extra:
  22455. extra.set_subset_fontnames(_globals.subset_fontnames)
  22456. return _globals.subset_fontnames
  22457. @staticmethod
  22458. def show_aa_level():
  22459. '''
  22460. Show anti-aliasing values.
  22461. '''
  22462. return dict(
  22463. graphics = mupdf.fz_graphics_aa_level(),
  22464. text = mupdf.fz_text_aa_level(),
  22465. graphics_min_line_width = mupdf.fz_graphics_min_line_width(),
  22466. )
  22467. @staticmethod
  22468. def store_maxsize():
  22469. '''
  22470. MuPDF store size limit.
  22471. '''
  22472. # fixme: return gctx->store->max.
  22473. return None
  22474. @staticmethod
  22475. def store_shrink(percent):
  22476. '''
  22477. Free 'percent' of current store size.
  22478. '''
  22479. if percent >= 100:
  22480. mupdf.fz_empty_store()
  22481. return 0
  22482. if percent > 0:
  22483. mupdf.fz_shrink_store( 100 - percent)
  22484. # fixme: return gctx->store->size.
  22485. @staticmethod
  22486. def store_size():
  22487. '''
  22488. MuPDF current store size.
  22489. '''
  22490. # fixme: return gctx->store->size.
  22491. return None
  22492. @staticmethod
  22493. def unset_quad_corrections(on=None):
  22494. '''
  22495. Set ascender / descender corrections on or off.
  22496. '''
  22497. if on is not None:
  22498. _globals.skip_quad_corrections = bool(on)
  22499. if g_use_extra:
  22500. extra.set_skip_quad_corrections(_globals.skip_quad_corrections)
  22501. return _globals.skip_quad_corrections
  22502. # fixme: also defined at top-level.
  22503. JM_annot_id_stem = 'fitz'
  22504. fitz_config = JM_fitz_config()
# Callbacks not yet supported with cppyy.
# Otherwise register JM_mupdf_warning() / JM_mupdf_error() as MuPDF's warning
# and error callbacks.
if not mupdf_cppyy:
    mupdf.fz_set_warning_callback(JM_mupdf_warning)
    mupdf.fz_set_error_callback(JM_mupdf_error)
  22509. # If there are pending warnings when we exit, we end up in this sequence:
  22510. #
  22511. # atexit()
  22512. # -> mupdf::internal_thread_state::~internal_thread_state()
  22513. # -> fz_drop_context()
  22514. # -> fz_flush_warnings()
  22515. # -> SWIG Director code
  22516. # -> Python calling JM_mupdf_warning().
  22517. #
  22518. # Unfortunately this causes a SEGV, seemingly because the SWIG Director code has
  22519. # already been torn down.
  22520. #
  22521. # So we use a Python atexit handler to explicitly call fz_flush_warnings();
  22522. # this appears to happen early enough for the Director machinery to still
  22523. # work. So in the sequence above, fz_flush_warnings() will find that there are
  22524. # no pending warnings and will not attempt to call JM_mupdf_warning().
  22525. #
  22526. def _atexit():
  22527. #log( 'PyMuPDF/src/__init__.py:_atexit() called')
  22528. mupdf.fz_flush_warnings()
  22529. mupdf.fz_set_warning_callback(None)
  22530. mupdf.fz_set_error_callback(None)
  22531. #log( '_atexit() returning')
  22532. atexit.register( _atexit)
# List of (name, red, green, blue) where:
#   name: upper-case name.
#   red, green, blue: integer in range 0..255.
#
# Note: after the import, the name `_wxcolors` is rebound from the submodule
# to the list it contains.
#
from . import _wxcolors
_wxcolors = _wxcolors._wxcolors

# Dict mapping from name to (red, green, blue).
#   name: lower-case name.
#   red, green, blue: float in range 0..1.
#
# Note: the loop variables `name`, `r`, `g` and `b` remain defined at module
# level after the loop.
#
pdfcolor = dict()
for name, r, g, b in _wxcolors:
    pdfcolor[name.lower()] = (r/255, g/255, b/255)
  22546. def colors_pdf_dict():
  22547. '''
  22548. Returns dict mapping from name to (red, green, blue).
  22549. name: lower-case name.
  22550. red, green, blue: float in range 0..1.
  22551. '''
  22552. return pdfcolor
  22553. def colors_wx_list():
  22554. '''
  22555. Returns list of (name, red, green, blue) tuples:
  22556. name: upper-case name.
  22557. red, green, blue: integers in range 0..255.
  22558. '''
  22559. return _wxcolors
  22560. def _mupdf_devel(make_links=True):
  22561. '''
  22562. Allows PyMuPDF installation to be used to compile and link programmes that
  22563. use the MuPDF C/C++ API.
  22564. Args:
  22565. make_links:
  22566. If true, then on non-windows we also create softlinks to any shared
  22567. libraries that are supplied with a version suffix; this allows them
  22568. to be used in a link command.
  22569. For example we create links such as:
  22570. site-packages/pymupdf/
  22571. libmupdf.so -> libmupdf.so.26.7
  22572. libmupdfcpp.so -> libmupdfcpp.so.26.7
  22573. Returns: (mupdf_include, mupdf_lib).
  22574. mupdf_include:
  22575. Path of MuPDF include directory within PyMuPDF install.
  22576. mupdf_lib
  22577. Path of MuPDF library directory within PyMuPDF install.
  22578. '''
  22579. import platform
  22580. log(f'{mupdf_version=}')
  22581. p = os.path.normpath(f'{__file__}/..')
  22582. mupdf_include = f'{p}/mupdf-devel/include'
  22583. if platform.system() == 'Windows':
  22584. # Separate .lib files are used at build time.
  22585. mupdf_lib = f'{p}/mupdf-devel/lib'
  22586. else:
  22587. # .so files are used for both buildtime and runtime linking.
  22588. mupdf_lib = p
  22589. log(f'Within installed PyMuPDF:')
  22590. log(f' {mupdf_include=}')
  22591. log(f' {mupdf_lib=}')
  22592. assert os.path.isdir(mupdf_include), f'Not a directory: {mupdf_include=}.'
  22593. assert os.path.isdir(mupdf_lib), f'Not a directory: {mupdf_lib=}.'
  22594. if platform.system() != 'Windows' and make_links:
  22595. # Make symbolic links within the installed pymupdf module so
  22596. # that ld can find libmupdf.so etc. This is a bit of a hack, but
  22597. # necessary because wheels cannot contain symbolic links.
  22598. #
  22599. # For example we create `libmupdf.so -> libmupdf.so.24.8`.
  22600. #
  22601. # We are careful to only create symlinks for the expected MuPDF
  22602. # version, in case old .so files from a previous install are still
  22603. # in place.
  22604. #
  22605. log(f'Creating symlinks in {mupdf_lib=} for MuPDF-{mupdf_version} .so files.')
  22606. regex_suffix = mupdf_version.split('.')[1:3]
  22607. regex_suffix = '[.]'.join(regex_suffix)
  22608. mupdf_lib_regex = f'^(lib[^.]+[.]so)[.]{regex_suffix}$'
  22609. log(f'{mupdf_lib_regex=}.')
  22610. for leaf in os.listdir(mupdf_lib):
  22611. m = re.match(mupdf_lib_regex, leaf)
  22612. if m:
  22613. pfrom = f'{mupdf_lib}/{m.group(1)}'
  22614. # os.path.exists() can return false if softlink exists
  22615. # but points to non-existent file, so we also use
  22616. # `os.path.islink()`.
  22617. if os.path.islink(pfrom) or os.path.exists(pfrom):
  22618. log(f'Removing existing link {pfrom=}.')
  22619. os.remove(pfrom)
  22620. log(f'Creating symlink: {pfrom} -> {leaf}')
  22621. os.symlink(leaf, pfrom)
  22622. return mupdf_include, mupdf_lib
# We cannot import utils earlier because it imports this .py file itself and
# uses some pymupdf.* types in function typing.
#
from . import utils

# Use utils.*() fns for some class methods.
# The recover_*() functions are also made available as module-level names.
#
recover_bbox_quad = utils.recover_bbox_quad
recover_char_quad = utils.recover_char_quad
recover_line_quad = utils.recover_line_quad
recover_quad = utils.recover_quad
recover_span_quad = utils.recover_span_quad

# Attach find_tables() from the table submodule as a method of Page.
from .table import find_tables
Page.find_tables = find_tables
  22636. class FitzDeprecation(DeprecationWarning):
  22637. pass
  22638. def restore_aliases():
  22639. warnings.filterwarnings( "once", category=FitzDeprecation)
  22640. def showthis(msg, cat, filename, lineno, file=None, line=None):
  22641. text = warnings.formatwarning(msg, cat, filename, lineno, line=line)
  22642. s = text.find("FitzDeprecation")
  22643. if s < 0:
  22644. log(text)
  22645. return
  22646. text = text[s:].splitlines()[0][4:]
  22647. log(text)
  22648. warnings.showwarning = showthis
  22649. def _alias(class_, new_name, legacy_name=None):
  22650. '''
  22651. Adds an alias for a class_ or module item clled <class_>.<new>.
  22652. class_:
  22653. Class/module to modify; use None for the current module.
  22654. new_name:
  22655. String name of existing item, e.g. name of method.
  22656. legacy_name:
  22657. Name of legacy object to create in <class_>. If None, we generate
  22658. from <item> by removing underscores and capitalising the next
  22659. letter.
  22660. '''
  22661. if class_ is None:
  22662. class_ = sys.modules[__name__]
  22663. if not legacy_name:
  22664. legacy_name = ''
  22665. capitalise_next = False
  22666. for c in new_name:
  22667. if c == '_':
  22668. capitalise_next = True
  22669. elif capitalise_next:
  22670. legacy_name += c.upper()
  22671. capitalise_next = False
  22672. else:
  22673. legacy_name += c
  22674. new_object = getattr( class_, new_name)
  22675. assert not getattr( class_, legacy_name, None), f'class {class_} already has {legacy_name}'
  22676. if callable( new_object):
  22677. def deprecated_function( *args, **kwargs):
  22678. warnings.warn(
  22679. f'"{legacy_name=}" removed from {class_} after v1.19.0 - use "{new_name}".',
  22680. category=FitzDeprecation,
  22681. )
  22682. return new_object( *args, **kwargs)
  22683. setattr( class_, legacy_name, deprecated_function)
  22684. deprecated_function.__doc__ = (
  22685. f'*** Deprecated and removed in version after v1.19.0 - use "{new_name}". ***\n'
  22686. f'{new_object.__doc__}'
  22687. )
  22688. else:
  22689. setattr( class_, legacy_name, new_object)
  22690. _alias( Annot, 'get_file', 'fileGet')
  22691. _alias( Annot, 'get_pixmap')
  22692. _alias( Annot, 'get_sound', 'soundGet')
  22693. _alias( Annot, 'get_text')
  22694. _alias( Annot, 'get_textbox')
  22695. _alias( Annot, 'get_textpage', 'getTextPage')
  22696. _alias( Annot, 'line_ends')
  22697. _alias( Annot, 'set_blendmode', 'setBlendMode')
  22698. _alias( Annot, 'set_border')
  22699. _alias( Annot, 'set_colors')
  22700. _alias( Annot, 'set_flags')
  22701. _alias( Annot, 'set_info')
  22702. _alias( Annot, 'set_line_ends')
  22703. _alias( Annot, 'set_name')
  22704. _alias( Annot, 'set_oc', 'setOC')
  22705. _alias( Annot, 'set_opacity')
  22706. _alias( Annot, 'set_rect')
  22707. _alias( Annot, 'update_file', 'fileUpd')
  22708. _alias( DisplayList, 'get_pixmap')
  22709. _alias( DisplayList, 'get_textpage', 'getTextPage')
  22710. _alias( Document, 'chapter_count')
  22711. _alias( Document, 'chapter_page_count')
  22712. _alias( Document, 'convert_to_pdf', 'convertToPDF')
  22713. _alias( Document, 'copy_page')
  22714. _alias( Document, 'delete_page')
  22715. _alias( Document, 'delete_pages', 'deletePageRange')
  22716. _alias( Document, 'embfile_add', 'embeddedFileAdd')
  22717. _alias( Document, 'embfile_count', 'embeddedFileCount')
  22718. _alias( Document, 'embfile_del', 'embeddedFileDel')
  22719. _alias( Document, 'embfile_get', 'embeddedFileGet')
  22720. _alias( Document, 'embfile_info', 'embeddedFileInfo')
  22721. _alias( Document, 'embfile_names', 'embeddedFileNames')
  22722. _alias( Document, 'embfile_upd', 'embeddedFileUpd')
  22723. _alias( Document, 'extract_font')
  22724. _alias( Document, 'extract_image')
  22725. _alias( Document, 'find_bookmark')
  22726. _alias( Document, 'fullcopy_page')
  22727. _alias( Document, 'get_char_widths')
  22728. _alias( Document, 'get_ocgs', 'getOCGs')
  22729. _alias( Document, 'get_page_fonts', 'getPageFontList')
  22730. _alias( Document, 'get_page_images', 'getPageImageList')
  22731. _alias( Document, 'get_page_pixmap')
  22732. _alias( Document, 'get_page_text')
  22733. _alias( Document, 'get_page_xobjects', 'getPageXObjectList')
  22734. _alias( Document, 'get_sigflags', 'getSigFlags')
  22735. _alias( Document, 'get_toc', 'getToC')
  22736. _alias( Document, 'get_xml_metadata')
  22737. _alias( Document, 'insert_page')
  22738. _alias( Document, 'insert_pdf', 'insertPDF')
  22739. _alias( Document, 'is_dirty')
  22740. _alias( Document, 'is_form_pdf', 'isFormPDF')
  22741. _alias( Document, 'is_pdf', 'isPDF')
  22742. _alias( Document, 'is_reflowable')
  22743. _alias( Document, 'is_repaired')
  22744. _alias( Document, 'last_location')
  22745. _alias( Document, 'load_page')
  22746. _alias( Document, 'make_bookmark')
  22747. _alias( Document, 'move_page')
  22748. _alias( Document, 'needs_pass')
  22749. _alias( Document, 'new_page')
  22750. _alias( Document, 'next_location')
  22751. _alias( Document, 'page_count')
  22752. _alias( Document, 'page_cropbox', 'pageCropBox')
  22753. _alias( Document, 'page_xref')
  22754. _alias( Document, 'pdf_catalog', 'PDFCatalog')
  22755. _alias( Document, 'pdf_trailer', 'PDFTrailer')
  22756. _alias( Document, 'prev_location', 'previousLocation')
  22757. _alias( Document, 'resolve_link')
  22758. _alias( Document, 'search_page_for')
  22759. _alias( Document, 'set_language')
  22760. _alias( Document, 'set_metadata')
  22761. _alias( Document, 'set_toc', 'setToC')
  22762. _alias( Document, 'set_xml_metadata')
  22763. _alias( Document, 'update_object')
  22764. _alias( Document, 'update_stream')
  22765. _alias( Document, 'xref_is_stream', 'isStream')
  22766. _alias( Document, 'xref_length')
  22767. _alias( Document, 'xref_object')
  22768. _alias( Document, 'xref_stream')
  22769. _alias( Document, 'xref_stream_raw')
  22770. _alias( Document, 'xref_xml_metadata', 'metadataXML')
  22771. _alias( IRect, 'get_area')
  22772. _alias( IRect, 'get_area', 'getRectArea')
  22773. _alias( IRect, 'include_point')
  22774. _alias( IRect, 'include_rect')
  22775. _alias( IRect, 'is_empty')
  22776. _alias( IRect, 'is_infinite')
  22777. _alias( Link, 'is_external')
  22778. _alias( Link, 'set_border')
  22779. _alias( Link, 'set_colors')
  22780. _alias( Matrix, 'is_rectilinear')
  22781. _alias( Matrix, 'prerotate', 'preRotate')
  22782. _alias( Matrix, 'prescale', 'preScale')
  22783. _alias( Matrix, 'preshear', 'preShear')
  22784. _alias( Matrix, 'pretranslate', 'preTranslate')
  22785. _alias( None, 'get_pdf_now', 'getPDFnow')
  22786. _alias( None, 'get_pdf_str', 'getPDFstr')
  22787. _alias( None, 'get_text_length')
  22788. _alias( None, 'get_text_length', 'getTextlength')
  22789. _alias( None, 'image_profile', 'ImageProperties')
  22790. _alias( None, 'paper_rect', 'PaperRect')
  22791. _alias( None, 'paper_size', 'PaperSize')
  22792. _alias( None, 'paper_sizes')
  22793. _alias( None, 'planish_line')
  22794. _alias( Outline, 'is_external')
  22795. _alias( Outline, 'is_open')
  22796. _alias( Page, 'add_caret_annot')
  22797. _alias( Page, 'add_circle_annot')
  22798. _alias( Page, 'add_file_annot')
  22799. _alias( Page, 'add_freetext_annot')
  22800. _alias( Page, 'add_highlight_annot')
  22801. _alias( Page, 'add_ink_annot')
  22802. _alias( Page, 'add_line_annot')
  22803. _alias( Page, 'add_polygon_annot')
  22804. _alias( Page, 'add_polyline_annot')
  22805. _alias( Page, 'add_rect_annot')
  22806. _alias( Page, 'add_redact_annot')
  22807. _alias( Page, 'add_squiggly_annot')
  22808. _alias( Page, 'add_stamp_annot')
  22809. _alias( Page, 'add_strikeout_annot')
  22810. _alias( Page, 'add_text_annot')
  22811. _alias( Page, 'add_underline_annot')
  22812. _alias( Page, 'add_widget')
  22813. _alias( Page, 'clean_contents')
  22814. _alias( Page, 'cropbox', 'CropBox')
  22815. _alias( Page, 'cropbox_position', 'CropBoxPosition')
  22816. _alias( Page, 'delete_annot')
  22817. _alias( Page, 'delete_link')
  22818. _alias( Page, 'delete_widget')
  22819. _alias( Page, 'derotation_matrix')
  22820. _alias( Page, 'draw_bezier')
  22821. _alias( Page, 'draw_circle')
  22822. _alias( Page, 'draw_curve')
  22823. _alias( Page, 'draw_line')
  22824. _alias( Page, 'draw_oval')
  22825. _alias( Page, 'draw_polyline')
  22826. _alias( Page, 'draw_quad')
  22827. _alias( Page, 'draw_rect')
  22828. _alias( Page, 'draw_sector')
  22829. _alias( Page, 'draw_squiggle')
  22830. _alias( Page, 'draw_zigzag')
  22831. _alias( Page, 'first_annot')
  22832. _alias( Page, 'first_link')
  22833. _alias( Page, 'first_widget')
  22834. _alias( Page, 'get_contents')
  22835. _alias( Page, 'get_displaylist', 'getDisplayList')
  22836. _alias( Page, 'get_drawings')
  22837. _alias( Page, 'get_fonts', 'getFontList')
  22838. _alias( Page, 'get_image_bbox')
  22839. _alias( Page, 'get_images', 'getImageList')
  22840. _alias( Page, 'get_links')
  22841. _alias( Page, 'get_pixmap')
  22842. _alias( Page, 'get_svg_image', 'getSVGimage')
  22843. _alias( Page, 'get_text')
  22844. _alias( Page, 'get_text_blocks')
  22845. _alias( Page, 'get_text_words')
  22846. _alias( Page, 'get_textbox')
  22847. _alias( Page, 'get_textpage', 'getTextPage')
  22848. _alias( Page, 'insert_font')
  22849. _alias( Page, 'insert_image')
  22850. _alias( Page, 'insert_link')
  22851. _alias( Page, 'insert_text')
  22852. _alias( Page, 'insert_textbox')
  22853. _alias( Page, 'is_wrapped', '_isWrapped')
  22854. _alias( Page, 'load_annot')
  22855. _alias( Page, 'load_links')
  22856. _alias( Page, 'mediabox', 'MediaBox')
  22857. _alias( Page, 'mediabox_size', 'MediaBoxSize')
  22858. _alias( Page, 'new_shape')
  22859. _alias( Page, 'read_contents')
  22860. _alias( Page, 'rotation_matrix')
  22861. _alias( Page, 'search_for')
  22862. _alias( Page, 'set_cropbox', 'setCropBox')
  22863. _alias( Page, 'set_mediabox', 'setMediaBox')
  22864. _alias( Page, 'set_rotation')
  22865. _alias( Page, 'show_pdf_page', 'showPDFpage')
  22866. _alias( Page, 'transformation_matrix')
  22867. _alias( Page, 'update_link')
  22868. _alias( Page, 'wrap_contents')
  22869. _alias( Page, 'write_text')
  22870. _alias( Pixmap, 'clear_with')
  22871. _alias( Pixmap, 'copy', 'copyPixmap')
  22872. _alias( Pixmap, 'gamma_with')
  22873. _alias( Pixmap, 'invert_irect', 'invertIRect')
  22874. _alias( Pixmap, 'pil_save', 'pillowWrite')
  22875. _alias( Pixmap, 'pil_tobytes', 'pillowData')
  22876. _alias( Pixmap, 'save', 'writeImage')
  22877. _alias( Pixmap, 'save', 'writePNG')
  22878. _alias( Pixmap, 'set_alpha')
  22879. _alias( Pixmap, 'set_dpi', 'setResolution')
  22880. _alias( Pixmap, 'set_origin')
  22881. _alias( Pixmap, 'set_pixel')
  22882. _alias( Pixmap, 'set_rect')
  22883. _alias( Pixmap, 'tint_with')
  22884. _alias( Pixmap, 'tobytes', 'getImageData')
  22885. _alias( Pixmap, 'tobytes', 'getPNGData')
  22886. _alias( Pixmap, 'tobytes', 'getPNGdata')
  22887. _alias( Quad, 'is_convex')
  22888. _alias( Quad, 'is_empty')
  22889. _alias( Quad, 'is_rectangular')
  22890. _alias( Rect, 'get_area')
  22891. _alias( Rect, 'get_area', 'getRectArea')
  22892. _alias( Rect, 'include_point')
  22893. _alias( Rect, 'include_rect')
  22894. _alias( Rect, 'is_empty')
  22895. _alias( Rect, 'is_infinite')
  22896. _alias( TextWriter, 'fill_textbox')
  22897. _alias( TextWriter, 'write_text')
  22898. _alias( Shape, 'draw_bezier')
  22899. _alias( Shape, 'draw_circle')
  22900. _alias( Shape, 'draw_curve')
  22901. _alias( Shape, 'draw_line')
  22902. _alias( Shape, 'draw_oval')
  22903. _alias( Shape, 'draw_polyline')
  22904. _alias( Shape, 'draw_quad')
  22905. _alias( Shape, 'draw_rect')
  22906. _alias( Shape, 'draw_sector')
  22907. _alias( Shape, 'draw_squiggle')
  22908. _alias( Shape, 'draw_zigzag')
  22909. _alias( Shape, 'insert_text')
  22910. _alias( Shape, 'insert_textbox')
  22911. if 0:
  22912. restore_aliases()
  22913. __version__ = VersionBind
  22914. __doc__ = (
  22915. f'PyMuPDF {VersionBind}: Python bindings for the MuPDF {VersionFitz} library (rebased implementation).\n'
  22916. f'Python {sys.version_info[0]}.{sys.version_info[1]} running on {sys.platform} ({64 if sys.maxsize > 2**32 else 32}-bit).\n'
  22917. )