| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762
27722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762
77727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763
27732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763
77737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989399039913992399339943995399639973998399940004001400240034004400540064007400840094010401140124013401440154016401740184019402040214022402340244025402640274028402940304031403240334034403540364037403840394040404140424043404440454046404740484049405040514052405340544055405640574058405940604061406240634064406540664067406840694070407140724073407440754076407740784079408040814082408340844085408640874088408940904091409240934094409540964097409840994100410141024103410441054106410741084109411041114112411341144115411641174118411941204121412241234124412541264127412841294130413141324133413441354136413741384139414041414142414341444145414641474148414941504151415241534154415541564157415841594160416141624163416441654166416741684169417041714172417341744175417641774178417941804181418241834184418541864187418841894190419141924193419441954196419741984199420042014202420342044205420642074208420942104211421242134214421542164217421842194220422142224223422442254226422742284229423042314232423342344235423642374238423942404241424242434244424542464247424842494250425142524253425442554256425742584259426042614262426342644265426642674268426942704271427242734274427542764
27742784279428042814282428342844285428642874288428942904291429242934294429542964297429842994300430143024303430443054306430743084309431043114312431343144315431643174318431943204321432243234324432543264327432843294330433143324333433443354336433743384339434043414342434343444345434643474348434943504351435243534354435543564357435843594360436143624363436443654366436743684369437043714372437343744375437643774378437943804381438243834384438543864387438843894390439143924393439443954396439743984399440044014402440344044405440644074408440944104411441244134414441544164417441844194420442144224423442444254426442744284429443044314432443344344435443644374438443944404441444244434444444544464447444844494450445144524453445444554456445744584459446044614462446344644465446644674468446944704471447244734474447544764477447844794480448144824483448444854486448744884489449044914492449344944495449644974498449945004501450245034504450545064507450845094510451145124513451445154516451745184519452045214522452345244525452645274528452945304531453245334534453545364537453845394540454145424543454445454546454745484549455045514552455345544555455645574558455945604561456245634564456545664567456845694570457145724573457445754576457745784579458045814582458345844585458645874588458945904591459245934594459545964597459845994600460146024603460446054606460746084609461046114612461346144615461646174618461946204621462246234624462546264627462846294630463146324633463446354636463746384639464046414642464346444645464646474648464946504651465246534654465546564657465846594660466146624663466446654666466746684669467046714672467346744675467646774678467946804681468246834684468546864687468846894690469146924693469446954696469746984699470047014702470347044705470647074708470947104711471247134714471547164717471847194720472147224723472447254726472747284729473047314732473347344735473647374738473947404741474247434744474547464747474847494750475147524753475447554756475747584759476047614762476347644765476647674768476947704771477247734774477547764
77747784779478047814782478347844785478647874788478947904791479247934794479547964797479847994800480148024803480448054806480748084809481048114812481348144815481648174818481948204821482248234824482548264827482848294830483148324833483448354836483748384839484048414842484348444845484648474848484948504851485248534854485548564857485848594860486148624863486448654866486748684869487048714872487348744875487648774878487948804881488248834884488548864887488848894890489148924893489448954896489748984899490049014902490349044905490649074908490949104911491249134914491549164917491849194920492149224923492449254926492749284929493049314932493349344935493649374938493949404941494249434944494549464947494849494950495149524953495449554956495749584959496049614962496349644965496649674968496949704971497249734974497549764977497849794980498149824983498449854986498749884989499049914992499349944995499649974998499950005001500250035004500550065007500850095010501150125013501450155016501750185019502050215022502350245025502650275028502950305031503250335034503550365037503850395040504150425043504450455046504750485049505050515052505350545055505650575058505950605061506250635064506550665067506850695070507150725073507450755076507750785079508050815082508350845085508650875088508950905091509250935094509550965097509850995100510151025103510451055106510751085109511051115112511351145115511651175118511951205121512251235124512551265127512851295130513151325133513451355136513751385139514051415142514351445145514651475148514951505151515251535154515551565157515851595160516151625163516451655166516751685169517051715172517351745175517651775178517951805181518251835184518551865187518851895190519151925193519451955196519751985199520052015202520352045205520652075208520952105211521252135214521552165217521852195220522152225223522452255226522752285229523052315232523352345235523652375238523952405241524252435244524552465247524852495250525152525253525452555256525752585259526052615262526352645265526652675268526952705271527252735274527552765
27752785279528052815282528352845285528652875288528952905291529252935294529552965297529852995300530153025303530453055306530753085309531053115312531353145315531653175318531953205321532253235324532553265327532853295330533153325333533453355336533753385339534053415342534353445345534653475348534953505351535253535354535553565357535853595360536153625363536453655366536753685369537053715372537353745375537653775378537953805381538253835384538553865387538853895390539153925393539453955396539753985399540054015402540354045405540654075408540954105411541254135414541554165417541854195420542154225423542454255426542754285429543054315432543354345435543654375438543954405441544254435444544554465447544854495450545154525453545454555456545754585459546054615462546354645465546654675468546954705471547254735474547554765477547854795480548154825483548454855486548754885489549054915492549354945495549654975498549955005501550255035504550555065507550855095510551155125513551455155516551755185519552055215522552355245525552655275528552955305531553255335534553555365537553855395540554155425543554455455546554755485549555055515552555355545555555655575558555955605561556255635564556555665567556855695570557155725573557455755576557755785579558055815582558355845585558655875588558955905591559255935594559555965597559855995600560156025603560456055606560756085609561056115612561356145615561656175618561956205621562256235624562556265627562856295630563156325633563456355636563756385639564056415642564356445645564656475648564956505651565256535654565556565657565856595660566156625663566456655666566756685669567056715672567356745675567656775678567956805681568256835684568556865687568856895690569156925693569456955696569756985699570057015702570357045705570657075708570957105711571257135714571557165717571857195720572157225723572457255726572757285729573057315732573357345735573657375738573957405741574257435744574557465747574857495750575157525753575457555756575757585759576057615762576357645765576657675768576957705771577257735774577557765
77757785779578057815782578357845785578657875788578957905791579257935794579557965797579857995800580158025803580458055806580758085809581058115812581358145815581658175818581958205821582258235824582558265827582858295830583158325833583458355836583758385839584058415842584358445845584658475848584958505851585258535854585558565857585858595860586158625863586458655866586758685869587058715872587358745875587658775878587958805881588258835884588558865887588858895890589158925893589458955896589758985899590059015902590359045905590659075908590959105911591259135914591559165917591859195920592159225923592459255926592759285929593059315932593359345935593659375938593959405941594259435944594559465947594859495950595159525953595459555956595759585959596059615962596359645965596659675968596959705971597259735974597559765977597859795980598159825983598459855986598759885989599059915992599359945995599659975998599960006001600260036004600560066007600860096010601160126013601460156016601760186019602060216022602360246025602660276028602960306031603260336034603560366037603860396040604160426043604460456046604760486049605060516052605360546055605660576058605960606061606260636064606560666067606860696070607160726073607460756076607760786079608060816082608360846085608660876088608960906091609260936094609560966097609860996100610161026103610461056106610761086109611061116112611361146115611661176118611961206121612261236124612561266127612861296130613161326133613461356136613761386139614061416142614361446145614661476148614961506151615261536154615561566157615861596160616161626163616461656166616761686169617061716172617361746175617661776178617961806181618261836184618561866187618861896190619161926193619461956196619761986199620062016202620362046205620662076208620962106211621262136214621562166217621862196220622162226223622462256226622762286229623062316232623362346235623662376238623962406241624262436244624562466247624862496250625162526253625462556256625762586259626062616262626362646265626662676268626962706271627262736274627562766
27762786279628062816282628362846285628662876288628962906291629262936294629562966297629862996300630163026303630463056306630763086309631063116312631363146315631663176318631963206321632263236324632563266327632863296330633163326333633463356336633763386339634063416342634363446345634663476348634963506351635263536354635563566357635863596360636163626363636463656366636763686369637063716372637363746375637663776378637963806381638263836384638563866387638863896390639163926393639463956396639763986399640064016402640364046405640664076408640964106411641264136414641564166417641864196420642164226423642464256426642764286429643064316432643364346435643664376438643964406441644264436444644564466447644864496450645164526453645464556456645764586459646064616462646364646465646664676468646964706471647264736474647564766477647864796480648164826483648464856486648764886489649064916492649364946495649664976498649965006501650265036504650565066507650865096510651165126513651465156516651765186519652065216522652365246525652665276528652965306531653265336534653565366537653865396540654165426543654465456546654765486549655065516552655365546555655665576558655965606561656265636564656565666567656865696570657165726573657465756576657765786579658065816582658365846585658665876588658965906591659265936594659565966597659865996600660166026603660466056606660766086609661066116612661366146615661666176618661966206621662266236624662566266627662866296630663166326633663466356636663766386639664066416642664366446645664666476648664966506651665266536654665566566657665866596660666166626663666466656666666766686669667066716672667366746675667666776678667966806681668266836684668566866687668866896690669166926693669466956696669766986699670067016702670367046705670667076708670967106711671267136714671567166717671867196720672167226723672467256726672767286729673067316732673367346735673667376738673967406741674267436744674567466747674867496750675167526753675467556756675767586759676067616762676367646765676667676768676967706771677267736774677567766
77767786779678067816782678367846785678667876788678967906791679267936794679567966797679867996800680168026803680468056806680768086809681068116812681368146815681668176818681968206821682268236824682568266827682868296830683168326833683468356836683768386839684068416842684368446845684668476848684968506851685268536854685568566857685868596860686168626863686468656866686768686869687068716872687368746875687668776878687968806881688268836884688568866887688868896890689168926893689468956896689768986899690069016902690369046905690669076908690969106911691269136914691569166917691869196920692169226923692469256926692769286929693069316932693369346935693669376938693969406941694269436944694569466947694869496950695169526953695469556956695769586959696069616962696369646965696669676968696969706971697269736974697569766977697869796980698169826983698469856986698769886989699069916992699369946995699669976998699970007001700270037004700570067007700870097010701170127013701470157016701770187019702070217022702370247025702670277028702970307031703270337034703570367037703870397040704170427043704470457046704770487049705070517052705370547055705670577058705970607061706270637064706570667067706870697070707170727073707470757076707770787079708070817082708370847085708670877088708970907091709270937094709570967097709870997100710171027103710471057106710771087109711071117112711371147115711671177118711971207121712271237124712571267127712871297130713171327133713471357136713771387139714071417142714371447145714671477148714971507151715271537154715571567157715871597160716171627163716471657166716771687169717071717172717371747175717671777178717971807181718271837184718571867187718871897190719171927193719471957196719771987199720072017202720372047205720672077208720972107211721272137214721572167217721872197220722172227223722472257226722772287229723072317232723372347235723672377238723972407241724272437244724572467247724872497250725172527253725472557256725772587259726072617262726372647265726672677268726972707271727272737274727572767
27772787279728072817282728372847285728672877288728972907291729272937294729572967297729872997300730173027303730473057306730773087309731073117312731373147315731673177318731973207321732273237324732573267327732873297330733173327333733473357336733773387339734073417342734373447345734673477348734973507351735273537354735573567357735873597360736173627363736473657366736773687369737073717372737373747375737673777378737973807381738273837384738573867387738873897390739173927393739473957396739773987399740074017402740374047405740674077408740974107411741274137414741574167417741874197420742174227423742474257426742774287429743074317432743374347435743674377438743974407441744274437444744574467447744874497450745174527453745474557456745774587459746074617462746374647465746674677468746974707471747274737474747574767477747874797480748174827483748474857486748774887489749074917492749374947495749674977498749975007501750275037504750575067507750875097510751175127513751475157516751775187519752075217522752375247525752675277528752975307531753275337534753575367537753875397540754175427543754475457546754775487549755075517552755375547555755675577558755975607561756275637564756575667567756875697570757175727573757475757576757775787579758075817582758375847585758675877588758975907591759275937594759575967597759875997600760176027603760476057606760776087609761076117612761376147615761676177618761976207621762276237624762576267627762876297630763176327633763476357636763776387639764076417642764376447645764676477648764976507651765276537654765576567657765876597660766176627663766476657666766776687669767076717672767376747675767676777678767976807681768276837684768576867687768876897690769176927693769476957696769776987699770077017702770377047705770677077708770977107711771277137714771577167717771877197720772177227723772477257726772777287729773077317732773377347735773677377738773977407741774277437744774577467747774877497750775177527753775477557756775777587759776077617762776377647765776677677768776977707771777277737774777577767
77777787779778077817782778377847785778677877788778977907791779277937794779577967797779877997800780178027803780478057806780778087809781078117812781378147815781678177818781978207821782278237824782578267827782878297830783178327833783478357836783778387839784078417842784378447845784678477848784978507851785278537854785578567857785878597860786178627863786478657866786778687869787078717872787378747875787678777878787978807881788278837884788578867887788878897890789178927893789478957896789778987899790079017902790379047905790679077908790979107911791279137914791579167917791879197920792179227923792479257926792779287929793079317932793379347935793679377938793979407941794279437944794579467947794879497950795179527953795479557956795779587959796079617962796379647965796679677968796979707971797279737974797579767977797879797980798179827983798479857986798779887989799079917992799379947995799679977998799980008001800280038004800580068007800880098010801180128013801480158016801780188019802080218022802380248025802680278028802980308031803280338034803580368037803880398040804180428043804480458046804780488049805080518052805380548055805680578058805980608061806280638064806580668067806880698070807180728073807480758076807780788079808080818082808380848085808680878088808980908091809280938094809580968097809880998100810181028103810481058106810781088109811081118112811381148115811681178118811981208121812281238124812581268127812881298130813181328133813481358136813781388139814081418142814381448145814681478148814981508151815281538154815581568157815881598160816181628163816481658166816781688169817081718172817381748175817681778178817981808181818281838184818581868187818881898190819181928193819481958196819781988199820082018202820382048205820682078208820982108211821282138214821582168217821882198220822182228223822482258226822782288229823082318232823382348235823682378238823982408241824282438244824582468247824882498250825182528253825482558256825782588259826082618262826382648265826682678268826982708271827282738274827582768
27782788279828082818282828382848285828682878288828982908291829282938294829582968297829882998300830183028303830483058306830783088309831083118312831383148315831683178318831983208321832283238324832583268327832883298330833183328333833483358336833783388339834083418342834383448345834683478348834983508351835283538354835583568357835883598360836183628363836483658366836783688369837083718372837383748375837683778378837983808381838283838384838583868387838883898390839183928393839483958396839783988399840084018402840384048405840684078408840984108411841284138414841584168417841884198420842184228423842484258426842784288429843084318432843384348435843684378438843984408441844284438444844584468447844884498450845184528453845484558456845784588459846084618462846384648465846684678468846984708471847284738474847584768477847884798480848184828483848484858486848784888489849084918492849384948495849684978498849985008501850285038504850585068507850885098510851185128513851485158516851785188519852085218522852385248525852685278528852985308531853285338534853585368537853885398540854185428543854485458546854785488549855085518552855385548555855685578558855985608561856285638564856585668567856885698570857185728573857485758576857785788579858085818582858385848585858685878588858985908591859285938594859585968597859885998600860186028603860486058606860786088609861086118612861386148615861686178618861986208621862286238624862586268627862886298630863186328633863486358636863786388639864086418642864386448645864686478648864986508651865286538654865586568657865886598660866186628663866486658666866786688669867086718672867386748675867686778678867986808681868286838684868586868687868886898690869186928693869486958696869786988699870087018702870387048705870687078708870987108711871287138714871587168717871887198720872187228723872487258726872787288729873087318732873387348735873687378738873987408741874287438744874587468747874887498750875187528753875487558756875787588759876087618762876387648765876687678768876987708771877287738774877587768
77787788779878087818782878387848785878687878788878987908791879287938794879587968797879887998800880188028803880488058806880788088809881088118812881388148815881688178818881988208821882288238824882588268827882888298830883188328833883488358836883788388839884088418842884388448845884688478848884988508851885288538854885588568857885888598860886188628863886488658866886788688869887088718872887388748875887688778878887988808881888288838884888588868887888888898890889188928893889488958896889788988899890089018902890389048905890689078908890989108911891289138914891589168917891889198920892189228923892489258926892789288929893089318932893389348935893689378938893989408941894289438944894589468947894889498950895189528953895489558956895789588959896089618962896389648965896689678968896989708971897289738974897589768977897889798980898189828983898489858986898789888989899089918992899389948995899689978998899990009001900290039004900590069007900890099010901190129013901490159016901790189019902090219022902390249025902690279028902990309031903290339034903590369037903890399040904190429043904490459046904790489049905090519052905390549055905690579058905990609061906290639064906590669067906890699070907190729073907490759076907790789079908090819082908390849085908690879088908990909091909290939094909590969097909890999100910191029103910491059106910791089109911091119112911391149115911691179118911991209121912291239124912591269127912891299130913191329133913491359136913791389139914091419142914391449145914691479148914991509151915291539154915591569157915891599160916191629163916491659166916791689169917091719172917391749175917691779178917991809181918291839184918591869187918891899190919191929193919491959196919791989199920092019202920392049205920692079208920992109211921292139214921592169217921892199220922192229223922492259226922792289229923092319232923392349235923692379238923992409241924292439244924592469247924892499250925192529253925492559256925792589259926092619262926392649265926692679268926992709271927292739274927592769
27792789279928092819282928392849285928692879288928992909291929292939294929592969297929892999300930193029303930493059306930793089309931093119312931393149315931693179318931993209321932293239324932593269327932893299330933193329333933493359336933793389339934093419342934393449345934693479348934993509351935293539354935593569357935893599360936193629363936493659366936793689369937093719372937393749375937693779378937993809381938293839384938593869387938893899390939193929393939493959396939793989399940094019402940394049405940694079408940994109411941294139414941594169417941894199420942194229423942494259426942794289429943094319432943394349435943694379438943994409441944294439444944594469447944894499450945194529453945494559456945794589459946094619462946394649465946694679468946994709471947294739474947594769477947894799480948194829483948494859486948794889489949094919492949394949495949694979498949995009501950295039504950595069507950895099510951195129513951495159516951795189519952095219522952395249525952695279528952995309531953295339534953595369537953895399540954195429543954495459546954795489549955095519552955395549555955695579558955995609561956295639564956595669567956895699570957195729573957495759576957795789579958095819582958395849585958695879588958995909591959295939594959595969597959895999600960196029603960496059606960796089609961096119612961396149615961696179618961996209621962296239624962596269627962896299630963196329633963496359636963796389639964096419642964396449645964696479648964996509651965296539654965596569657965896599660966196629663966496659666966796689669967096719672967396749675967696779678967996809681968296839684968596869687968896899690969196929693969496959696969796989699970097019702970397049705970697079708970997109711971297139714971597169717971897199720972197229723972497259726972797289729973097319732973397349735973697379738973997409741974297439744974597469747974897499750975197529753975497559756975797589759976097619762976397649765976697679768976997709771977297739774977597769
77797789779978097819782978397849785978697879788978997909791979297939794979597969797979897999800980198029803980498059806980798089809981098119812981398149815981698179818981998209821982298239824982598269827982898299830983198329833983498359836983798389839984098419842984398449845984698479848984998509851985298539854985598569857985898599860986198629863986498659866986798689869987098719872987398749875987698779878987998809881988298839884988598869887988898899890989198929893989498959896989798989899990099019902990399049905990699079908990999109911991299139914991599169917991899199920992199229923992499259926992799289929993099319932993399349935993699379938993999409941994299439944994599469947994899499950995199529953995499559956995799589959996099619962996399649965996699679968996999709971997299739974997599769977997899799980998199829983998499859986998799889989999099919992999399949995999699979998999910000100011000210003100041000510006100071000810009100101001110012100131001410015100161001710018100191002010021100221002310024100251002610027100281002910030100311003210033100341003510036100371003810039100401004110042100431004410045100461004710048100491005010051100521005310054100551005610057100581005910060100611006210063100641006510066100671006810069100701007110072100731007410075100761007710078100791008010081100821008310084100851008610087100881008910090100911009210093100941009510096100971009810099101001010110102101031010410105101061010710108101091011010111101121011310114101151011610117101181011910120101211012210123101241012510126101271012810129101301013110132101331013410135101361013710138101391014010141101421014310144101451014610147101481014910150101511015210153101541015510156101571015810159101601016110162101631016410165101661016710168101691017010171101721017310174101751017610177101781017910180101811018210183101841018510186101871018810189101901019110192101931019410195101961019710198101991020010201102021020310204102051020610207102081020910210102111021210213102141021510216102171021810219102201022
11022210223102241022510226102271022810229102301023110232102331023410235102361023710238102391024010241102421024310244102451024610247102481024910250102511025210253102541025510256102571025810259102601026110262102631026410265102661026710268102691027010271102721027310274102751027610277102781027910280102811028210283102841028510286102871028810289102901029110292102931029410295102961029710298102991030010301103021030310304103051030610307103081030910310103111031210313103141031510316103171031810319103201032110322103231032410325103261032710328103291033010331103321033310334103351033610337103381033910340103411034210343103441034510346103471034810349103501035110352103531035410355103561035710358103591036010361103621036310364103651036610367103681036910370103711037210373103741037510376103771037810379103801038110382103831038410385103861038710388103891039010391103921039310394103951039610397103981039910400104011040210403104041040510406104071040810409104101041110412104131041410415104161041710418104191042010421104221042310424104251042610427104281042910430104311043210433104341043510436104371043810439104401044110442104431044410445104461044710448104491045010451104521045310454104551045610457104581045910460104611046210463104641046510466104671046810469104701047110472104731047410475104761047710478104791048010481104821048310484104851048610487104881048910490104911049210493104941049510496104971049810499105001050110502105031050410505105061050710508105091051010511105121051310514105151051610517105181051910520105211052210523105241052510526105271052810529105301053110532105331053410535105361053710538105391054010541105421054310544105451054610547105481054910550105511055210553105541055510556105571055810559105601056110562105631056410565105661056710568105691057010571105721057310574105751057610577105781057910580105811058210583105841058510586105871058810589105901059110592105931059410595105961059710598105991060010601106021060310604106051060610607106081060910610106111061210613106141061510616106171061810619106201062
11062210623106241062510626106271062810629106301063110632106331063410635106361063710638106391064010641106421064310644106451064610647106481064910650106511065210653106541065510656106571065810659106601066110662106631066410665106661066710668106691067010671106721067310674106751067610677106781067910680106811068210683106841068510686106871068810689106901069110692106931069410695106961069710698106991070010701107021070310704107051070610707107081070910710107111071210713107141071510716107171071810719107201072110722107231072410725107261072710728107291073010731107321073310734107351073610737107381073910740107411074210743107441074510746107471074810749107501075110752107531075410755107561075710758107591076010761107621076310764107651076610767107681076910770107711077210773107741077510776107771077810779107801078110782107831078410785107861078710788107891079010791107921079310794107951079610797107981079910800108011080210803108041080510806108071080810809108101081110812108131081410815108161081710818108191082010821108221082310824108251082610827108281082910830108311083210833108341083510836108371083810839108401084110842108431084410845108461084710848108491085010851108521085310854108551085610857108581085910860108611086210863108641086510866108671086810869108701087110872108731087410875108761087710878108791088010881108821088310884108851088610887108881088910890108911089210893108941089510896108971089810899109001090110902109031090410905109061090710908109091091010911109121091310914109151091610917109181091910920109211092210923109241092510926109271092810929109301093110932109331093410935109361093710938109391094010941109421094310944109451094610947109481094910950109511095210953109541095510956109571095810959109601096110962109631096410965109661096710968109691097010971109721097310974109751097610977109781097910980109811098210983109841098510986109871098810989109901099110992109931099410995109961099710998109991100011001110021100311004110051100611007110081100911010110111101211013110141101511016110171101811019110201102
11102211023110241102511026110271102811029110301103111032110331103411035110361103711038110391104011041110421104311044110451104611047110481104911050110511105211053110541105511056110571105811059110601106111062110631106411065110661106711068110691107011071110721107311074110751107611077110781107911080110811108211083110841108511086110871108811089110901109111092110931109411095110961109711098110991110011101111021110311104111051110611107111081110911110111111111211113111141111511116111171111811119111201112111122111231112411125111261112711128111291113011131111321113311134111351113611137111381113911140111411114211143111441114511146111471114811149111501115111152111531115411155111561115711158111591116011161111621116311164111651116611167111681116911170111711117211173111741117511176111771117811179111801118111182111831118411185111861118711188111891119011191111921119311194111951119611197111981119911200112011120211203112041120511206112071120811209112101121111212112131121411215112161121711218112191122011221112221122311224112251122611227112281122911230112311123211233112341123511236112371123811239112401124111242112431124411245112461124711248112491125011251112521125311254112551125611257112581125911260112611126211263112641126511266112671126811269112701127111272112731127411275112761127711278112791128011281112821128311284112851128611287112881128911290112911129211293112941129511296112971129811299113001130111302113031130411305113061130711308113091131011311113121131311314113151131611317113181131911320113211132211323113241132511326113271132811329113301133111332113331133411335113361133711338113391134011341113421134311344113451134611347113481134911350113511135211353113541135511356113571135811359113601136111362113631136411365113661136711368113691137011371113721137311374113751137611377113781137911380113811138211383113841138511386113871138811389113901139111392113931139411395113961139711398113991140011401114021140311404114051140611407114081140911410114111141211413114141141511416114171141811419114201142
11142211423114241142511426114271142811429114301143111432114331143411435114361143711438114391144011441114421144311444114451144611447114481144911450114511145211453114541145511456114571145811459114601146111462114631146411465114661146711468114691147011471114721147311474114751147611477114781147911480114811148211483114841148511486114871148811489114901149111492114931149411495114961149711498114991150011501115021150311504115051150611507115081150911510115111151211513115141151511516115171151811519115201152111522115231152411525115261152711528115291153011531115321153311534115351153611537115381153911540115411154211543115441154511546115471154811549115501155111552115531155411555115561155711558115591156011561115621156311564115651156611567115681156911570115711157211573115741157511576115771157811579115801158111582115831158411585115861158711588115891159011591115921159311594115951159611597115981159911600116011160211603116041160511606116071160811609116101161111612116131161411615116161161711618116191162011621116221162311624116251162611627116281162911630116311163211633116341163511636116371163811639116401164111642116431164411645116461164711648116491165011651116521165311654116551165611657116581165911660116611166211663116641166511666116671166811669116701167111672116731167411675116761167711678116791168011681116821168311684116851168611687116881168911690116911169211693116941169511696116971169811699117001170111702117031170411705117061170711708117091171011711117121171311714117151171611717117181171911720117211172211723117241172511726117271172811729117301173111732117331173411735117361173711738117391174011741117421174311744117451174611747117481174911750117511175211753117541175511756117571175811759117601176111762117631176411765117661176711768117691177011771117721177311774117751177611777117781177911780117811178211783117841178511786117871178811789117901179111792117931179411795117961179711798117991180011801118021180311804118051180611807118081180911810118111181211813118141181511816118171181811819118201182
11182211823118241182511826118271182811829118301183111832118331183411835118361183711838118391184011841118421184311844118451184611847118481184911850118511185211853118541185511856118571185811859118601186111862118631186411865118661186711868118691187011871118721187311874118751187611877118781187911880118811188211883118841188511886118871188811889118901189111892118931189411895118961189711898118991190011901119021190311904119051190611907119081190911910119111191211913119141191511916119171191811919119201192111922119231192411925119261192711928119291193011931119321193311934119351193611937119381193911940119411194211943119441194511946119471194811949119501195111952119531195411955119561195711958119591196011961119621196311964119651196611967119681196911970119711197211973119741197511976119771197811979119801198111982119831198411985119861198711988119891199011991119921199311994119951199611997119981199912000120011200212003120041200512006120071200812009120101201112012120131201412015120161201712018120191202012021120221202312024120251202612027120281202912030120311203212033120341203512036120371203812039120401204112042120431204412045120461204712048120491205012051120521205312054120551205612057120581205912060120611206212063120641206512066120671206812069120701207112072120731207412075120761207712078120791208012081120821208312084120851208612087120881208912090120911209212093120941209512096120971209812099121001210112102121031210412105121061210712108121091211012111121121211312114121151211612117121181211912120121211212212123121241212512126121271212812129121301213112132121331213412135121361213712138121391214012141121421214312144121451214612147121481214912150121511215212153121541215512156121571215812159121601216112162121631216412165121661216712168121691217012171121721217312174121751217612177121781217912180121811218212183121841218512186121871218812189121901219112192121931219412195121961219712198121991220012201122021220312204122051220612207122081220912210122111221212213122141221512216122171221812219122201222
11222212223122241222512226122271222812229122301223112232122331223412235122361223712238122391224012241122421224312244122451224612247122481224912250122511225212253122541225512256122571225812259122601226112262122631226412265122661226712268122691227012271122721227312274122751227612277122781227912280122811228212283122841228512286122871228812289122901229112292122931229412295122961229712298122991230012301123021230312304123051230612307123081230912310123111231212313123141231512316123171231812319123201232112322123231232412325123261232712328123291233012331123321233312334123351233612337123381233912340123411234212343123441234512346123471234812349123501235112352123531235412355123561235712358123591236012361123621236312364123651236612367123681236912370123711237212373123741237512376123771237812379123801238112382123831238412385123861238712388123891239012391123921239312394123951239612397123981239912400124011240212403124041240512406124071240812409124101241112412124131241412415124161241712418124191242012421124221242312424124251242612427124281242912430124311243212433124341243512436124371243812439124401244112442124431244412445124461244712448124491245012451124521245312454124551245612457124581245912460124611246212463124641246512466124671246812469124701247112472124731247412475124761247712478124791248012481124821248312484124851248612487124881248912490124911249212493124941249512496124971249812499125001250112502125031250412505125061250712508125091251012511125121251312514125151251612517125181251912520125211252212523125241252512526125271252812529125301253112532125331253412535125361253712538125391254012541125421254312544125451254612547125481254912550125511255212553125541255512556125571255812559125601256112562125631256412565125661256712568125691257012571125721257312574125751257612577125781257912580125811258212583125841258512586125871258812589125901259112592125931259412595125961259712598125991260012601126021260312604126051260612607126081260912610126111261212613126141261512616126171261812619126201262
11262212623126241262512626126271262812629126301263112632126331263412635126361263712638126391264012641126421264312644126451264612647126481264912650126511265212653126541265512656126571265812659126601266112662126631266412665126661266712668126691267012671126721267312674126751267612677126781267912680126811268212683126841268512686126871268812689126901269112692126931269412695126961269712698126991270012701127021270312704127051270612707127081270912710127111271212713127141271512716127171271812719127201272112722127231272412725127261272712728127291273012731127321273312734127351273612737127381273912740127411274212743127441274512746127471274812749127501275112752127531275412755127561275712758127591276012761127621276312764127651276612767127681276912770127711277212773127741277512776127771277812779127801278112782127831278412785127861278712788127891279012791127921279312794127951279612797127981279912800128011280212803128041280512806128071280812809128101281112812128131281412815128161281712818128191282012821128221282312824128251282612827128281282912830128311283212833128341283512836128371283812839128401284112842128431284412845128461284712848128491285012851128521285312854128551285612857128581285912860128611286212863128641286512866128671286812869128701287112872128731287412875128761287712878128791288012881128821288312884128851288612887128881288912890128911289212893128941289512896128971289812899129001290112902129031290412905129061290712908129091291012911129121291312914129151291612917129181291912920129211292212923129241292512926129271292812929129301293112932129331293412935129361293712938129391294012941129421294312944129451294612947129481294912950129511295212953129541295512956129571295812959129601296112962129631296412965129661296712968129691297012971129721297312974129751297612977129781297912980129811298212983129841298512986129871298812989129901299112992129931299412995129961299712998129991300013001130021300313004130051300613007130081300913010130111301213013130141301513016130171301813019130201302
11302213023130241302513026130271302813029130301303113032130331303413035130361303713038130391304013041130421304313044130451304613047130481304913050130511305213053130541305513056130571305813059130601306113062130631306413065130661306713068130691307013071130721307313074130751307613077130781307913080130811308213083130841308513086130871308813089130901309113092130931309413095130961309713098130991310013101131021310313104131051310613107131081310913110131111311213113131141311513116131171311813119131201312113122131231312413125131261312713128131291313013131131321313313134131351313613137131381313913140131411314213143131441314513146131471314813149131501315113152131531315413155131561315713158131591316013161131621316313164131651316613167131681316913170131711317213173131741317513176131771317813179131801318113182131831318413185131861318713188131891319013191131921319313194131951319613197131981319913200132011320213203132041320513206132071320813209132101321113212132131321413215132161321713218132191322013221132221322313224132251322613227132281322913230132311323213233132341323513236132371323813239132401324113242132431324413245132461324713248132491325013251132521325313254132551325613257132581325913260132611326213263132641326513266132671326813269132701327113272132731327413275132761327713278132791328013281132821328313284132851328613287132881328913290132911329213293132941329513296132971329813299133001330113302133031330413305133061330713308133091331013311133121331313314133151331613317133181331913320133211332213323133241332513326133271332813329133301333113332133331333413335133361333713338133391334013341133421334313344133451334613347133481334913350133511335213353133541335513356133571335813359133601336113362133631336413365133661336713368133691337013371133721337313374133751337613377133781337913380133811338213383133841338513386133871338813389133901339113392133931339413395133961339713398133991340013401134021340313404134051340613407134081340913410134111341213413134141341513416134171341813419134201342
11342213423134241342513426134271342813429134301343113432134331343413435134361343713438134391344013441134421344313444134451344613447134481344913450134511345213453134541345513456134571345813459134601346113462134631346413465134661346713468134691347013471134721347313474134751347613477134781347913480134811348213483134841348513486134871348813489134901349113492134931349413495134961349713498134991350013501135021350313504135051350613507135081350913510135111351213513135141351513516135171351813519135201352113522135231352413525135261352713528135291353013531135321353313534135351353613537135381353913540135411354213543135441354513546135471354813549135501355113552135531355413555135561355713558135591356013561135621356313564135651356613567135681356913570135711357213573135741357513576135771357813579135801358113582135831358413585135861358713588135891359013591135921359313594135951359613597135981359913600136011360213603136041360513606136071360813609136101361113612136131361413615136161361713618136191362013621136221362313624136251362613627136281362913630136311363213633136341363513636136371363813639136401364113642136431364413645136461364713648136491365013651136521365313654136551365613657136581365913660136611366213663136641366513666136671366813669136701367113672136731367413675136761367713678136791368013681136821368313684136851368613687136881368913690136911369213693136941369513696136971369813699137001370113702137031370413705137061370713708137091371013711137121371313714137151371613717137181371913720137211372213723137241372513726137271372813729137301373113732137331373413735137361373713738137391374013741137421374313744137451374613747137481374913750137511375213753137541375513756137571375813759137601376113762137631376413765137661376713768137691377013771137721377313774137751377613777137781377913780137811378213783137841378513786137871378813789137901379113792137931379413795137961379713798137991380013801138021380313804138051380613807138081380913810138111381213813138141381513816138171381813819138201382
11382213823138241382513826138271382813829138301383113832138331383413835138361383713838138391384013841138421384313844138451384613847138481384913850138511385213853138541385513856138571385813859138601386113862138631386413865138661386713868138691387013871138721387313874138751387613877138781387913880138811388213883138841388513886138871388813889138901389113892138931389413895138961389713898138991390013901139021390313904139051390613907139081390913910139111391213913139141391513916139171391813919139201392113922139231392413925139261392713928139291393013931139321393313934139351393613937139381393913940139411394213943139441394513946139471394813949139501395113952139531395413955139561395713958139591396013961139621396313964139651396613967139681396913970139711397213973139741397513976139771397813979139801398113982139831398413985139861398713988139891399013991139921399313994139951399613997139981399914000140011400214003140041400514006140071400814009140101401114012140131401414015140161401714018140191402014021140221402314024140251402614027140281402914030140311403214033140341403514036140371403814039140401404114042140431404414045140461404714048140491405014051140521405314054140551405614057140581405914060140611406214063140641406514066140671406814069140701407114072140731407414075140761407714078140791408014081140821408314084140851408614087140881408914090140911409214093140941409514096140971409814099141001410114102141031410414105141061410714108141091411014111141121411314114141151411614117141181411914120141211412214123141241412514126141271412814129141301413114132141331413414135141361413714138141391414014141141421414314144141451414614147141481414914150141511415214153141541415514156141571415814159141601416114162141631416414165141661416714168141691417014171141721417314174141751417614177141781417914180141811418214183141841418514186141871418814189141901419114192141931419414195141961419714198141991420014201142021420314204142051420614207142081420914210142111421214213142141421514216142171421814219142201422
11422214223142241422514226142271422814229142301423114232142331423414235142361423714238142391424014241142421424314244142451424614247142481424914250142511425214253142541425514256142571425814259142601426114262142631426414265142661426714268142691427014271142721427314274142751427614277142781427914280142811428214283142841428514286142871428814289142901429114292142931429414295142961429714298142991430014301143021430314304143051430614307143081430914310143111431214313143141431514316143171431814319143201432114322143231432414325143261432714328143291433014331143321433314334143351433614337143381433914340143411434214343143441434514346143471434814349143501435114352143531435414355143561435714358143591436014361143621436314364143651436614367143681436914370143711437214373143741437514376143771437814379143801438114382143831438414385143861438714388143891439014391143921439314394143951439614397143981439914400144011440214403144041440514406144071440814409144101441114412144131441414415144161441714418144191442014421144221442314424144251442614427144281442914430144311443214433144341443514436144371443814439144401444114442144431444414445144461444714448144491445014451144521445314454144551445614457144581445914460144611446214463144641446514466144671446814469144701447114472144731447414475144761447714478144791448014481144821448314484144851448614487144881448914490144911449214493144941449514496144971449814499145001450114502145031450414505145061450714508145091451014511145121451314514145151451614517145181451914520145211452214523145241452514526145271452814529145301453114532145331453414535145361453714538145391454014541145421454314544145451454614547145481454914550145511455214553145541455514556145571455814559145601456114562145631456414565145661456714568145691457014571145721457314574145751457614577145781457914580145811458214583145841458514586145871458814589145901459114592145931459414595145961459714598145991460014601146021460314604146051460614607146081460914610146111461214613146141461514616146171461814619146201462
11462214623146241462514626146271462814629146301463114632146331463414635146361463714638146391464014641146421464314644146451464614647146481464914650146511465214653146541465514656146571465814659146601466114662146631466414665146661466714668146691467014671146721467314674146751467614677146781467914680146811468214683146841468514686146871468814689146901469114692146931469414695146961469714698146991470014701147021470314704147051470614707147081470914710147111471214713147141471514716147171471814719147201472114722147231472414725147261472714728147291473014731147321473314734147351473614737147381473914740147411474214743147441474514746147471474814749147501475114752147531475414755147561475714758147591476014761147621476314764147651476614767147681476914770147711477214773147741477514776147771477814779147801478114782147831478414785147861478714788147891479014791147921479314794147951479614797147981479914800148011480214803148041480514806148071480814809148101481114812148131481414815148161481714818148191482014821148221482314824148251482614827148281482914830148311483214833148341483514836148371483814839148401484114842148431484414845148461484714848148491485014851148521485314854148551485614857148581485914860148611486214863148641486514866148671486814869148701487114872148731487414875148761487714878148791488014881148821488314884148851488614887148881488914890148911489214893148941489514896148971489814899149001490114902149031490414905149061490714908149091491014911149121491314914149151491614917149181491914920149211492214923149241492514926149271492814929149301493114932149331493414935149361493714938149391494014941149421494314944149451494614947149481494914950149511495214953149541495514956149571495814959149601496114962149631496414965149661496714968149691497014971149721497314974149751497614977149781497914980149811498214983149841498514986149871498814989149901499114992149931499414995149961499714998149991500015001150021500315004150051500615007150081500915010150111501215013150141501515016150171501815019150201502
11502215023150241502515026150271502815029150301503115032150331503415035150361503715038150391504015041150421504315044150451504615047150481504915050150511505215053150541505515056150571505815059150601506115062150631506415065150661506715068150691507015071150721507315074150751507615077150781507915080150811508215083150841508515086150871508815089150901509115092150931509415095150961509715098150991510015101151021510315104151051510615107151081510915110151111511215113151141511515116151171511815119151201512115122151231512415125151261512715128151291513015131151321513315134151351513615137151381513915140151411514215143151441514515146151471514815149151501515115152151531515415155151561515715158151591516015161151621516315164151651516615167151681516915170151711517215173151741517515176151771517815179151801518115182151831518415185151861518715188151891519015191151921519315194151951519615197151981519915200152011520215203152041520515206152071520815209152101521115212152131521415215152161521715218152191522015221152221522315224152251522615227152281522915230152311523215233152341523515236152371523815239152401524115242152431524415245152461524715248152491525015251152521525315254152551525615257152581525915260152611526215263152641526515266152671526815269152701527115272152731527415275152761527715278152791528015281152821528315284152851528615287152881528915290152911529215293152941529515296152971529815299153001530115302153031530415305153061530715308153091531015311153121531315314153151531615317153181531915320153211532215323153241532515326153271532815329153301533115332153331533415335153361533715338153391534015341153421534315344153451534615347153481534915350153511535215353153541535515356153571535815359153601536115362153631536415365153661536715368153691537015371153721537315374153751537615377153781537915380153811538215383153841538515386153871538815389153901539115392153931539415395153961539715398153991540015401154021540315404154051540615407154081540915410154111541215413154141541515416154171541815419154201542
11542215423154241542515426154271542815429154301543115432154331543415435154361543715438154391544015441154421544315444154451544615447154481544915450154511545215453154541545515456154571545815459154601546115462154631546415465154661546715468154691547015471154721547315474154751547615477154781547915480154811548215483154841548515486154871548815489154901549115492154931549415495154961549715498154991550015501155021550315504155051550615507155081550915510155111551215513155141551515516155171551815519155201552115522155231552415525155261552715528155291553015531155321553315534155351553615537155381553915540155411554215543155441554515546155471554815549155501555115552155531555415555155561555715558155591556015561155621556315564155651556615567155681556915570155711557215573155741557515576155771557815579155801558115582155831558415585155861558715588155891559015591155921559315594155951559615597155981559915600156011560215603156041560515606156071560815609156101561115612156131561415615156161561715618156191562015621156221562315624156251562615627156281562915630156311563215633156341563515636156371563815639156401564115642156431564415645156461564715648156491565015651156521565315654156551565615657156581565915660156611566215663156641566515666156671566815669156701567115672156731567415675156761567715678156791568015681156821568315684156851568615687156881568915690156911569215693156941569515696156971569815699157001570115702157031570415705157061570715708157091571015711157121571315714157151571615717157181571915720157211572215723157241572515726157271572815729157301573115732157331573415735157361573715738157391574015741157421574315744157451574615747157481574915750157511575215753157541575515756157571575815759157601576115762157631576415765157661576715768157691577015771157721577315774157751577615777157781577915780157811578215783157841578515786157871578815789157901579115792157931579415795157961579715798157991580015801158021580315804158051580615807158081580915810158111581215813158141581515816158171581815819158201582
11582215823158241582515826158271582815829158301583115832158331583415835158361583715838158391584015841158421584315844158451584615847158481584915850158511585215853158541585515856158571585815859158601586115862158631586415865158661586715868158691587015871158721587315874158751587615877158781587915880158811588215883158841588515886158871588815889158901589115892158931589415895158961589715898158991590015901159021590315904159051590615907159081590915910159111591215913159141591515916159171591815919159201592115922159231592415925159261592715928159291593015931159321593315934159351593615937159381593915940159411594215943159441594515946159471594815949159501595115952159531595415955159561595715958159591596015961159621596315964159651596615967159681596915970159711597215973159741597515976159771597815979159801598115982159831598415985159861598715988159891599015991159921599315994159951599615997159981599916000160011600216003160041600516006160071600816009160101601116012160131601416015160161601716018160191602016021160221602316024160251602616027160281602916030160311603216033160341603516036160371603816039160401604116042160431604416045160461604716048160491605016051160521605316054160551605616057160581605916060160611606216063160641606516066160671606816069160701607116072160731607416075160761607716078160791608016081160821608316084160851608616087160881608916090160911609216093160941609516096160971609816099161001610116102161031610416105161061610716108161091611016111161121611316114161151611616117161181611916120161211612216123161241612516126161271612816129161301613116132161331613416135161361613716138161391614016141161421614316144161451614616147161481614916150161511615216153161541615516156161571615816159161601616116162161631616416165161661616716168161691617016171161721617316174161751617616177161781617916180161811618216183161841618516186161871618816189161901619116192161931619416195161961619716198161991620016201162021620316204162051620616207162081620916210162111621216213162141621516216162171621816219162201622
11622216223162241622516226162271622816229162301623116232162331623416235162361623716238162391624016241162421624316244162451624616247162481624916250162511625216253162541625516256162571625816259162601626116262162631626416265162661626716268162691627016271162721627316274162751627616277162781627916280162811628216283162841628516286162871628816289162901629116292162931629416295162961629716298162991630016301163021630316304163051630616307163081630916310163111631216313163141631516316163171631816319163201632116322163231632416325163261632716328163291633016331163321633316334163351633616337163381633916340163411634216343163441634516346163471634816349163501635116352163531635416355163561635716358163591636016361163621636316364163651636616367163681636916370163711637216373163741637516376163771637816379163801638116382163831638416385163861638716388163891639016391163921639316394163951639616397163981639916400164011640216403164041640516406164071640816409164101641116412164131641416415164161641716418164191642016421164221642316424164251642616427164281642916430164311643216433164341643516436164371643816439164401644116442164431644416445164461644716448164491645016451164521645316454164551645616457164581645916460164611646216463164641646516466164671646816469164701647116472164731647416475164761647716478164791648016481164821648316484164851648616487164881648916490164911649216493164941649516496164971649816499165001650116502165031650416505165061650716508165091651016511165121651316514165151651616517165181651916520165211652216523165241652516526165271652816529165301653116532165331653416535165361653716538165391654016541165421654316544165451654616547165481654916550165511655216553165541655516556165571655816559165601656116562165631656416565165661656716568165691657016571165721657316574165751657616577165781657916580165811658216583165841658516586165871658816589165901659116592165931659416595165961659716598165991660016601166021660316604166051660616607166081660916610166111661216613166141661516616166171661816619166201662
11662216623166241662516626166271662816629166301663116632166331663416635166361663716638166391664016641166421664316644166451664616647166481664916650166511665216653166541665516656166571665816659166601666116662166631666416665166661666716668166691667016671166721667316674166751667616677166781667916680166811668216683166841668516686166871668816689166901669116692166931669416695166961669716698166991670016701167021670316704167051670616707167081670916710167111671216713167141671516716167171671816719167201672116722167231672416725167261672716728167291673016731167321673316734167351673616737167381673916740167411674216743167441674516746167471674816749167501675116752167531675416755167561675716758167591676016761167621676316764167651676616767167681676916770167711677216773167741677516776167771677816779167801678116782167831678416785167861678716788167891679016791167921679316794167951679616797167981679916800168011680216803168041680516806168071680816809168101681116812168131681416815168161681716818168191682016821168221682316824168251682616827168281682916830168311683216833168341683516836168371683816839168401684116842168431684416845168461684716848168491685016851168521685316854168551685616857168581685916860168611686216863168641686516866168671686816869168701687116872168731687416875168761687716878168791688016881168821688316884168851688616887168881688916890168911689216893168941689516896168971689816899169001690116902169031690416905169061690716908169091691016911169121691316914169151691616917169181691916920169211692216923169241692516926169271692816929169301693116932169331693416935169361693716938169391694016941169421694316944169451694616947169481694916950169511695216953169541695516956169571695816959169601696116962169631696416965169661696716968169691697016971169721697316974169751697616977169781697916980169811698216983169841698516986169871698816989169901699116992169931699416995169961699716998169991700017001170021700317004170051700617007170081700917010170111701217013170141701517016170171701817019170201702
11702217023170241702517026170271702817029170301703117032170331703417035170361703717038170391704017041170421704317044170451704617047170481704917050170511705217053170541705517056170571705817059170601706117062170631706417065170661706717068170691707017071170721707317074170751707617077170781707917080170811708217083170841708517086170871708817089170901709117092170931709417095170961709717098170991710017101171021710317104171051710617107171081710917110171111711217113171141711517116171171711817119171201712117122171231712417125171261712717128171291713017131171321713317134171351713617137171381713917140171411714217143171441714517146171471714817149171501715117152171531715417155171561715717158171591716017161171621716317164171651716617167171681716917170171711717217173171741717517176171771717817179171801718117182171831718417185171861718717188171891719017191171921719317194171951719617197171981719917200172011720217203172041720517206172071720817209172101721117212172131721417215172161721717218172191722017221172221722317224172251722617227172281722917230172311723217233172341723517236172371723817239172401724117242172431724417245172461724717248172491725017251172521725317254172551725617257172581725917260172611726217263172641726517266172671726817269172701727117272172731727417275172761727717278172791728017281172821728317284172851728617287172881728917290172911729217293172941729517296172971729817299173001730117302173031730417305173061730717308173091731017311173121731317314173151731617317173181731917320173211732217323173241732517326173271732817329173301733117332173331733417335173361733717338173391734017341173421734317344173451734617347173481734917350173511735217353173541735517356173571735817359173601736117362173631736417365173661736717368173691737017371173721737317374173751737617377173781737917380173811738217383173841738517386173871738817389173901739117392173931739417395173961739717398173991740017401174021740317404174051740617407174081740917410174111741217413174141741517416174171741817419174201742
11742217423174241742517426174271742817429174301743117432174331743417435174361743717438174391744017441174421744317444174451744617447174481744917450174511745217453174541745517456174571745817459174601746117462174631746417465174661746717468174691747017471174721747317474174751747617477174781747917480174811748217483174841748517486174871748817489174901749117492174931749417495174961749717498174991750017501175021750317504175051750617507175081750917510175111751217513175141751517516175171751817519175201752117522175231752417525175261752717528175291753017531175321753317534175351753617537175381753917540175411754217543175441754517546175471754817549175501755117552175531755417555175561755717558175591756017561175621756317564175651756617567175681756917570175711757217573175741757517576175771757817579175801758117582175831758417585175861758717588175891759017591175921759317594175951759617597175981759917600176011760217603176041760517606176071760817609176101761117612176131761417615176161761717618176191762017621176221762317624176251762617627176281762917630176311763217633176341763517636176371763817639176401764117642176431764417645176461764717648176491765017651176521765317654176551765617657176581765917660176611766217663176641766517666176671766817669176701767117672176731767417675176761767717678176791768017681176821768317684176851768617687176881768917690176911769217693176941769517696176971769817699177001770117702177031770417705177061770717708177091771017711177121771317714177151771617717177181771917720177211772217723177241772517726177271772817729177301773117732177331773417735177361773717738177391774017741177421774317744177451774617747177481774917750177511775217753177541775517756177571775817759177601776117762177631776417765177661776717768177691777017771177721777317774177751777617777177781777917780177811778217783177841778517786177871778817789177901779117792177931779417795177961779717798177991780017801178021780317804178051780617807178081780917810178111781217813178141781517816178171781817819178201782
11782217823178241782517826178271782817829178301783117832178331783417835178361783717838178391784017841178421784317844178451784617847178481784917850178511785217853178541785517856178571785817859178601786117862178631786417865178661786717868178691787017871178721787317874178751787617877178781787917880178811788217883178841788517886178871788817889178901789117892178931789417895178961789717898178991790017901179021790317904179051790617907179081790917910179111791217913179141791517916179171791817919179201792117922179231792417925179261792717928179291793017931179321793317934179351793617937179381793917940179411794217943179441794517946179471794817949179501795117952179531795417955179561795717958179591796017961179621796317964179651796617967179681796917970179711797217973179741797517976179771797817979179801798117982179831798417985179861798717988179891799017991179921799317994179951799617997179981799918000180011800218003180041800518006180071800818009180101801118012180131801418015180161801718018180191802018021180221802318024180251802618027180281802918030180311803218033180341803518036180371803818039180401804118042180431804418045180461804718048180491805018051180521805318054180551805618057180581805918060180611806218063180641806518066180671806818069180701807118072180731807418075180761807718078180791808018081180821808318084180851808618087180881808918090180911809218093180941809518096180971809818099181001810118102181031810418105181061810718108181091811018111181121811318114181151811618117181181811918120181211812218123181241812518126181271812818129181301813118132181331813418135181361813718138181391814018141181421814318144181451814618147181481814918150181511815218153181541815518156181571815818159181601816118162181631816418165181661816718168181691817018171181721817318174181751817618177181781817918180181811818218183181841818518186181871818818189181901819118192181931819418195181961819718198181991820018201182021820318204182051820618207182081820918210182111821218213182141821518216182171821818219182201822
11822218223182241822518226182271822818229182301823118232182331823418235182361823718238182391824018241182421824318244182451824618247182481824918250182511825218253182541825518256182571825818259182601826118262182631826418265182661826718268182691827018271182721827318274182751827618277182781827918280182811828218283182841828518286182871828818289182901829118292182931829418295182961829718298182991830018301183021830318304183051830618307183081830918310183111831218313183141831518316183171831818319183201832118322183231832418325183261832718328183291833018331183321833318334183351833618337183381833918340183411834218343183441834518346183471834818349183501835118352183531835418355183561835718358183591836018361183621836318364183651836618367183681836918370183711837218373183741837518376183771837818379183801838118382183831838418385183861838718388183891839018391183921839318394183951839618397183981839918400184011840218403184041840518406184071840818409184101841118412184131841418415184161841718418184191842018421184221842318424184251842618427184281842918430184311843218433184341843518436184371843818439184401844118442184431844418445184461844718448184491845018451184521845318454184551845618457184581845918460184611846218463184641846518466184671846818469184701847118472184731847418475184761847718478184791848018481184821848318484184851848618487184881848918490184911849218493184941849518496184971849818499185001850118502185031850418505185061850718508185091851018511185121851318514185151851618517185181851918520185211852218523185241852518526185271852818529185301853118532185331853418535185361853718538185391854018541185421854318544185451854618547185481854918550185511855218553185541855518556185571855818559185601856118562185631856418565185661856718568185691857018571185721857318574185751857618577185781857918580185811858218583185841858518586185871858818589185901859118592185931859418595185961859718598185991860018601186021860318604186051860618607186081860918610186111861218613186141861518616186171861818619186201862
11862218623186241862518626186271862818629186301863118632186331863418635186361863718638186391864018641186421864318644186451864618647186481864918650186511865218653186541865518656186571865818659186601866118662186631866418665186661866718668186691867018671186721867318674186751867618677186781867918680186811868218683186841868518686186871868818689186901869118692186931869418695186961869718698186991870018701187021870318704187051870618707187081870918710187111871218713187141871518716187171871818719187201872118722187231872418725187261872718728187291873018731187321873318734187351873618737187381873918740187411874218743187441874518746187471874818749187501875118752187531875418755187561875718758187591876018761187621876318764187651876618767187681876918770187711877218773187741877518776187771877818779187801878118782187831878418785187861878718788187891879018791187921879318794187951879618797187981879918800188011880218803188041880518806188071880818809188101881118812188131881418815188161881718818188191882018821188221882318824188251882618827188281882918830188311883218833188341883518836188371883818839188401884118842188431884418845188461884718848188491885018851188521885318854188551885618857188581885918860188611886218863188641886518866188671886818869188701887118872188731887418875188761887718878188791888018881188821888318884188851888618887188881888918890188911889218893188941889518896188971889818899189001890118902189031890418905189061890718908189091891018911189121891318914189151891618917189181891918920189211892218923189241892518926189271892818929189301893118932189331893418935189361893718938189391894018941189421894318944189451894618947189481894918950189511895218953189541895518956189571895818959189601896118962189631896418965189661896718968189691897018971189721897318974189751897618977189781897918980189811898218983189841898518986189871898818989189901899118992189931899418995189961899718998189991900019001190021900319004190051900619007190081900919010190111901219013190141901519016190171901819019190201902
11902219023190241902519026190271902819029190301903119032190331903419035190361903719038190391904019041190421904319044190451904619047190481904919050190511905219053190541905519056190571905819059190601906119062190631906419065190661906719068190691907019071190721907319074190751907619077190781907919080190811908219083190841908519086190871908819089190901909119092190931909419095190961909719098190991910019101191021910319104191051910619107191081910919110191111911219113191141911519116191171911819119191201912119122191231912419125191261912719128191291913019131191321913319134191351913619137191381913919140191411914219143191441914519146191471914819149191501915119152191531915419155191561915719158191591916019161191621916319164191651916619167191681916919170191711917219173191741917519176191771917819179191801918119182191831918419185191861918719188191891919019191191921919319194191951919619197191981919919200192011920219203192041920519206192071920819209192101921119212192131921419215192161921719218192191922019221192221922319224192251922619227192281922919230192311923219233192341923519236192371923819239192401924119242192431924419245192461924719248192491925019251192521925319254192551925619257192581925919260192611926219263192641926519266192671926819269192701927119272192731927419275192761927719278192791928019281192821928319284192851928619287192881928919290192911929219293192941929519296192971929819299193001930119302193031930419305193061930719308193091931019311193121931319314193151931619317193181931919320193211932219323193241932519326193271932819329193301933119332193331933419335193361933719338193391934019341193421934319344193451934619347193481934919350193511935219353193541935519356193571935819359193601936119362193631936419365193661936719368193691937019371193721937319374193751937619377193781937919380193811938219383193841938519386193871938819389193901939119392193931939419395193961939719398193991940019401194021940319404194051940619407194081940919410194111941219413194141941519416194171941819419194201942
11942219423194241942519426194271942819429194301943119432194331943419435194361943719438194391944019441194421944319444194451944619447194481944919450194511945219453194541945519456194571945819459194601946119462194631946419465194661946719468194691947019471194721947319474194751947619477194781947919480194811948219483194841948519486194871948819489194901949119492194931949419495194961949719498194991950019501195021950319504195051950619507195081950919510195111951219513195141951519516195171951819519195201952119522195231952419525195261952719528195291953019531195321953319534195351953619537195381953919540195411954219543195441954519546195471954819549195501955119552195531955419555195561955719558195591956019561195621956319564195651956619567195681956919570195711957219573195741957519576195771957819579195801958119582195831958419585195861958719588195891959019591195921959319594195951959619597195981959919600196011960219603196041960519606196071960819609196101961119612196131961419615196161961719618196191962019621196221962319624196251962619627196281962919630196311963219633196341963519636196371963819639196401964119642196431964419645196461964719648196491965019651196521965319654196551965619657196581965919660196611966219663196641966519666196671966819669196701967119672196731967419675196761967719678196791968019681196821968319684196851968619687196881968919690196911969219693196941969519696196971969819699197001970119702197031970419705197061970719708197091971019711197121971319714197151971619717197181971919720197211972219723197241972519726197271972819729197301973119732197331973419735197361973719738197391974019741197421974319744197451974619747197481974919750197511975219753197541975519756197571975819759197601976119762197631976419765197661976719768197691977019771197721977319774197751977619777197781977919780197811978219783197841978519786197871978819789197901979119792197931979419795197961979719798197991980019801198021980319804198051980619807198081980919810198111981219813198141981519816198171981819819198201982
11982219823198241982519826198271982819829198301983119832198331983419835198361983719838198391984019841198421984319844198451984619847198481984919850198511985219853198541985519856198571985819859198601986119862198631986419865198661986719868198691987019871198721987319874198751987619877198781987919880198811988219883198841988519886198871988819889198901989119892198931989419895198961989719898198991990019901199021990319904199051990619907199081990919910199111991219913199141991519916199171991819919199201992119922199231992419925199261992719928199291993019931199321993319934199351993619937199381993919940199411994219943199441994519946199471994819949199501995119952199531995419955199561995719958199591996019961199621996319964199651996619967199681996919970199711997219973199741997519976199771997819979199801998119982199831998419985199861998719988199891999019991199921999319994199951999619997199981999920000200012000220003200042000520006200072000820009200102001120012200132001420015200162001720018200192002020021200222002320024200252002620027200282002920030200312003220033200342003520036200372003820039200402004120042200432004420045200462004720048200492005020051200522005320054200552005620057200582005920060200612006220063200642006520066200672006820069200702007120072200732007420075200762007720078200792008020081200822008320084200852008620087200882008920090200912009220093200942009520096200972009820099201002010120102201032010420105201062010720108201092011020111201122011320114201152011620117201182011920120201212012220123201242012520126201272012820129201302013120132201332013420135201362013720138201392014020141201422014320144201452014620147201482014920150201512015220153201542015520156201572015820159201602016120162201632016420165201662016720168201692017020171201722017320174201752017620177201782017920180201812018220183201842018520186201872018820189201902019120192201932019420195201962019720198201992020020201202022020320204202052020620207202082020920210202112021220213202142021520216202172021820219202202022
12022220223202242022520226202272022820229202302023120232202332023420235202362023720238202392024020241202422024320244202452024620247202482024920250202512025220253202542025520256202572025820259202602026120262202632026420265202662026720268202692027020271202722027320274202752027620277202782027920280202812028220283202842028520286202872028820289202902029120292202932029420295202962029720298202992030020301203022030320304203052030620307203082030920310203112031220313203142031520316203172031820319203202032120322203232032420325203262032720328203292033020331203322033320334203352033620337203382033920340203412034220343203442034520346203472034820349203502035120352203532035420355203562035720358203592036020361203622036320364203652036620367203682036920370203712037220373203742037520376203772037820379203802038120382203832038420385203862038720388203892039020391203922039320394203952039620397203982039920400204012040220403204042040520406204072040820409204102041120412204132041420415204162041720418204192042020421204222042320424204252042620427204282042920430204312043220433204342043520436204372043820439204402044120442204432044420445204462044720448204492045020451204522045320454204552045620457204582045920460204612046220463204642046520466204672046820469204702047120472204732047420475204762047720478204792048020481204822048320484204852048620487204882048920490204912049220493204942049520496204972049820499205002050120502205032050420505205062050720508205092051020511205122051320514205152051620517205182051920520205212052220523205242052520526205272052820529205302053120532205332053420535205362053720538205392054020541205422054320544205452054620547205482054920550205512055220553205542055520556205572055820559205602056120562205632056420565205662056720568205692057020571205722057320574205752057620577205782057920580205812058220583205842058520586205872058820589205902059120592205932059420595205962059720598205992060020601206022060320604206052060620607206082060920610206112061220613206142061520616206172061820619206202062
12062220623206242062520626206272062820629206302063120632206332063420635206362063720638206392064020641206422064320644206452064620647206482064920650206512065220653206542065520656206572065820659206602066120662206632066420665206662066720668206692067020671206722067320674206752067620677206782067920680206812068220683206842068520686206872068820689206902069120692206932069420695206962069720698206992070020701207022070320704207052070620707207082070920710207112071220713207142071520716207172071820719207202072120722207232072420725207262072720728207292073020731207322073320734207352073620737207382073920740207412074220743207442074520746207472074820749207502075120752207532075420755207562075720758207592076020761207622076320764207652076620767207682076920770207712077220773207742077520776207772077820779207802078120782207832078420785207862078720788207892079020791207922079320794207952079620797207982079920800208012080220803208042080520806208072080820809208102081120812208132081420815208162081720818208192082020821208222082320824208252082620827208282082920830208312083220833208342083520836208372083820839208402084120842208432084420845208462084720848208492085020851208522085320854208552085620857208582085920860208612086220863208642086520866208672086820869208702087120872208732087420875208762087720878208792088020881208822088320884208852088620887208882088920890208912089220893208942089520896208972089820899209002090120902209032090420905209062090720908209092091020911209122091320914209152091620917209182091920920209212092220923209242092520926209272092820929209302093120932209332093420935209362093720938209392094020941209422094320944209452094620947209482094920950209512095220953209542095520956209572095820959209602096120962209632096420965209662096720968209692097020971209722097320974209752097620977209782097920980209812098220983209842098520986209872098820989209902099120992209932099420995209962099720998209992100021001210022100321004210052100621007210082100921010210112101221013210142101521016210172101821019210202102
12102221023210242102521026210272102821029210302103121032210332103421035210362103721038210392104021041210422104321044210452104621047210482104921050210512105221053210542105521056210572105821059210602106121062210632106421065210662106721068210692107021071210722107321074210752107621077210782107921080210812108221083210842108521086210872108821089210902109121092210932109421095210962109721098210992110021101211022110321104211052110621107211082110921110211112111221113211142111521116211172111821119211202112121122211232112421125211262112721128211292113021131211322113321134211352113621137211382113921140211412114221143211442114521146211472114821149211502115121152211532115421155211562115721158211592116021161211622116321164211652116621167211682116921170211712117221173211742117521176211772117821179211802118121182211832118421185211862118721188211892119021191211922119321194211952119621197211982119921200212012120221203212042120521206212072120821209212102121121212212132121421215212162121721218212192122021221212222122321224212252122621227212282122921230212312123221233212342123521236212372123821239212402124121242212432124421245212462124721248212492125021251212522125321254212552125621257212582125921260212612126221263212642126521266212672126821269212702127121272212732127421275212762127721278212792128021281212822128321284212852128621287212882128921290212912129221293212942129521296212972129821299213002130121302213032130421305213062130721308213092131021311213122131321314213152131621317213182131921320213212132221323213242132521326213272132821329213302133121332213332133421335213362133721338213392134021341213422134321344213452134621347213482134921350213512135221353213542135521356213572135821359213602136121362213632136421365213662136721368213692137021371213722137321374213752137621377213782137921380213812138221383213842138521386213872138821389213902139121392213932139421395213962139721398213992140021401214022140321404214052140621407214082140921410214112141221413214142141521416214172141821419214202142
12142221423214242142521426214272142821429214302143121432214332143421435214362143721438214392144021441214422144321444214452144621447214482144921450214512145221453214542145521456214572145821459214602146121462214632146421465214662146721468214692147021471214722147321474214752147621477214782147921480214812148221483214842148521486214872148821489214902149121492214932149421495214962149721498214992150021501215022150321504215052150621507215082150921510215112151221513215142151521516215172151821519215202152121522215232152421525215262152721528215292153021531215322153321534215352153621537215382153921540215412154221543215442154521546215472154821549215502155121552215532155421555215562155721558215592156021561215622156321564215652156621567215682156921570215712157221573215742157521576215772157821579215802158121582215832158421585215862158721588215892159021591215922159321594215952159621597215982159921600216012160221603216042160521606216072160821609216102161121612216132161421615216162161721618216192162021621216222162321624216252162621627216282162921630216312163221633216342163521636216372163821639216402164121642216432164421645216462164721648216492165021651216522165321654216552165621657216582165921660216612166221663216642166521666216672166821669216702167121672216732167421675216762167721678216792168021681216822168321684216852168621687216882168921690216912169221693216942169521696216972169821699217002170121702217032170421705217062170721708217092171021711217122171321714217152171621717217182171921720217212172221723217242172521726217272172821729217302173121732217332173421735217362173721738217392174021741217422174321744217452174621747217482174921750217512175221753217542175521756217572175821759217602176121762217632176421765217662176721768217692177021771217722177321774217752177621777217782177921780217812178221783217842178521786217872178821789217902179121792217932179421795217962179721798217992180021801218022180321804218052180621807218082180921810218112181221813218142181521816218172181821819218202182
12182221823218242182521826218272182821829218302183121832218332183421835218362183721838218392184021841218422184321844218452184621847218482184921850218512185221853218542185521856218572185821859218602186121862218632186421865218662186721868218692187021871218722187321874218752187621877218782187921880218812188221883218842188521886218872188821889218902189121892218932189421895218962189721898218992190021901219022190321904219052190621907219082190921910219112191221913219142191521916219172191821919219202192121922219232192421925219262192721928219292193021931219322193321934219352193621937219382193921940219412194221943219442194521946219472194821949219502195121952219532195421955219562195721958219592196021961219622196321964219652196621967219682196921970219712197221973219742197521976219772197821979219802198121982219832198421985219862198721988219892199021991219922199321994219952199621997219982199922000220012200222003220042200522006220072200822009220102201122012220132201422015220162201722018220192202022021220222202322024220252202622027220282202922030220312203222033220342203522036220372203822039220402204122042220432204422045220462204722048220492205022051220522205322054220552205622057220582205922060220612206222063220642206522066220672206822069220702207122072220732207422075220762207722078220792208022081220822208322084220852208622087220882208922090220912209222093220942209522096220972209822099221002210122102221032210422105221062210722108221092211022111221122211322114221152211622117221182211922120221212212222123221242212522126221272212822129221302213122132221332213422135221362213722138221392214022141221422214322144221452214622147221482214922150221512215222153221542215522156221572215822159221602216122162221632216422165221662216722168221692217022171221722217322174221752217622177221782217922180221812218222183221842218522186221872218822189221902219122192221932219422195221962219722198221992220022201222022220322204222052220622207222082220922210222112221222213222142221522216222172221822219222202222
12222222223222242222522226222272222822229222302223122232222332223422235222362223722238222392224022241222422224322244222452224622247222482224922250222512225222253222542225522256222572225822259222602226122262222632226422265222662226722268222692227022271222722227322274222752227622277222782227922280222812228222283222842228522286222872228822289222902229122292222932229422295222962229722298222992230022301223022230322304223052230622307223082230922310223112231222313223142231522316223172231822319223202232122322223232232422325223262232722328223292233022331223322233322334223352233622337223382233922340223412234222343223442234522346223472234822349223502235122352223532235422355223562235722358223592236022361223622236322364223652236622367223682236922370223712237222373223742237522376223772237822379223802238122382223832238422385223862238722388223892239022391223922239322394223952239622397223982239922400224012240222403224042240522406224072240822409224102241122412224132241422415224162241722418224192242022421224222242322424224252242622427224282242922430224312243222433224342243522436224372243822439224402244122442224432244422445224462244722448224492245022451224522245322454224552245622457224582245922460224612246222463224642246522466224672246822469224702247122472224732247422475224762247722478224792248022481224822248322484224852248622487224882248922490224912249222493224942249522496224972249822499225002250122502225032250422505225062250722508225092251022511225122251322514225152251622517225182251922520225212252222523225242252522526225272252822529225302253122532225332253422535225362253722538225392254022541225422254322544225452254622547225482254922550225512255222553225542255522556225572255822559225602256122562225632256422565225662256722568225692257022571225722257322574225752257622577225782257922580225812258222583225842258522586225872258822589225902259122592225932259422595225962259722598225992260022601226022260322604226052260622607226082260922610226112261222613226142261522616226172261822619226202262
12262222623226242262522626226272262822629226302263122632226332263422635226362263722638226392264022641226422264322644226452264622647226482264922650226512265222653226542265522656226572265822659226602266122662226632266422665226662266722668226692267022671226722267322674226752267622677226782267922680226812268222683226842268522686226872268822689226902269122692226932269422695226962269722698226992270022701227022270322704227052270622707227082270922710227112271222713227142271522716227172271822719227202272122722227232272422725227262272722728227292273022731227322273322734227352273622737227382273922740227412274222743227442274522746227472274822749227502275122752227532275422755227562275722758227592276022761227622276322764227652276622767227682276922770227712277222773227742277522776227772277822779227802278122782227832278422785227862278722788227892279022791227922279322794227952279622797227982279922800228012280222803228042280522806228072280822809228102281122812228132281422815228162281722818228192282022821228222282322824228252282622827228282282922830228312283222833228342283522836228372283822839228402284122842228432284422845228462284722848228492285022851228522285322854228552285622857228582285922860228612286222863228642286522866228672286822869228702287122872228732287422875228762287722878228792288022881228822288322884228852288622887228882288922890228912289222893228942289522896228972289822899229002290122902229032290422905229062290722908229092291022911229122291322914229152291622917229182291922920229212292222923229242292522926229272292822929229302293122932229332293422935229362293722938229392294022941229422294322944229452294622947229482294922950229512295222953229542295522956229572295822959229602296122962229632296422965229662296722968229692297022971229722297322974229752297622977229782297922980229812298222983229842298522986229872298822989229902299122992229932299422995229962299722998229992300023001230022300323004230052300623007230082300923010230112301223013230142301523016230172301823019230202302
12302223023230242302523026230272302823029230302303123032230332303423035230362303723038230392304023041230422304323044230452304623047230482304923050230512305223053230542305523056230572305823059230602306123062230632306423065230662306723068230692307023071230722307323074230752307623077230782307923080230812308223083230842308523086230872308823089230902309123092230932309423095230962309723098230992310023101231022310323104231052310623107231082310923110231112311223113231142311523116231172311823119231202312123122231232312423125231262312723128231292313023131231322313323134231352313623137231382313923140231412314223143231442314523146231472314823149231502315123152231532315423155231562315723158231592316023161231622316323164231652316623167231682316923170231712317223173231742317523176231772317823179231802318123182231832318423185231862318723188231892319023191231922319323194231952319623197231982319923200232012320223203232042320523206232072320823209232102321123212232132321423215232162321723218232192322023221232222322323224232252322623227232282322923230232312323223233232342323523236232372323823239232402324123242232432324423245232462324723248232492325023251232522325323254232552325623257232582325923260232612326223263232642326523266232672326823269232702327123272232732327423275232762327723278232792328023281232822328323284232852328623287232882328923290232912329223293232942329523296232972329823299233002330123302233032330423305233062330723308233092331023311233122331323314233152331623317233182331923320233212332223323233242332523326233272332823329233302333123332233332333423335233362333723338233392334023341233422334323344233452334623347233482334923350233512335223353233542335523356233572335823359233602336123362233632336423365233662336723368233692337023371233722337323374233752337623377233782337923380233812338223383233842338523386233872338823389233902339123392233932339423395233962339723398233992340023401234022340323404234052340623407234082340923410234112341223413234142341523416234172341823419234202342
12342223423234242342523426234272342823429234302343123432234332343423435234362343723438234392344023441234422344323444234452344623447234482344923450234512345223453234542345523456234572345823459234602346123462234632346423465234662346723468234692347023471234722347323474234752347623477234782347923480234812348223483234842348523486234872348823489234902349123492234932349423495234962349723498234992350023501235022350323504235052350623507235082350923510235112351223513235142351523516235172351823519235202352123522235232352423525235262352723528235292353023531235322353323534235352353623537235382353923540235412354223543235442354523546235472354823549235502355123552235532355423555235562355723558235592356023561235622356323564235652356623567235682356923570235712357223573235742357523576235772357823579235802358123582235832358423585235862358723588235892359023591235922359323594235952359623597235982359923600236012360223603236042360523606236072360823609236102361123612236132361423615236162361723618236192362023621236222362323624236252362623627236282362923630236312363223633236342363523636236372363823639236402364123642236432364423645236462364723648236492365023651236522365323654236552365623657236582365923660236612366223663236642366523666236672366823669236702367123672236732367423675236762367723678236792368023681236822368323684236852368623687236882368923690236912369223693236942369523696236972369823699237002370123702237032370423705237062370723708237092371023711237122371323714237152371623717237182371923720237212372223723237242372523726237272372823729237302373123732237332373423735237362373723738237392374023741237422374323744237452374623747237482374923750237512375223753237542375523756237572375823759237602376123762237632376423765237662376723768237692377023771237722377323774237752377623777237782377923780237812378223783237842378523786237872378823789237902379123792237932379423795237962379723798237992380023801238022380323804238052380623807238082380923810238112381223813238142381523816238172381823819238202382
12382223823238242382523826238272382823829238302383123832238332383423835238362383723838238392384023841238422384323844238452384623847238482384923850238512385223853238542385523856238572385823859238602386123862238632386423865238662386723868238692387023871238722387323874238752387623877238782387923880238812388223883238842388523886238872388823889238902389123892238932389423895238962389723898238992390023901239022390323904239052390623907239082390923910239112391223913239142391523916239172391823919239202392123922239232392423925239262392723928239292393023931239322393323934239352393623937239382393923940239412394223943239442394523946239472394823949239502395123952239532395423955239562395723958239592396023961239622396323964239652396623967239682396923970239712397223973239742397523976239772397823979239802398123982239832398423985239862398723988239892399023991239922399323994239952399623997239982399924000240012400224003240042400524006240072400824009240102401124012240132401424015240162401724018240192402024021240222402324024240252402624027240282402924030240312403224033240342403524036240372403824039240402404124042240432404424045240462404724048240492405024051240522405324054240552405624057240582405924060240612406224063240642406524066240672406824069240702407124072240732407424075240762407724078240792408024081240822408324084240852408624087240882408924090240912409224093240942409524096240972409824099241002410124102241032410424105241062410724108241092411024111241122411324114241152411624117241182411924120241212412224123241242412524126241272412824129241302413124132241332413424135241362413724138241392414024141241422414324144241452414624147241482414924150241512415224153241542415524156241572415824159241602416124162241632416424165241662416724168241692417024171241722417324174241752417624177241782417924180241812418224183241842418524186241872418824189241902419124192241932419424195241962419724198241992420024201242022420324204242052420624207242082420924210242112421224213242142421524216242172421824219242202422
12422224223242242422524226242272422824229242302423124232242332423424235242362423724238242392424024241242422424324244242452424624247242482424924250242512425224253242542425524256242572425824259242602426124262242632426424265242662426724268242692427024271242722427324274242752427624277242782427924280242812428224283242842428524286242872428824289242902429124292242932429424295242962429724298242992430024301243022430324304243052430624307243082430924310243112431224313243142431524316243172431824319243202432124322243232432424325243262432724328243292433024331243322433324334243352433624337243382433924340243412434224343243442434524346243472434824349243502435124352243532435424355243562435724358243592436024361243622436324364243652436624367243682436924370243712437224373243742437524376243772437824379243802438124382243832438424385243862438724388243892439024391243922439324394243952439624397243982439924400244012440224403244042440524406244072440824409244102441124412244132441424415244162441724418244192442024421244222442324424244252442624427244282442924430244312443224433244342443524436244372443824439244402444124442244432444424445244462444724448244492445024451244522445324454244552445624457244582445924460244612446224463244642446524466244672446824469244702447124472244732447424475244762447724478244792448024481244822448324484244852448624487244882448924490244912449224493244942449524496244972449824499245002450124502245032450424505245062450724508245092451024511245122451324514245152451624517245182451924520245212452224523245242452524526245272452824529245302453124532245332453424535245362453724538245392454024541245422454324544245452454624547245482454924550245512455224553245542455524556245572455824559245602456124562245632456424565245662456724568245692457024571245722457324574245752457624577245782457924580245812458224583245842458524586245872458824589245902459124592245932459424595245962459724598245992460024601246022460324604246052460624607246082460924610246112461224613246142461524616246172461824619246202462
12462224623246242462524626246272462824629246302463124632246332463424635246362463724638246392464024641246422464324644246452464624647246482464924650246512465224653246542465524656246572465824659246602466124662246632466424665246662466724668246692467024671246722467324674246752467624677246782467924680246812468224683246842468524686246872468824689246902469124692246932469424695246962469724698246992470024701247022470324704247052470624707247082470924710247112471224713247142471524716247172471824719247202472124722247232472424725247262472724728247292473024731247322473324734247352473624737247382473924740247412474224743247442474524746247472474824749247502475124752247532475424755247562475724758247592476024761247622476324764247652476624767247682476924770247712477224773247742477524776247772477824779247802478124782247832478424785247862478724788247892479024791247922479324794247952479624797247982479924800248012480224803248042480524806248072480824809248102481124812248132481424815248162481724818248192482024821248222482324824248252482624827248282482924830248312483224833248342483524836248372483824839248402484124842248432484424845248462484724848248492485024851248522485324854248552485624857248582485924860248612486224863248642486524866248672486824869248702487124872248732487424875248762487724878248792488024881248822488324884248852488624887248882488924890248912489224893248942489524896248972489824899249002490124902249032490424905249062490724908249092491024911249122491324914249152491624917249182491924920249212492224923249242492524926249272492824929249302493124932249332493424935249362493724938249392494024941249422494324944249452494624947249482494924950249512495224953249542495524956249572495824959249602496124962249632496424965249662496724968249692497024971249722497324974249752497624977249782497924980249812498224983249842498524986249872498824989249902499124992249932499424995249962499724998249992500025001250022500325004250052500625007250082500925010250112501225013250142501525016250172501825019250202502
12502225023250242502525026250272502825029250302503125032250332503425035250362503725038250392504025041250422504325044250452504625047250482504925050250512505225053250542505525056250572505825059250602506125062250632506425065250662506725068250692507025071250722507325074250752507625077250782507925080250812508225083250842508525086250872508825089250902509125092250932509425095250962509725098250992510025101251022510325104251052510625107251082510925110251112511225113251142511525116251172511825119251202512125122251232512425125251262512725128251292513025131251322513325134251352513625137251382513925140251412514225143251442514525146251472514825149251502515125152251532515425155251562515725158251592516025161251622516325164251652516625167251682516925170251712517225173251742517525176251772517825179251802518125182251832518425185251862518725188251892519025191251922519325194251952519625197251982519925200252012520225203252042520525206252072520825209252102521125212252132521425215252162521725218252192522025221252222522325224252252522625227252282522925230252312523225233252342523525236252372523825239252402524125242252432524425245252462524725248252492525025251252522525325254252552525625257252582525925260252612526225263252642526525266252672526825269252702527125272252732527425275252762527725278252792528025281252822528325284252852528625287252882528925290252912529225293252942529525296252972529825299253002530125302253032530425305253062530725308253092531025311253122531325314253152531625317253182531925320253212532225323253242532525326253272532825329253302533125332253332533425335253362533725338253392534025341253422534325344253452534625347253482534925350253512535225353253542535525356253572535825359253602536125362253632536425365253662536725368253692537025371253722537325374253752537625377253782537925380253812538225383253842538525386253872538825389253902539125392253932539425395253962539725398253992540025401254022540325404254052540625407254082540925410254112541225413254142541525416254172541825419254202542
12542225423254242542525426254272542825429254302543125432254332543425435254362543725438254392544025441254422544325444254452544625447254482544925450254512545225453254542545525456254572545825459254602546125462254632546425465254662546725468254692547025471254722547325474254752547625477254782547925480254812548225483254842548525486254872548825489254902549125492254932549425495254962549725498254992550025501255022550325504255052550625507255082550925510255112551225513255142551525516255172551825519255202552125522255232552425525255262552725528255292553025531255322553325534255352553625537255382553925540255412554225543255442554525546255472554825549255502555125552255532555425555255562555725558255592556025561255622556325564255652556625567255682556925570255712557225573255742557525576255772557825579255802558125582255832558425585255862558725588255892559025591255922559325594255952559625597255982559925600256012560225603256042560525606256072560825609256102561125612256132561425615256162561725618256192562025621256222562325624256252562625627256282562925630256312563225633256342563525636256372563825639256402564125642256432564425645256462564725648256492565025651256522565325654256552565625657256582565925660256612566225663256642566525666256672566825669256702567125672256732567425675256762567725678256792568025681256822568325684256852568625687256882568925690256912569225693256942569525696256972569825699257002570125702257032570425705257062570725708257092571025711257122571325714257152571625717257182571925720257212572225723257242572525726257272572825729257302573125732257332573425735257362573725738257392574025741257422574325744257452574625747257482574925750257512575225753257542575525756257572575825759257602576125762257632576425765257662576725768257692577025771257722577325774257752577625777257782577925780257812578225783257842578525786257872578825789257902579125792257932579425795257962579725798257992580025801258022580325804258052580625807258082580925810258112581225813258142581525816258172581825819258202582
1258222582325824258252582625827258282582925830258312583225833258342583525836258372583825839258402584125842258432584425845258462584725848258492585025851258522585325854258552585625857258582585925860258612586225863258642586525866258672586825869258702587125872258732587425875258762587725878258792588025881258822588325884258852588625887258882588925890258912589225893258942589525896258972589825899 |
- '''
- PyMuPDF implemented on top of MuPDF Python bindings.
- License:
- SPDX-License-Identifier: GPL-3.0-only
- '''
- # To reduce startup times, we don't import everything we require here.
- #
- import atexit
- import binascii
- import collections
- import inspect
- import io
- import math
- import os
- import pathlib
- import glob
- import re
- import string
- import sys
- import tarfile
- import time
- import typing
- import warnings
- import weakref
- import zipfile
- from . import extra
- import importlib.util
- # Set up _g_out_log and _g_out_message from environment variables.
- #
- # PYMUPDF_MESSAGE controls the destination of user messages (from function
- # `pymupdf.message()`).
- #
- # PYMUPDF_LOG controls the destination of internal development logging (from
- # function `pymupdf.log()`).
- #
- # For syntax, see _make_output()'s `text` arg.
- #
- def _make_output(
- *,
- text=None,
- fd=None,
- stream=None,
- path=None,
- path_append=None,
- pylogging=None,
- pylogging_logger=None,
- pylogging_level=None,
- pylogging_name=None,
- default=None,
- ):
- '''
- Returns a stream that writes to a specified destination, which can be a
- file descriptor, a file, an existing stream or Python's `logging' system.
-
- Args:
- text: text specification of destination.
- fd:<int> - write to file descriptor.
- path:<str> - write to file.
- path+:<str> - append to file.
- logging:<items> - write to Python `logging` module.
- items: comma-separated <name=value> pairs.
- level=<int>
- name=<str>.
- Other names are ignored.
-
- fd: an int file descriptor.
- stream: something with methods .write(text) and .flush().
- If specified we simply return <stream>.
- path: a file path.
- If specified we return a stream that writes to this file.
- path_append: a file path.
- If specified we return a stream that appends to this file.
- pylogging*:
- if any of these args is not None, we return a stream that writes to
- Python's `logging` module.
-
- pylogging:
- Unused other than to activate use of logging module.
- pylogging_logger:
- A logging.Logger; If None, set from <pylogging_name>.
- pylogging_level:
- An int log level, if None we use
- pylogging_logger.getEffectiveLevel().
- pylogging_name:
- Only used if <pylogging_logger> is None:
- If <pylogging_name> is None, we set it to 'pymupdf'.
- Then we do: pylogging_logger = logging.getLogger(pylogging_name)
- '''
- if text is not None:
- # Textual specification, for example from from environment variable.
- if text.startswith('fd:'):
- fd = int(text[3:])
- elif text.startswith('path:'):
- path = text[5:]
- elif text.startswith('path+'):
- path_append = text[5:]
- elif text.startswith('logging:'):
- pylogging = True
- items_d = dict()
- items = text[8:].split(',')
- #items_d = {n: v for (n, v) in [item.split('=', 1) for item in items]}
- for item in items:
- if not item:
- continue
- nv = item.split('=', 1)
- assert len(nv) == 2, f'Need `=` in {item=}.'
- n, v = nv
- items_d[n] = v
- pylogging_level = items_d.get('level')
- if pylogging_level is not None:
- pylogging_level = int(pylogging_level)
- pylogging_name = items_d.get('name', 'pymupdf')
- else:
- assert 0, f'Expected prefix `fd:`, `path:`. `path+:` or `logging:` in {text=}.'
-
- if fd is not None:
- ret = io.open(fd, mode='w', closefd=False)
- elif stream is not None:
- assert hasattr(stream, 'write')
- assert hasattr(stream, 'flush')
- ret = stream
- elif path is not None:
- ret = io.open(path, 'w')
- elif path_append is not None:
- ret = io.open(path_append, 'a')
- elif (0
- or pylogging is not None
- or pylogging_logger is not None
- or pylogging_level is not None
- or pylogging_name is not None
- ):
- import logging
- if pylogging_logger is None:
- if pylogging_name is None:
- pylogging_name = 'pymupdf'
- pylogging_logger = logging.getLogger(pylogging_name)
- assert isinstance(pylogging_logger, logging.Logger)
- if pylogging_level is None:
- pylogging_level = pylogging_logger.getEffectiveLevel()
- class Out:
- def write(self, text):
- # `logging` module appends newlines, but so does the `print()`
- # functions in our caller message() and log() fns, so we need to
- # remove them here.
- text = text.rstrip('\n')
- if text:
- pylogging_logger.log(pylogging_level, text)
- def flush(self):
- pass
- ret = Out()
- else:
- ret = default
- return ret
- # Set stream used by PyMuPDF messaging.
- _g_out_message = _make_output(text=os.environ.get('PYMUPDF_MESSAGE'), default=sys.stdout)
- # Set stream used by PyMuPDF development/debugging logging.
- _g_out_log = _make_output(text=os.environ.get('PYMUPDF_LOG'), default=sys.stdout)
- # Things for testing logging.
- _g_log_items = list()
- _g_log_items_active = False
def _log_items():
    '''
    Returns the module-level list to which log() appends while capture is
    active. The list itself is returned, not a copy.
    '''
    captured = _g_log_items
    return captured
def _log_items_active(active):
    '''
    Sets whether log() also appends each log line to the list returned by
    _log_items().

    Args:
        active: stored as-is; log() tests it for truthiness.
    '''
    global _g_log_items_active
    _g_log_items_active = active
-
def _log_items_clear():
    '''
    Empties the captured-log list in place, so existing references to it see
    the change.
    '''
    _g_log_items.clear()
def set_messages(
        *,
        text=None,
        fd=None,
        stream=None,
        path=None,
        path_append=None,
        pylogging=None,
        pylogging_logger=None,
        pylogging_level=None,
        pylogging_name=None,
        ):
    '''
    Sets destination of PyMuPDF messages.

    All args are forwarded unchanged to _make_output(); see that function for
    their meaning. If no destination is specified, the current destination is
    kept.
    '''
    global _g_out_message
    destination = dict(
            text=text,
            fd=fd,
            stream=stream,
            path=path,
            path_append=path_append,
            pylogging=pylogging,
            pylogging_logger=pylogging_logger,
            pylogging_level=pylogging_level,
            pylogging_name=pylogging_name,
            )
    # The existing stream is the fallback, so an empty call changes nothing.
    _g_out_message = _make_output(default=_g_out_message, **destination)
def set_log(
        *,
        text=None,
        fd=None,
        stream=None,
        path=None,
        path_append=None,
        pylogging=None,
        pylogging_logger=None,
        pylogging_level=None,
        pylogging_name=None,
        ):
    '''
    Sets destination of PyMuPDF development/debugging logging.

    All args are forwarded unchanged to _make_output(); see that function for
    their meaning. If no destination is specified, the current destination is
    kept.
    '''
    global _g_out_log
    destination = dict(
            text=text,
            fd=fd,
            stream=stream,
            path=path,
            path_append=path_append,
            pylogging=pylogging,
            pylogging_logger=pylogging_logger,
            pylogging_level=pylogging_level,
            pylogging_name=pylogging_name,
            )
    # The existing stream is the fallback, so an empty call changes nothing.
    _g_out_log = _make_output(default=_g_out_log, **destination)
def log( text='', caller=1):
    '''
    For development/debugging diagnostics.

    Prefixes <text> with `<filename>:<line>:<function>(): ` of a calling
    frame, then writes it to the current log destination and, if capture is
    active, appends it to the captured-log list.

    Args:
        text: the diagnostic text.
        caller: index into inspect.stack() of the frame to report.
    '''
    try:
        frames = inspect.stack(context=0)
    except StopIteration:
        # No stack information available; output <text> without a prefix.
        frames = None
    if frames is not None:
        record = frames[caller]
        try:
            where = os.path.relpath(record.filename)
        except Exception:   # Can fail on windows.
            where = record.filename
        text = f'{where}:{record.lineno}:{record.function}(): {text}'
    if _g_log_items_active:
        _g_log_items.append(text)
    if _g_out_log:
        print(text, file=_g_out_log, flush=True)
def message(text=''):
    '''
    For user messages.

    Writes <text> plus a newline to the current message destination and
    flushes it. Does nothing if the destination is falsy.
    '''
    # It looks like `print()` does nothing if sys.stdout is None (without
    # raising an exception), but we don't rely on this.
    if not _g_out_message:
        return
    print(text, file=_g_out_message, flush=True)
def exception_info():
    '''
    Sends a header line followed by traceback.format_exc() to log().
    '''
    import traceback
    log('exception_info:')
    formatted = traceback.format_exc()
    log(formatted)
- # PDF names must not contain these characters:
- INVALID_NAME_CHARS = set(string.whitespace + "()<>[]{}/%" + chr(0))
def get_env_bool( name, default):
    '''
    Returns `True`, `False` or `default` depending on whether $<name> is '1',
    '0' or unset.

    Args:
        name: name of the environment variable.
        default: value to return if the variable is unset.

    Raises:
        AssertionError: the variable is set to anything other than '1' or '0'.
    '''
    v = os.environ.get( name)
    if v is None:
        ret = default
    elif v == '1':
        ret = True
    elif v == '0':
        ret = False
    else:
        # Raise explicitly rather than `assert 0`: under `python -O` the
        # assert is stripped and we would fail later with an unhelpful
        # UnboundLocalError for `ret`.
        raise AssertionError(f'Unrecognised value for {name}: {v!r}')
    if ret != default:
        log(f'Using non-default setting from {name}: {v!r}')
    return ret
def get_env_int( name, default):
    '''
    Returns $<name> converted with int(), or `default` if $<name> is unset.

    A value that int() cannot parse propagates int()'s ValueError. If the
    result differs from `default`, this is reported with log().
    '''
    raw = os.environ.get( name)
    value = default if raw is None else int(raw)
    if value != default:
        log(f'Using non-default setting from {name}: {raw}')
    return value
- # All our `except ...` blocks output diagnostics if `g_exceptions_verbose` is
- # true.
- g_exceptions_verbose = get_env_int( 'PYMUPDF_EXCEPTIONS_VERBOSE', 1)
- # $PYMUPDF_USE_EXTRA overrides whether to use optimised C fns in `extra`.
- #
- g_use_extra = get_env_bool( 'PYMUPDF_USE_EXTRA', True)
- # Global switches
- #
- class _Globals:
- def __init__(self):
- self.no_device_caching = 0
- self.small_glyph_heights = 0
- self.subset_fontnames = 0
- self.skip_quad_corrections = 0
- _globals = _Globals()
- _get_layout: typing.Optional[typing.Callable] = None
- # global switch ensuring that the recommendation message is shown at most once
- _recommend_layout = True # must be referred to as "global" everywhere
def no_recommend_layout():
    """Switch off the layout recommendation message.

    Clears the module-level `_recommend_layout` flag, which
    `_warn_layout_once()` checks before printing its message.
    """
    global _recommend_layout
    _recommend_layout = False
def _warn_layout_once():
    """Print the layout-package recommendation, at most once.

    The message is printed only if all of these hold, checked in this order:
    the recommendation flag is still set, no layout function is stored in
    `_get_layout`, $PYMUPDF_SUGGEST_LAYOUT_ANALYZER is not "0", and
    `pymupdf.layout` cannot be found. After printing, the flag is cleared.
    """
    global _recommend_layout
    if not _recommend_layout:
        # Already shown, or switched off.
        return
    if _get_layout is not None:
        # A layout function is stored here.
        return
    if os.getenv("PYMUPDF_SUGGEST_LAYOUT_ANALYZER") == "0":
        # Client globally disabled the recommendation.
        return
    if importlib.util.find_spec("pymupdf.layout"):
        # Layout is available in this Python.
        return
    print("Consider using the pymupdf_layout package for a greatly improved page layout analysis.")
    _recommend_layout = False  # never show the message again
- # Optionally use MuPDF via cppyy bindings; experimental and not tested recently
- # as of 2023-01-20 11:51:40
- #
# Select the MuPDF bindings module and bind it to the name `mupdf`.
#
# If $MUPDF_CPPYY is set we use the cppyy bindings: an empty value means
# `import mupdf_cppyy` from the normal module search path, any other value is
# the path of the mupdf_cppyy source file to load.
#
mupdf_cppyy = os.environ.get( 'MUPDF_CPPYY')
if mupdf_cppyy is not None:
    # pylint: disable=all
    log( f'{__file__}: $MUPDF_CPPYY={mupdf_cppyy!r} so attempting to import mupdf_cppyy.')
    # Use .get() so that an unset $PYTHONPATH does not raise KeyError from
    # what is only a diagnostic.
    log( f'{__file__}: $PYTHONPATH={os.environ.get("PYTHONPATH")}')
    if mupdf_cppyy == '':
        import mupdf_cppyy
    else:
        # Import the submodule explicitly; plain `import importlib` does not
        # guarantee that `importlib.machinery` is available as an attribute.
        import importlib.machinery
        # NOTE(review): load_module() is a legacy loader API - confirm it is
        # still available on all supported Python versions.
        mupdf_cppyy = importlib.machinery.SourceFileLoader(
                'mupdf_cppyy',
                mupdf_cppyy
                ).load_module()
    mupdf = mupdf_cppyy.cppyy.gbl.mupdf
else:
    # Use MuPDF Python SWIG bindings. We allow import from either our own
    # directory for conventional wheel installs, or from separate place in case
    # we are using a separately-installed system installation of mupdf.
    #
    try:
        from . import mupdf
    except Exception:
        import mupdf
    # NOTE(review): indentation was lost in the reviewed text; the two calls
    # below are assumed to belong to this SWIG branch - confirm.
    if hasattr(mupdf, 'internal_check_ndebug'):
        mupdf.internal_check_ndebug()
    mupdf.reinit_singlethreaded()
- def _int_rc(text):
- '''
- Converts string to int, ignoring trailing 'rc...'.
- '''
- rc = text.find('rc')
- if rc >= 0:
- text = text[:rc]
- return int(text)
- # Basic version information.
- #
- # (We use `noqa F401` to avoid flake8 errors such as `F401
- # '._build.mupdf_location' imported but unused`.)
- #
- from ._build import mupdf_location # noqa F401
- from ._build import pymupdf_git_branch # noqa F401
- from ._build import pymupdf_git_diff # noqa F401
- from ._build import pymupdf_git_sha # noqa F401
- from ._build import pymupdf_version # noqa F401
- from ._build import pymupdf_version_tuple # noqa F401
- from ._build import swig_version # noqa F401
- from ._build import swig_version_tuple # noqa F401
# MuPDF version string as reported by the bindings.
mupdf_version = mupdf.FZ_VERSION

# Versions as tuples; useful when comparing versions.
#
mupdf_version_tuple = tuple(_int_rc(part) for part in mupdf_version.split('.'))
# The parsed string must agree with the numeric version constants.
assert mupdf_version_tuple == (mupdf.FZ_VERSION_MAJOR, mupdf.FZ_VERSION_MINOR, mupdf.FZ_VERSION_PATCH), \
        f'Inconsistent MuPDF version numbers: {mupdf_version_tuple=} != {(mupdf.FZ_VERSION_MAJOR, mupdf.FZ_VERSION_MINOR, mupdf.FZ_VERSION_PATCH)=}'

# Removed in PyMuPDF-1.26.1.
pymupdf_date = None

# Legacy version information; the date slots are always None.
#
VersionBind = pymupdf_version
VersionFitz = mupdf_version
VersionDate = None
version = (pymupdf_version, mupdf_version, None)
# String formatting.
def _format_g(value, *, fmt='%g'):
    '''
    Format `value` with mupdf.fz_format_double() using `fmt`.

    A list or tuple is formatted item by item (recursively) and the pieces
    are joined with single spaces; no separator is added while the text
    accumulated so far is still empty.
    '''
    if not isinstance(value, (list, tuple)):
        return mupdf.fz_format_double(fmt, value)
    text = ''
    for item in value:
        piece = _format_g(item, fmt=fmt)
        text = f'{text} {piece}' if text else piece
    return text


# Public alias.
format_g = _format_g
# typing.ByteString no longer exists in Python 3.14, and
# collections.abc.Buffer is only available from 3.12, so fall back to an
# explicit union when the typing name is missing.
try:
    ByteString = typing.ByteString
except AttributeError:
    ByteString = bytes | bytearray | memoryview

# Optional-type shorthands used by method annotations.
OptBytes = typing.Optional[ByteString]
OptDict = typing.Optional[dict]
OptFloat = typing.Optional[float]
OptInt = typing.Optional[int]
OptSeq = typing.Optional[typing.Sequence]
OptStr = typing.Optional[str]

# String placeholders for names that annotations refer to.
Page = 'Page_forward_decl'
Point = 'Point_forward_decl'
matrix_like = 'matrix_like'
point_like = 'point_like'
quad_like = 'quad_like'
rect_like = 'rect_like'
def _as_fz_document(document):
    '''
    Return `document` as a `mupdf.FzDocument`.

    A `Document` is unwrapped via `.this` first (ValueError('document
    closed') if it is closed). A `mupdf.FzDocument` is returned as is, a
    `mupdf.PdfDocument` is upcast with `.super()`. `None` and any other
    type assert-fail.
    '''
    if isinstance(document, Document):
        if document.is_closed:
            raise ValueError('document closed')
        document = document.this
    if isinstance(document, mupdf.FzDocument):
        return document
    if isinstance(document, mupdf.PdfDocument):
        return document.super()
    assert document is not None, f'document is None'
    assert 0, f'Unrecognised {type(document)=}'
def _as_pdf_document(document, required=True):
    '''
    Return `document` downcast to a `mupdf.PdfDocument`.

    A `Document` is unwrapped via `.this` first (ValueError('document
    closed') if it is closed). When a `mupdf.FzDocument` cannot be
    downcast, the resulting `mupdf.PdfDocument` has a false `.m_internal`:
    this assert-fails if `required` is true (the default), otherwise that
    object is returned. `None` and any other type assert-fail.
    '''
    if isinstance(document, Document):
        if document.is_closed:
            raise ValueError('document closed')
        document = document.this
    if isinstance(document, mupdf.PdfDocument):
        return document
    if isinstance(document, mupdf.FzDocument):
        pdf = mupdf.PdfDocument(document)
        if required:
            assert pdf.m_internal
        return pdf
    assert document is not None, f'document is None'
    assert 0, f'Unrecognised {type(document)=}'
def _as_fz_page(page):
    '''
    Return `page` as a `mupdf.FzPage`.

    A `Page` is unwrapped via `.this` first. A `mupdf.PdfPage` is upcast
    with `.super()`, a `mupdf.FzPage` is returned as is. `None` and any
    other type assert-fail.
    '''
    if isinstance(page, Page):
        page = page.this
    # PdfPage is tested before FzPage, as in the original ordering.
    if isinstance(page, mupdf.PdfPage):
        return page.super()
    if isinstance(page, mupdf.FzPage):
        return page
    assert page is not None, f'page is None'
    assert 0, f'Unrecognised {type(page)=}'
def _as_pdf_page(page, required=True):
    '''
    Return `page` downcast to a `mupdf.PdfPage`.

    A `Page` is unwrapped via `.this` first. When a `mupdf.FzPage` cannot
    be downcast, the resulting `mupdf.PdfPage` has a false `.m_internal`:
    this assert-fails if `required` is true (the default), otherwise that
    object is returned. `None` and any other type assert-fail.
    '''
    if isinstance(page, Page):
        page = page.this
    if isinstance(page, mupdf.PdfPage):
        return page
    if isinstance(page, mupdf.FzPage):
        pdf_page = mupdf.pdf_page_from_fz_page(page)
        if required:
            assert pdf_page.m_internal
        return pdf_page
    assert page is not None, f'page is None'
    assert 0, f'Unrecognised {type(page)=}'
def _pdf_annot_page(annot):
    '''
    Wrapper for `mupdf.pdf_annot_page()` that raises RuntimeError when
    `annot` is not bound to a page, instead of returning a `mupdf.PdfPage`
    whose `.m_internal` is None.

    [Some other MuPDF functions such as `pdf_update_annot()` already raise a
    similar exception if a pdf_annot's .page field is null.]
    '''
    owner = mupdf.pdf_annot_page(annot)
    if owner.m_internal:
        return owner
    raise RuntimeError('Annot is not bound to a page')
- # Fixme: we don't support JM_MEMORY=1.
- JM_MEMORY = 0
- # Classes
- #
- class Annot:
def __init__(self, annot):
    """Wrap `annot`, which must be a `mupdf.PdfAnnot`; it is kept as `self.this`."""
    assert isinstance(annot, mupdf.PdfAnnot)
    self.this = annot
- def __repr__(self):
- parent = getattr(self, 'parent', '<>')
- return "'%s' annotation on %s" % (self.type[1], str(parent))
- def __str__(self):
- return self.__repr__()
- def _erase(self):
- if getattr(self, "thisown", False):
- self.thisown = False
def _get_redact_values(self):
    """
    Collect the settings of a redaction annotation.

    Returns None when the annotation is not of type redact or when reading
    the annotation object raises. Otherwise returns a dict holding the
    overlay text, the alignment (/Q, default 0), the xref of an ignored
    '/RO' entry if present, plus "rect", "text_color", "fontname",
    "fontsize" and "fill".
    """
    annot = self.this
    if mupdf.pdf_annot_type(annot) != mupdf.PDF_ANNOT_REDACT:
        return

    values = dict()
    try:
        ro = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "RO")
        if ro.m_internal:
            # The key is reported and only its xref is recorded.
            message_warning("Ignoring redaction key '/RO'.")
            values[dictkey_xref] = mupdf.pdf_to_num(ro)

        overlay = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "OverlayText")
        if overlay.m_internal:
            values[dictkey_text] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(overlay))
        else:
            values[dictkey_text] = ''

        quadding = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(annot), PDF_NAME('Q'))
        values[dictkey_align] = mupdf.pdf_to_int(quadding) if quadding.m_internal else 0
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        return

    if not values:
        return values

    values["rect"] = self.rect
    text_color, fontname, fontsize = TOOLS._parse_da(self)
    values["text_color"] = text_color
    values["fontname"] = fontname
    values["fontsize"] = fontsize
    values["fill"] = self.colors["fill"]
    return values
def _getAP(self):
    """
    Return the content of the /AP/N appearance stream.

    With `g_use_extra` the work is delegated to `extra.Annot_getAP()`,
    which must return bytes. Otherwise the stream is loaded here and None
    is returned when /AP/N is not a stream or loading yields no buffer.
    """
    if g_use_extra:
        assert isinstance(self.this, mupdf.PdfAnnot)
        data = extra.Annot_getAP(self.this)
        assert isinstance(data, bytes)
        return data

    annot = self.this
    assert isinstance(annot, mupdf.PdfAnnot)
    ap = mupdf.pdf_dict_getl(mupdf.pdf_annot_obj(annot), PDF_NAME('AP'), PDF_NAME('N'))
    if not mupdf.pdf_is_stream(ap):
        return None
    buf = mupdf.pdf_load_stream(ap)
    if buf and buf.m_internal:
        return JM_BinFromBuffer(buf)
    return None
def _setAP(self, buffer_, rect=0):
    """
    Replace the content of the /AP/N appearance stream with `buffer_`.

    If `rect` is true, the annotation's /Rect is also copied into the
    stream's /BBox. Any exception is caught here; it is only reported
    (when `g_exceptions_verbose` is set) and not re-raised.
    """
    try:
        annot = self.this
        annot_obj = mupdf.pdf_annot_obj(annot)
        page = _pdf_annot_page(annot)
        apobj = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AP'), PDF_NAME('N'))
        # /AP/N must exist and must be a stream.
        if not apobj.m_internal or not mupdf.pdf_is_stream(apobj):
            raise RuntimeError(MSG_BAD_APN)
        new_content = JM_BufferFromBytes(buffer_)
        if not new_content.m_internal:
            raise ValueError(MSG_BAD_BUFFER)
        JM_update_stream(page.doc(), apobj, new_content, 1)
        if rect:
            annot_rect = mupdf.pdf_dict_get_rect(annot_obj, PDF_NAME('Rect'))
            mupdf.pdf_dict_put_rect(apobj, PDF_NAME('BBox'), annot_rect)
    except Exception:
        if g_exceptions_verbose:
            exception_info()
def _update_appearance(self, opacity=-1, blend_mode=None, fill_color=None, rotate=-1):
    """
    Let MuPDF regenerate the annotation appearance, then optionally add
    opacity and blend mode via an /ExtGState entry.

    Args:
        opacity: applied only when 0 <= opacity < 1.
        blend_mode: blend mode name; a falsy value means none.
        fill_color: color sequence, parsed by JM_color_FromSequence().
        rotate: written to /Rotate when >= 0 and the annotation type is
            one of the types listed in the rotation check below.
    Returns:
        True.
    Raises:
        Any exception from either stage is re-raised after a message
        has been emitted.
    """
    annot = self.this
    assert annot.m_internal
    annot_obj = mupdf.pdf_annot_obj( annot)
    page = _pdf_annot_page(annot)
    pdf = page.doc()
    type_ = mupdf.pdf_annot_type( annot)
    # nfcol is used below as the number of leading items of fcol to use.
    nfcol, fcol = JM_color_FromSequence(fill_color)
    try:
        # remove fill color from unsupported annots
        # or if so requested
        if nfcol == 0 or type_ not in (
                mupdf.PDF_ANNOT_SQUARE,
                mupdf.PDF_ANNOT_CIRCLE,
                mupdf.PDF_ANNOT_LINE,
                mupdf.PDF_ANNOT_POLY_LINE,
                mupdf.PDF_ANNOT_POLYGON
                ):
            mupdf.pdf_dict_del( annot_obj, PDF_NAME('IC'))
        elif nfcol > 0:
            mupdf.pdf_set_annot_interior_color( annot, fcol[:nfcol])
        # /Rotate is only written for a non-negative value ...
        insert_rot = 1 if rotate >= 0 else 0
        # ... and only for the annotation types listed here.
        if type_ not in (
                mupdf.PDF_ANNOT_CARET,
                mupdf.PDF_ANNOT_CIRCLE,
                mupdf.PDF_ANNOT_FREE_TEXT,
                mupdf.PDF_ANNOT_FILE_ATTACHMENT,
                mupdf.PDF_ANNOT_INK,
                mupdf.PDF_ANNOT_LINE,
                mupdf.PDF_ANNOT_POLY_LINE,
                mupdf.PDF_ANNOT_POLYGON,
                mupdf.PDF_ANNOT_SQUARE,
                mupdf.PDF_ANNOT_STAMP,
                mupdf.PDF_ANNOT_TEXT,
                ):
            insert_rot = 0
        if insert_rot:
            mupdf.pdf_dict_put_int(annot_obj, PDF_NAME('Rotate'), rotate)
        # insert fill color
        if type_ == mupdf.PDF_ANNOT_FREE_TEXT:
            # FreeText: the fill color is set via pdf_set_annot_color().
            if nfcol > 0:
                mupdf.pdf_set_annot_color(annot, fcol[:nfcol])
        elif nfcol > 0:
            # Other types: write the components as the /IC array.
            col = mupdf.pdf_new_array(page.doc(), nfcol)
            for i in range( nfcol):
                mupdf.pdf_array_push_real(col, fcol[i])
            mupdf.pdf_dict_put(annot_obj, PDF_NAME('IC'), col)
        mupdf.pdf_dirty_annot(annot)
        mupdf.pdf_update_annot(annot)  # let MuPDF update
        pdf.resynth_required = 0
    except Exception as e:
        if g_exceptions_verbose:
            exception_info()
        message( f'cannot update annot: {e}')
        raise

    if (opacity < 0 or opacity >= 1) and not blend_mode:    # no opacity, no blend_mode
        return True
    try:    # create or update /ExtGState
        ap = mupdf.pdf_dict_getl(
                mupdf.pdf_annot_obj(annot),
                PDF_NAME('AP'),
                PDF_NAME('N')
                )
        if not ap.m_internal:   # should never happen
            raise RuntimeError( MSG_BAD_APN)
        resources = mupdf.pdf_dict_get( ap, PDF_NAME('Resources'))
        if not resources.m_internal:    # no Resources yet: make one
            resources = mupdf.pdf_dict_put_dict( ap, PDF_NAME('Resources'), 2)

        # Graphics state dictionary that receives opacity and blend mode.
        alp0 = mupdf.pdf_new_dict( page.doc(), 3)
        if opacity >= 0 and opacity < 1:
            mupdf.pdf_dict_put_real( alp0, PDF_NAME('CA'), opacity)
            mupdf.pdf_dict_put_real( alp0, PDF_NAME('ca'), opacity)
            mupdf.pdf_dict_put_real( annot_obj, PDF_NAME('CA'), opacity)
        if blend_mode:
            mupdf.pdf_dict_put_name( alp0, PDF_NAME('BM'), blend_mode)
            mupdf.pdf_dict_put_name( annot_obj, PDF_NAME('BM'), blend_mode)
        extg = mupdf.pdf_dict_get( resources, PDF_NAME('ExtGState'))
        if not extg.m_internal: # no ExtGState yet: make one
            extg = mupdf.pdf_dict_put_dict( resources, PDF_NAME('ExtGState'), 2)
        # The graphics state is stored under the key /H.
        mupdf.pdf_dict_put( extg, PDF_NAME('H'), alp0)
    except Exception as e:
        if g_exceptions_verbose:    exception_info()
        message( f'cannot set opacity or blend mode\n: {e}')
        raise
    return True
@property
def apn_bbox(self):
    """Bounding box (/BBox) of the appearance stream /AP/N, mapped through the parent page's transformation and derotation matrices; an infinite rectangle is used when /AP/N is missing."""
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    ap = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AP'), PDF_NAME('N'))
    if ap.m_internal:
        raw = JM_py_from_rect(mupdf.pdf_dict_get_rect(ap, PDF_NAME('BBox')))
    else:
        raw = JM_py_from_rect(mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE))
    bbox = Rect(raw) * self.get_parent().transformation_matrix
    bbox *= self.get_parent().derotation_matrix
    return bbox
@property
def apn_matrix(self):
    """/Matrix of the appearance stream /AP/N as a `Matrix`; when /AP/N is missing, the unwrapped `JM_py_from_matrix()` value of a default `mupdf.FzMatrix()` is returned."""
    try:
        CheckParent(self)
        annot = self.this
        assert isinstance(annot, mupdf.PdfAnnot)
        ap = mupdf.pdf_dict_getl(
                mupdf.pdf_annot_obj(annot),
                mupdf.PDF_ENUM_NAME_AP,
                mupdf.PDF_ENUM_NAME_N
                )
        if ap.m_internal:
            stored = mupdf.pdf_dict_get_matrix(ap, mupdf.PDF_ENUM_NAME_Matrix)
            return Matrix(JM_py_from_matrix(stored))
        return JM_py_from_matrix(mupdf.FzMatrix())
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
@property
def blendmode(self):
    """Blend mode name: the annotation's own /BM if present, else the first /BM found in the /AP/N/Resources/ExtGState dictionaries, else None."""
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    direct = mupdf.pdf_dict_get(annot_obj, PDF_NAME('BM'))
    if direct.m_internal:
        return JM_UnicodeFromStr(mupdf.pdf_to_name(direct))

    # loop through the /AP/N/Resources/ExtGState objects
    extgstates = mupdf.pdf_dict_getl(
            annot_obj,
            PDF_NAME('AP'),
            PDF_NAME('N'),
            PDF_NAME('Resources'),
            PDF_NAME('ExtGState'),
            )
    if not mupdf.pdf_is_dict(extgstates):
        return None
    for i in range(mupdf.pdf_dict_len(extgstates)):
        gstate = mupdf.pdf_dict_get_val(extgstates, i)
        if not mupdf.pdf_is_dict(gstate):
            continue
        for j in range(mupdf.pdf_dict_len(gstate)):
            key = mupdf.pdf_dict_get_key(gstate, j)
            if mupdf.pdf_objcmp(key, PDF_NAME('BM')) == 0:
                return JM_UnicodeFromStr(mupdf.pdf_to_name(mupdf.pdf_dict_get_val(gstate, j)))
    return None
@property
def border(self):
    """Border information as returned by JM_annot_border(); an empty dict for annotation types outside the list below."""
    CheckParent(self)
    types_with_border = (
            mupdf.PDF_ANNOT_CIRCLE,
            mupdf.PDF_ANNOT_FREE_TEXT,
            mupdf.PDF_ANNOT_INK,
            mupdf.PDF_ANNOT_LINE,
            mupdf.PDF_ANNOT_POLY_LINE,
            mupdf.PDF_ANNOT_POLYGON,
            mupdf.PDF_ANNOT_SQUARE,
            )
    if self.type[0] not in types_with_border:
        return dict()
    return JM_annot_border(mupdf.pdf_annot_obj(self.this))
def clean_contents(self, sanitize=1):
    """Run MuPDF's annotation content filter over the appearance stream; `sanitize` is passed on to the filter options."""
    CheckParent(self)
    annot = self.this
    options = _make_PdfFilterOptions(recurse=1, instance_forms=0, ascii=0, sanitize=sanitize)
    document = mupdf.pdf_get_bound_document(mupdf.pdf_annot_obj(annot))
    mupdf.pdf_filter_annot_contents(document, annot, options)
@property
def colors(self):
    """Color definitions as returned by JM_annot_colors(); exceptions are reported when `g_exceptions_verbose` is set and then re-raised."""
    try:
        CheckParent(self)
        pdf_annot = self.this
        assert isinstance(pdf_annot, mupdf.PdfAnnot)
        annot_obj = mupdf.pdf_annot_obj(pdf_annot)
        return JM_annot_colors(annot_obj)
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def delete_responses(self):
    """
    Delete 'Popup' and responding annotations.

    Removes every annotation that JM_find_annot_irt() reports for this
    one, deletes this annotation's /Popup entry, and finally removes all
    entries of the page's /Annots array whose /Parent is this annotation.
    """
    CheckParent(self)
    annot = self.this
    annot_obj = mupdf.pdf_annot_obj(annot)
    page = _pdf_annot_page(annot)
    # Keep deleting until no responding annotation is found any more.
    while 1:
        irt_annot = JM_find_annot_irt(annot)
        if not irt_annot:
            break
        mupdf.pdf_delete_annot(page, irt_annot)
    mupdf.pdf_dict_del(annot_obj, PDF_NAME('Popup'))

    annots = mupdf.pdf_dict_get(page.obj(), PDF_NAME('Annots'))
    n = mupdf.pdf_array_len(annots)
    found = 0
    # Walk backwards so that deleting an item does not shift the indices
    # that are still to be visited.
    for i in range(n-1, -1, -1):
        o = mupdf.pdf_array_get(annots, i)
        p = mupdf.pdf_dict_get(o, PDF_NAME('Parent'))
        # Skip entries without a /Parent: it is `p` that gets compared
        # below, so `p` (not `o`) is the object that must be non-null.
        if not p.m_internal:
            continue
        if not mupdf.pdf_objcmp(p, annot_obj):
            mupdf.pdf_array_delete(annots, i)
            found = 1
    if found:
        mupdf.pdf_dict_put(page.obj(), PDF_NAME('Annots'), annots)
@property
def file_info(self):
    """
    Attached file information.

    Returns a dict with the file name (from /FS/UF, else /FS/F), the
    description (/FS/Desc), the stream /Length and the /Params/Size
    value; length and size are -1 when the respective entry is missing.

    Raises TypeError if this is not a file attachment annotation, and
    reports a file data error via RAISEPY() if /FS/EF/F is missing.
    """
    CheckParent(self)
    res = dict()
    length = -1
    size = -1
    desc = None
    # Initialise the name too: neither /UF nor /F may be present, and the
    # lookup below would otherwise leave `filename` unbound.
    filename = None
    annot = self.this
    annot_obj = mupdf.pdf_annot_obj(annot)
    type_ = mupdf.pdf_annot_type(annot)
    if type_ != mupdf.PDF_ANNOT_FILE_ATTACHMENT:
        raise TypeError( MSG_BAD_ANNOT_TYPE)
    stream = mupdf.pdf_dict_getl(
            annot_obj,
            PDF_NAME('FS'),
            PDF_NAME('EF'),
            PDF_NAME('F'),
            )
    if not stream.m_internal:
        RAISEPY( "bad PDF: file entry not found", JM_Exc_FileDataError)
    fs = mupdf.pdf_dict_get(annot_obj, PDF_NAME('FS'))
    # /UF takes precedence over /F.
    o = mupdf.pdf_dict_get(fs, PDF_NAME('UF'))
    if o.m_internal:
        filename = mupdf.pdf_to_text_string(o)
    else:
        o = mupdf.pdf_dict_get(fs, PDF_NAME('F'))
        if o.m_internal:
            filename = mupdf.pdf_to_text_string(o)
    o = mupdf.pdf_dict_get(fs, PDF_NAME('Desc'))
    if o.m_internal:
        desc = mupdf.pdf_to_text_string(o)
    o = mupdf.pdf_dict_get(stream, PDF_NAME('Length'))
    if o.m_internal:
        length = mupdf.pdf_to_int(o)
    o = mupdf.pdf_dict_getl(stream, PDF_NAME('Params'), PDF_NAME('Size'))
    if o.m_internal:
        size = mupdf.pdf_to_int(o)
    # NOTE(review): assumes JM_EscapeStrFromStr() accepts None, as
    # JM_UnicodeFromStr() is already given a possibly-None `desc` - confirm.
    res[ dictkey_filename] = JM_EscapeStrFromStr(filename)
    res[ dictkey_descr] = JM_UnicodeFromStr(desc)
    res[ dictkey_length] = length
    res[ dictkey_size] = size
    return res
@property
def flags(self):
    """Annotation flags as reported by mupdf.pdf_annot_flags()."""
    CheckParent(self)
    return mupdf.pdf_annot_flags(self.this)
def get_file(self):
    """Return the content of the attached file (stream /FS/EF/F); TypeError for annotations that are not file attachments."""
    CheckParent(self)
    annot = self.this
    annot_obj = mupdf.pdf_annot_obj(annot)
    annot_type = mupdf.pdf_annot_type(annot)
    if annot_type != mupdf.PDF_ANNOT_FILE_ATTACHMENT:
        raise TypeError(MSG_BAD_ANNOT_TYPE)
    stream = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('FS'), PDF_NAME('EF'), PDF_NAME('F'))
    if not stream.m_internal:
        RAISEPY("bad PDF: file entry not found", JM_Exc_FileDataError)
    return JM_BinFromBuffer(mupdf.pdf_load_stream(stream))
def get_oc(self):
    """Return the object number of the annotation's /OC entry, or 0 when there is none."""
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    oc_obj = mupdf.pdf_dict_get(annot_obj, PDF_NAME('OC'))
    if not oc_obj.m_internal:
        return 0
    return mupdf.pdf_to_num(oc_obj)
# PyMuPDF doesn't seem to have this .parent member, but removing it breaks
# 11 tests...?
#@property
def get_parent(self):
    """
    Return the owning `Page`.

    An existing `parent` attribute is returned as is. Otherwise a `Page`
    is built from the annotation's page, stored as `self.parent` (a
    strong reference, not a weakref proxy) and returned.
    """
    missing = object()  # sentinel: distinguishes "no attribute" from any stored value
    owner = getattr(self, 'parent', missing)
    if owner is not missing:
        return owner
    pdf_page = _pdf_annot_page(self.this)
    assert isinstance(pdf_page, mupdf.PdfPage)
    if pdf_page.m_internal:
        document = Document(pdf_page.doc())
    else:
        document = None
    owner = Page(pdf_page, document)
    self.parent = owner
    return owner
def get_pixmap(self, matrix=None, dpi=None, colorspace=None, alpha=0):
    """
    Render the annotation to a `Pixmap`.

    `colorspace` may be one of the names "gray", "rgb", "cmyk" (any
    case); a falsy colorspace falls back to device RGB. A truthy `dpi`
    replaces `matrix` by a scaling of dpi/72 and is also recorded on the
    returned pixmap.
    """
    CheckParent(self)
    cspaces = {"gray": csGRAY, "rgb": csRGB, "cmyk": csCMYK}
    if type(colorspace) is str:
        colorspace = cspaces.get(colorspace.lower(), None)
    if dpi:
        zoom = dpi / 72
        matrix = Matrix(zoom, zoom)
    ctm = JM_matrix_from_py(matrix)
    cs = colorspace if colorspace else mupdf.fz_device_rgb()
    rendered = mupdf.pdf_new_pixmap_from_annot(self.this, ctm, cs, mupdf.FzSeparations(0), alpha)
    pixmap = Pixmap(rendered)
    if dpi:
        pixmap.set_dpi(dpi, dpi)
    return pixmap
def get_sound(self):
    """
    Retrieve the sound stream and its parameters.

    Returns a dict with key 'stream' (the stream content) and, where the
    /Sound dictionary has them, 'rate', 'channels', 'bps', 'encoding' and
    'compression'. Raises TypeError if this is not a sound annotation or
    has no /Sound entry; a /Sound with an /F entry is reported as
    unsupported via RAISEPY().
    """
    CheckParent(self)
    annot = self.this
    annot_obj = mupdf.pdf_annot_obj(annot)
    annot_type = mupdf.pdf_annot_type(annot)
    sound = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Sound'))
    if annot_type != mupdf.PDF_ANNOT_SOUND or not sound.m_internal:
        raise TypeError(MSG_BAD_ANNOT_TYPE)
    if mupdf.pdf_dict_get(sound, PDF_NAME('F')).m_internal:
        RAISEPY("unsupported sound stream", JM_Exc_FileDataError)

    result = dict()
    entry = mupdf.pdf_dict_get(sound, PDF_NAME('R'))
    if entry.m_internal:
        result['rate'] = mupdf.pdf_to_real(entry)
    entry = mupdf.pdf_dict_get(sound, PDF_NAME('C'))
    if entry.m_internal:
        result['channels'] = mupdf.pdf_to_int(entry)
    entry = mupdf.pdf_dict_get(sound, PDF_NAME('B'))
    if entry.m_internal:
        result['bps'] = mupdf.pdf_to_int(entry)
    entry = mupdf.pdf_dict_get(sound, PDF_NAME('E'))
    if entry.m_internal:
        result['encoding'] = mupdf.pdf_to_name(entry)
    entry = mupdf.pdf_dict_gets(sound, "CO")
    if entry.m_internal:
        result['compression'] = mupdf.pdf_to_name(entry)

    result['stream'] = JM_BinFromBuffer(mupdf.pdf_load_stream(sound))
    return result
-
def get_text(self, *args, **kwargs):
    """Delegate to `utils.get_text()` with this annotation as first argument."""
    result = utils.get_text(self, *args, **kwargs)
    return result
def get_textbox(self, *args, **kwargs):
    """Delegate to `utils.get_textbox()` with this annotation as first argument."""
    result = utils.get_textbox(self, *args, **kwargs)
    return result
def get_textpage(self, clip=None, flags=0):
    """
    Make a `TextPage` for the annotation.

    `flags` initialises the structured-text options. A truthy `clip`
    restricts extraction to that rectangle and requires MuPDF to provide
    FZ_STEXT_CLIP_RECT. The result's `parent` is a weak proxy of the
    owning page.
    """
    CheckParent(self)
    options = mupdf.FzStextOptions(flags)
    if clip:
        assert hasattr(mupdf, 'FZ_STEXT_CLIP_RECT'), f'MuPDF-{mupdf_version} does not support FZ_STEXT_CLIP_RECT.'
        clip_rect = JM_rect_from_py(clip)
        options.clip = clip_rect.internal()
        options.flags |= mupdf.FZ_STEXT_CLIP_RECT
    textpage = TextPage(mupdf.FzStextPage(self.this, options))
    owner = self.get_parent()
    # Reuse an existing proxy, otherwise create one.
    textpage.parent = owner if isinstance(owner, weakref.ProxyType) else weakref.proxy(owner)
    return textpage
@property
def has_popup(self):
    """True if the annotation object has a /Popup entry, else False."""
    CheckParent(self)
    popup = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(self.this), PDF_NAME('Popup'))
    if popup.m_internal:
        return True
    return False
@property
def info(self):
    """Dict of descriptive entries: content, /Name, title (/T, the author), /CreationDate, modification date (/M), subject (/Subj) and id (/NM)."""
    CheckParent(self)
    annot = self.this

    def entry(name):
        # Look up a key of the annotation object by PDF name object.
        return mupdf.pdf_dict_get(mupdf.pdf_annot_obj(annot), PDF_NAME(name))

    def entry_s(key):
        # Look up a key of the annotation object by plain string.
        return mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), key)

    details = dict()
    details[dictkey_content] = JM_UnicodeFromStr(mupdf.pdf_annot_contents(annot))
    details[dictkey_name] = JM_UnicodeFromStr(mupdf.pdf_to_name(entry('Name')))
    # Title (= author)
    details[dictkey_title] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(entry('T')))
    # CreationDate
    details[dictkey_creationDate] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(entry_s("CreationDate")))
    # ModDate
    details[dictkey_modDate] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(entry('M')))
    # Subj (stored without passing through JM_UnicodeFromStr)
    details[dictkey_subject] = mupdf.pdf_to_text_string(entry_s("Subj"))
    # Identification (PDF key /NM)
    details[dictkey_id] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(entry_s("NM")))
    return details
@property
def irt_xref(self):
    '''
    Object number of the annotation's /IRT entry, or 0 when there is none.
    '''
    annot_obj = mupdf.pdf_annot_obj(self.this)
    irt = mupdf.pdf_dict_get(annot_obj, PDF_NAME('IRT'))
    if irt.m_internal:
        return mupdf.pdf_to_num(irt)
    return 0
@property
def is_open(self):
    """'Open' status of the annotation or its Popup, as reported by mupdf.pdf_annot_is_open()."""
    CheckParent(self)
    status = mupdf.pdf_annot_is_open(self.this)
    return status
@property
def language(self):
    """Annotation language as a string, or None when the language is unset."""
    code = mupdf.pdf_annot_language(self.this)
    if code == mupdf.FZ_LANG_UNSET:
        return
    assert hasattr(mupdf, 'fz_string_from_text_language2')
    return mupdf.fz_string_from_text_language2(code)
@property
def line_ends(self):
    """Tuple (start style, end style) of the line end codes, or None for annotation types without line ending styles."""
    CheckParent(self)
    annot = self.this
    # return nothing for invalid annot types
    if not mupdf.pdf_annot_has_line_ending_styles(annot):
        return
    start_style = mupdf.pdf_annot_line_start_style(annot)
    end_style = mupdf.pdf_annot_line_end_style(annot)
    return start_style, end_style
@property
def next(self):
    """
    Next annotation, or None if there is none.

    For a widget the next widget is looked up, for anything else the next
    annotation. The result is registered in its parent's `_annot_refs`
    and, if it is a widget, returned as a filled `Widget`.
    """
    CheckParent(self)
    current = self.this
    assert isinstance(current, mupdf.PdfAnnot)
    assert current.m_internal
    if mupdf.pdf_annot_type(current) == mupdf.PDF_ANNOT_WIDGET:
        following = mupdf.pdf_next_widget(current)
    else:
        following = mupdf.pdf_next_annot(current)

    successor = Annot(following) if following.m_internal else None
    if not successor:
        return None
    successor.thisown = True
    # Both annotations must live on the same underlying page.
    assert successor.get_parent().this.m_internal_value() == self.get_parent().this.m_internal_value()
    successor.parent._annot_refs[id(successor)] = successor

    if successor.type[0] != mupdf.PDF_ANNOT_WIDGET:
        return successor
    widget = Widget()
    TOOLS._fill_widget(successor, widget)
    return widget
@property
def opacity(self):
    """Value of the annotation's /CA entry, or -1 when it is not a number."""
    CheckParent(self)
    ca = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(self.this), mupdf.PDF_ENUM_NAME_CA)
    if not mupdf.pdf_is_number(ca):
        return -1
    return mupdf.pdf_to_real(ca)
@property
def popup_rect(self):
    """Rectangle of the annotation's /Popup, mapped through the parent page's transformation and derotation matrices; an infinite rectangle is used when there is no /Popup."""
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    popup = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Popup'))
    if popup.m_internal:
        raw = mupdf.pdf_dict_get_rect(popup, PDF_NAME('Rect'))
    else:
        raw = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)

    result = Rect(JM_py_from_rect(raw)) * self.get_parent().transformation_matrix
    result *= self.get_parent().derotation_matrix
    return result
@property
def popup_xref(self):
    """Object number of the annotation's /Popup entry, or 0 when there is none."""
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    popup = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Popup'))
    if not popup.m_internal:
        return 0
    return mupdf.pdf_to_num(popup)
@property
def rect(self):
    """Annotation rectangle, multiplied by the parent page's derotation matrix."""
    # Either route yields the raw rectangle; `extra` is the optimised one.
    raw = extra.Annot_rect3(self.this) if g_use_extra else mupdf.pdf_bound_annot(self.this)
    result = Rect(raw)

    # get_parent() caches the page on first use, which keeps repeated
    # access to this property cheap.
    owner = self.get_parent()
    result *= owner.derotation_matrix
    return result
@property
def rect_delta(self):
    '''
    The four /RD values as a tuple, with the third and fourth negated;
    None when /RD does not have exactly 4 items.
    '''
    annot_obj = mupdf.pdf_annot_obj(self.this)
    rd = mupdf.pdf_dict_get(annot_obj, PDF_NAME('RD'))
    if mupdf.pdf_array_len(rd) != 4:
        return None
    d0, d1, d2, d3 = [mupdf.pdf_to_real(mupdf.pdf_array_get(rd, i)) for i in range(4)]
    return (d0, d1, -d2, -d3)
@property
def rotation(self):
    """Value of the annotation's /Rotate entry, or -1 when there is none."""
    CheckParent(self)
    rotate = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(self.this), mupdf.PDF_ENUM_NAME_Rotate)
    if rotate.m_internal:
        return mupdf.pdf_to_int(rotate)
    return -1
def set_apn_bbox(self, bbox):
    """
    Set annotation appearance bbox.

    Args:
        bbox: the new bounding box; a `Rect` or any value that `Rect()`
            accepts. The argument itself is not modified.
    Raises:
        RuntimeError: if the annotation has no /AP/N appearance object.
    """
    CheckParent(self)
    page = self.get_parent()
    rot = page.rotation_matrix
    mat = page.transformation_matrix
    # Convert to a fresh Rect first: the multiplication below is only
    # defined for Rect, so a plain sequence would fail here, and working
    # on a copy keeps the caller's object untouched.
    bbox = Rect(bbox) * (rot * ~mat)
    annot = self.this
    annot_obj = mupdf.pdf_annot_obj(annot)
    ap = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AP'), PDF_NAME('N'))
    if not ap.m_internal:
        raise RuntimeError( MSG_BAD_APN)
    rect = JM_rect_from_py(bbox)
    mupdf.pdf_dict_put_rect(ap, PDF_NAME('BBox'), rect)
def set_apn_matrix(self, matrix):
    """Store `matrix` as /Matrix of the appearance object /AP/N; RuntimeError if /AP/N is missing."""
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    ap = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AP'), PDF_NAME('N'))
    if not ap.m_internal:
        raise RuntimeError(MSG_BAD_APN)
    mupdf.pdf_dict_put_matrix(ap, PDF_NAME('Matrix'), JM_matrix_from_py(matrix))
def set_blendmode(self, blend_mode):
    """Write `blend_mode` as the name value of the annotation's /BM entry."""
    CheckParent(self)
    target = mupdf.pdf_annot_obj(self.this)
    mupdf.pdf_dict_put_name(target, PDF_NAME('BM'), blend_mode)
def set_border(self, border=None, width=-1, style=None, dashes=None, clouds=-1):
    """Set border properties.

    Either a dict, or direct arguments width, style, dashes or clouds.

    A dict passed as `border` is copied before missing keys are filled
    in, so the caller's dict is left unchanged. Missing or None values
    for "width" and "clouds" become -1. "dashes" is converted to a tuple
    and dropped (set to None) if any item is not an int.

    Returns the result of JM_annot_set_border(), or None (after a
    message) for annotation types that cannot have a border.
    """
    CheckParent(self)
    atype, atname = self.type[:2]  # annotation type
    if atype not in (
            mupdf.PDF_ANNOT_CIRCLE,
            mupdf.PDF_ANNOT_FREE_TEXT,
            mupdf.PDF_ANNOT_INK,
            mupdf.PDF_ANNOT_LINE,
            mupdf.PDF_ANNOT_POLY_LINE,
            mupdf.PDF_ANNOT_POLYGON,
            mupdf.PDF_ANNOT_SQUARE,
            ):
        message(f"Cannot set border for '{atname}'.")
        return None
    if atype not in (
            mupdf.PDF_ANNOT_CIRCLE,
            mupdf.PDF_ANNOT_FREE_TEXT,
            mupdf.PDF_ANNOT_POLYGON,
            mupdf.PDF_ANNOT_SQUARE,
            ):
        if clouds > 0:
            message(f"Cannot set cloudy border for '{atname}'.")
            clouds = -1  # do not set border effect
    if type(border) is dict:
        # Work on a shallow copy: the normalisation below must not leak
        # into the dict owned by the caller.
        border = dict(border)
    else:
        border = {"width": width, "style": style, "dashes": dashes, "clouds": clouds}
    border.setdefault("width", -1)
    border.setdefault("style", None)
    border.setdefault("dashes", None)
    border.setdefault("clouds", -1)
    if border["width"] is None:
        border["width"] = -1
    if border["clouds"] is None:
        border["clouds"] = -1
    if hasattr(border["dashes"], "__getitem__"):  # ensure sequence items are integers
        border["dashes"] = tuple(border["dashes"])
        for item in border["dashes"]:
            if not isinstance(item, int):
                border["dashes"] = None
                break
    annot = self.this
    annot_obj = mupdf.pdf_annot_obj( annot)
    pdf = mupdf.pdf_get_bound_document( annot_obj)
    return JM_annot_set_border( border, pdf, annot_obj)
def set_colors(self, colors=None, stroke=None, fill=None):
    """Set 'stroke' and 'fill' colors.

    Use either a dict (keys "stroke" and "fill") or the direct arguments.
    An empty list or tuple writes an empty array, None leaves the entry
    alone, a single number is treated as a one-component color. Stroke
    goes to key "C", fill to key "IC". A fill color for an annotation
    type without fill support is ignored with a warning. Raises
    ValueError for FreeText annotations.
    """
    if self.type[0] == mupdf.PDF_ANNOT_FREE_TEXT:
        raise ValueError("cannot be used for FreeText annotations")
    CheckParent(self)
    doc = self.get_parent().parent
    if type(colors) is not dict:
        colors = {"fill": fill, "stroke": stroke}
    fill = colors.get("fill")
    stroke = colors.get("stroke")
    fill_annots = (mupdf.PDF_ANNOT_CIRCLE, mupdf.PDF_ANNOT_SQUARE, mupdf.PDF_ANNOT_LINE, mupdf.PDF_ANNOT_POLY_LINE, mupdf.PDF_ANNOT_POLYGON,
                   mupdf.PDF_ANNOT_REDACT,)

    def store(key, color):
        # Write one color entry; shared by the stroke and fill handling.
        if color in ([], ()):
            doc.xref_set_key(self.xref, key, "[]")
        elif color is not None:
            if hasattr(color, "__float__"):
                color = [float(color)]
            CheckColor(color)
            assert len(color) in (1, 3, 4)
            doc.xref_set_key(self.xref, key, f"[{_format_g(color)}]")

    store("C", stroke)

    if fill and self.type[0] not in fill_annots:
        message("Warning: fill color ignored for annot type '%s'." % self.type[1])
        return
    store("IC", fill)
def set_flags(self, flags):
    """Pass `flags` to mupdf.pdf_set_annot_flags() for this annotation."""
    CheckParent(self)
    mupdf.pdf_set_annot_flags(self.this, flags)
def set_info(self, info=None, content=None, title=None, creationDate=None, modDate=None, subject=None):
    """
    Set various properties.

    When `info` is a dict, all values are taken from it (missing keys
    count as None) and the direct arguments are ignored. Only truthy
    values are written. Title, creation date, modification date and
    subject are written only if mupdf.pdf_annot_has_author() is true for
    the annotation.
    """
    CheckParent(self)
    if type(info) is dict:  # build the args from the dictionary
        content, title, creationDate, modDate, subject = [
                info.get(key, None)
                for key in ("content", "title", "creationDate", "modDate", "subject")
                ]
        info = None
    annot = self.this
    # use this to indicate a 'markup' annot type
    is_markup = mupdf.pdf_annot_has_author(annot)
    # contents
    if content:
        mupdf.pdf_set_annot_contents(annot, content)
    if not is_markup:
        return
    # title (= author)
    if title:
        mupdf.pdf_set_annot_author(annot, title)
    # creation date
    if creationDate:
        mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), PDF_NAME('CreationDate'), creationDate)
    # mod date
    if modDate:
        mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), PDF_NAME('M'), modDate)
    # subject
    if subject:
        mupdf.pdf_dict_puts(mupdf.pdf_annot_obj(annot), "Subj", mupdf.pdf_new_text_string(subject))
def set_irt_xref(self, xref):
    '''
    Make the annotation's /IRT entry an indirect reference to `xref`.

    Raises ValueError if `xref` is outside 1 .. xref length - 1, or if
    the /Subtype of the referenced object is not an annotation type.
    '''
    annot = self.this
    annot_obj = mupdf.pdf_annot_obj(annot)
    document = _pdf_annot_page(annot).doc()
    if xref < 1 or xref >= mupdf.pdf_xref_len(document):
        raise ValueError(MSG_BAD_XREF)
    target = mupdf.pdf_new_indirect(document, xref, 0)
    subtype_name = mupdf.pdf_to_name(mupdf.pdf_dict_get(target, PDF_NAME('Subtype')))
    if mupdf.pdf_annot_type_from_string(subtype_name) < 0:
        raise ValueError(MSG_IS_NO_ANNOT)
    mupdf.pdf_dict_put(annot_obj, PDF_NAME('IRT'), target)
def set_language(self, language=None):
    """Set the annotation language from a language string; a falsy value unsets it."""
    CheckParent(self)
    if language:
        code = mupdf.fz_text_language_from_string(language)
    else:
        code = mupdf.FZ_LANG_UNSET
    mupdf.pdf_set_annot_language(self.this, code)
def set_line_ends(self, start, end):
    """Set the line end codes; only a warning is issued for annotation types without line ending styles."""
    CheckParent(self)
    annot = self.this
    if not mupdf.pdf_annot_has_line_ending_styles(annot):
        message_warning("bad annot type for line ends")
        return
    mupdf.pdf_set_annot_line_ending_styles(annot, start, end)
def set_name(self, name):
    """Write `name` into the annotation's /Name entry (its icon)."""
    CheckParent(self)
    annot_dict = mupdf.pdf_annot_obj(self.this)
    mupdf.pdf_dict_put_name(annot_dict, PDF_NAME('Name'), name)
def set_oc(self, oc=0):
    """Set or remove the annotation's /OC (optional content) reference.

    Args:
        oc: xref of the optional content object; a falsy value deletes
            the /OC entry.
    """
    CheckParent(self)
    annot_dict = mupdf.pdf_annot_obj(self.this)
    if oc:
        owner_pdf = mupdf.pdf_get_bound_document(annot_dict)
        JM_add_oc_object(owner_pdf, annot_dict, oc)
        return
    mupdf.pdf_dict_del(annot_dict, PDF_NAME('OC'))
def set_opacity(self, opacity):
    """Set the annotation opacity.

    A value rejected by `_INRANGE(opacity, 0.0, 1.0)` results in opacity 1.
    For an accepted value below 1.0 the owning page's `transparency`
    attribute is additionally set to 1.
    """
    CheckParent(self)
    target = self.this
    if _INRANGE(opacity, 0.0, 1.0):
        mupdf.pdf_set_annot_opacity(target, opacity)
        if opacity < 1.0:
            owner_page = _pdf_annot_page(target)
            owner_page.transparency = 1
    else:
        mupdf.pdf_set_annot_opacity(target, 1)
def set_open(self, is_open):
    """Set the 'open' status of the annotation or of its Popup."""
    CheckParent(self)
    mupdf.pdf_set_annot_is_open(self.this, is_open)
def set_popup(self, rect):
    """Create the annotation's 'Popup' or update its rectangle.

    `rect` is transformed with the matrix returned by
    `JM_rotate_page_matrix` for the owning page before being handed to
    MuPDF.
    """
    CheckParent(self)
    target = self.this
    page_matrix = JM_rotate_page_matrix(_pdf_annot_page(target))
    popup_rect = mupdf.fz_transform_rect(JM_rect_from_py(rect), page_matrix)
    mupdf.pdf_set_annot_popup(target, popup_rect)
def set_rect(self, rect):
    """Set the annotation rectangle.

    Returns:
        None on success; False if MuPDF raised while setting the
        rectangle (the error is reported through `message`).

    Raises:
        ValueError: if the transformed rectangle is empty or infinite.
    """
    CheckParent(self)
    target = self.this

    page_matrix = JM_rotate_page_matrix(_pdf_annot_page(target))
    new_rect = mupdf.fz_transform_rect(JM_rect_from_py(rect), page_matrix)
    unusable = mupdf.fz_is_empty_rect(new_rect) or mupdf.fz_is_infinite_rect(new_rect)
    if unusable:
        raise ValueError(MSG_BAD_RECT)
    try:
        mupdf.pdf_set_annot_rect(target, new_rect)
    except Exception as exc:
        # Best effort: report the problem and signal failure to the caller.
        message(f'cannot set rect: {exc}')
        return False
def set_rotation(self, rotate=0):
    """Store a rotation angle in the annotation's /Rotate entry.

    Nothing happens for annotation types outside the supported list below.
    The angle is brought into [0, 360); for 'FreeText' annotations any
    angle that is not a multiple of 90 is replaced by 0.
    """
    CheckParent(self)

    target = self.this
    kind = mupdf.pdf_annot_type(target)
    rotatable_kinds = (
        mupdf.PDF_ANNOT_CARET,
        mupdf.PDF_ANNOT_CIRCLE,
        mupdf.PDF_ANNOT_FREE_TEXT,
        mupdf.PDF_ANNOT_FILE_ATTACHMENT,
        mupdf.PDF_ANNOT_INK,
        mupdf.PDF_ANNOT_LINE,
        mupdf.PDF_ANNOT_POLY_LINE,
        mupdf.PDF_ANNOT_POLYGON,
        mupdf.PDF_ANNOT_SQUARE,
        mupdf.PDF_ANNOT_STAMP,
        mupdf.PDF_ANNOT_TEXT,
    )
    if kind not in rotatable_kinds:
        return

    # Normalise the angle into [0, 360).
    angle = rotate
    while angle < 0:
        angle += 360
    while angle >= 360:
        angle -= 360
    if kind == mupdf.PDF_ANNOT_FREE_TEXT and angle % 90 != 0:
        angle = 0

    mupdf.pdf_dict_put_int(mupdf.pdf_annot_obj(target), PDF_NAME('Rotate'), angle)
@property
def type(self):
    """Annotation type.

    Returns:
        The string 'null' if the underlying MuPDF annotation is empty.
        Otherwise a tuple `(type_number, type_name)`, extended by a third
        item - the intent name - when the annotation dictionary has an
        /IT entry that is a PDF name.
    """
    CheckParent(self)
    if not self.this.m_internal:
        return 'null'
    type_ = mupdf.pdf_annot_type(self.this)
    type_name = mupdf.pdf_string_from_annot_type(type_)
    intent = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(self.this), 'IT')
    # Only a present /IT entry that really is a name carries an intent.
    # (The previous test lacked the second "not" and therefore dropped the
    # intent exactly when it was available.)
    if not intent.m_internal or not mupdf.pdf_is_name(intent):
        return (type_, type_name)
    return (type_, type_name, mupdf.pdf_to_name(intent))
def update(self,
        blend_mode: OptStr =None,
        opacity: OptFloat =None,
        fontsize: float =0,
        fontname: OptStr =None,
        text_color: OptSeq =None,
        border_color: OptSeq =None,
        fill_color: OptSeq =None,
        cross_out: bool =True,
        rotate: int =-1,
        ):
    """Update annot appearance.

    Notes:
        Depending on the annot type, some parameters make no sense,
        while others are only available in this method to achieve the
        desired result. This is especially true for 'FreeText' annots.

    Args:
        blend_mode: set the blend mode, all annotations.
        opacity: set the opacity, all annotations.
        fontsize: set fontsize, 'FreeText' only.
        fontname: set the font, 'FreeText' only.
        border_color: set border color, 'FreeText' only.
        text_color: set text color, 'FreeText' only.
        fill_color: set fill color, all annotations.
        cross_out: draw diagonal lines, 'Redact' only.
        rotate: set rotation, 'FreeText' and some others.

    Raises:
        ValueError: if `border_color` is given but the lookup of the
            annotation's /RC entry yields a falsy result.
        RuntimeError: if `_update_appearance` returns False.
    """
    annot_obj = mupdf.pdf_annot_obj(self.this)

    if border_color:
        is_rich_text = mupdf.pdf_dict_get(annot_obj, PDF_NAME("RC"))
        if not is_rich_text:
            raise ValueError("cannot set border_color if rich_text is False")
    Annot.update_timing_test()
    CheckParent(self)

    def color_string(cs, code):
        """Return valid PDF color operator for a given color sequence.
        """
        cc = ColorCode(cs, code)
        if not cc:
            return b""
        return (cc + "\n").encode()

    annot_type = self.type[0]  # get the annot type

    dt = self.border.get("dashes", None)  # get the dashes spec
    bwidth = self.border.get("width", -1)  # get border line width
    stroke = self.colors["stroke"]  # get the stroke color
    if fill_color is not None:
        fill = fill_color
    else:
        fill = self.colors["fill"]
    rect = None  # self.rect  # prevent MuPDF fiddling with it
    apnmat = self.apn_matrix  # prevent MuPDF fiddling with it
    if rotate != -1:  # sanitize rotation value
        while rotate < 0:
            rotate += 360
        while rotate >= 360:
            rotate -= 360
        if annot_type == mupdf.PDF_ANNOT_FREE_TEXT and rotate % 90 != 0:
            rotate = 0

    #------------------------------------------------------------------
    # handle opacity and blend mode
    #------------------------------------------------------------------
    if blend_mode is None:
        blend_mode = self.blendmode
    if not hasattr(opacity, "__float__"):
        opacity = self.opacity

    if 0 <= opacity < 1 or blend_mode:
        opa_code = "/H gs\n"  # then we must reference this 'gs'
    else:
        opa_code = ""

    if annot_type == mupdf.PDF_ANNOT_FREE_TEXT:
        CheckColor(text_color)
        CheckColor(fill_color)
        tcol, fname, fsize = TOOLS._parse_da(self)

        # read and update default appearance as necessary
        if fsize <= 0:
            fsize = 12
        if text_color:
            tcol = text_color
        if fontname:
            fname = fontname
        if fontsize > 0:
            fsize = fontsize
        JM_make_annot_DA(self, len(tcol), tcol, fname, fsize)
        blend_mode = None  # not supported for free text annotations!

    #------------------------------------------------------------------
    # now invoke MuPDF to update the annot appearance
    #------------------------------------------------------------------
    val = self._update_appearance(
        opacity=opacity,
        blend_mode=blend_mode,
        fill_color=fill,
        rotate=rotate,
    )
    if val is False:
        raise RuntimeError("Error updating annotation.")

    if annot_type == mupdf.PDF_ANNOT_FREE_TEXT:
        # in absence of previous opacity, we may need to modify the AP
        ap = self._getAP()
        if 0 <= opacity < 1 and not ap.startswith(b"/H gs"):
            self._setAP(b"/H gs\n" + ap)
        return

    bfill = color_string(fill, "f")
    bstroke = color_string(stroke, "c")

    p_ctm = self.get_parent().transformation_matrix
    imat = ~p_ctm  # inverse page transf. matrix

    if dt:
        dashes = "[" + " ".join(map(str, dt)) + "] 0 d\n"
        dashes = dashes.encode("utf-8")
    else:
        dashes = None

    if self.line_ends:
        line_end_le, line_end_ri = self.line_ends
    else:
        line_end_le, line_end_ri = 0, 0  # init line end codes

    # read contents as created by MuPDF
    ap = self._getAP()
    ap_tab = ap.splitlines()  # split in single lines
    ap_updated = False  # assume we did nothing

    if annot_type == mupdf.PDF_ANNOT_REDACT:
        if cross_out:  # create crossed-out rect
            ap_updated = True
            ap_tab = ap_tab[:-1]
            # the unpacking requires exactly five remaining lines
            _, LL, LR, UR, UL = ap_tab
            ap_tab.append(LR)
            ap_tab.append(LL)
            ap_tab.append(UR)
            ap_tab.append(LL)
            ap_tab.append(UL)
            ap_tab.append(b"S")

        if bwidth > 0 or bstroke != b"":
            ap_updated = True
            ntab = [_format_g(bwidth).encode() + b" w"] if bwidth > 0 else []
            for line in ap_tab:
                if line.endswith(b"w"):
                    continue  # drop existing line width commands
                if line.endswith(b"RG") and bstroke != b"":
                    line = bstroke[:-1]  # our stroke color, minus newline
                ntab.append(line)
            ap_tab = ntab

        ap = b"\n".join(ap_tab)

    if annot_type in (mupdf.PDF_ANNOT_POLYGON, mupdf.PDF_ANNOT_POLY_LINE):
        ap = b"\n".join(ap_tab[:-1]) + b"\n"
        ap_updated = True
        if bfill != b"":
            if annot_type == mupdf.PDF_ANNOT_POLYGON:
                ap = ap + bfill + b"b"  # close, fill, and stroke
            elif annot_type == mupdf.PDF_ANNOT_POLY_LINE:
                ap = ap + b"S"  # stroke
        else:
            if annot_type == mupdf.PDF_ANNOT_POLYGON:
                ap = ap + b"s"  # close and stroke
            elif annot_type == mupdf.PDF_ANNOT_POLY_LINE:
                ap = ap + b"S"  # stroke

    if dashes is not None:  # handle dashes
        ap = dashes + ap
        # reset dashing - only applies for LINE annots with line ends given
        ap = ap.replace(b"\nS\n", b"\nS\n[] 0 d\n", 1)
        ap_updated = True

    if opa_code:
        ap = opa_code.encode("utf-8") + ap
        ap_updated = True

    ap = b"q\n" + ap + b"\nQ\n"
    #----------------------------------------------------------------------
    # the following handles line end symbols for 'Polygon' and 'Polyline'
    #----------------------------------------------------------------------
    if line_end_le + line_end_ri > 0 and annot_type in (mupdf.PDF_ANNOT_POLYGON, mupdf.PDF_ANNOT_POLY_LINE):

        le_funcs = (None, TOOLS._le_square, TOOLS._le_circle,
                    TOOLS._le_diamond, TOOLS._le_openarrow,
                    TOOLS._le_closedarrow, TOOLS._le_butt,
                    TOOLS._le_ropenarrow, TOOLS._le_rclosedarrow,
                    TOOLS._le_slash)
        le_funcs_range = range(1, len(le_funcs))
        d = 2 * max(1, self.border["width"])
        rect = self.rect + (-d, -d, d, d)  # enlarge rect around the symbols
        ap_updated = True
        points = self.vertices
        if line_end_le in le_funcs_range:
            p1 = Point(points[0]) * imat
            p2 = Point(points[1]) * imat
            left = le_funcs[line_end_le](self, p1, p2, False, fill_color)
            ap += left.encode()
        if line_end_ri in le_funcs_range:
            p1 = Point(points[-2]) * imat
            p2 = Point(points[-1]) * imat
            left = le_funcs[line_end_ri](self, p1, p2, True, fill_color)
            ap += left.encode()

    if ap_updated:
        if rect:  # rect modified here?
            self.set_rect(rect)
            self._setAP(ap, rect=1)
        else:
            self._setAP(ap, rect=0)

    #-------------------------------
    # handle annotation rotations
    #-------------------------------
    if annot_type not in (  # only these types are supported
        mupdf.PDF_ANNOT_CARET,
        mupdf.PDF_ANNOT_CIRCLE,
        mupdf.PDF_ANNOT_FILE_ATTACHMENT,
        mupdf.PDF_ANNOT_INK,
        mupdf.PDF_ANNOT_LINE,
        mupdf.PDF_ANNOT_POLY_LINE,
        mupdf.PDF_ANNOT_POLYGON,
        mupdf.PDF_ANNOT_SQUARE,
        mupdf.PDF_ANNOT_STAMP,
        mupdf.PDF_ANNOT_TEXT,
        ):
        return

    rot = self.rotation  # get value from annot object
    if rot == -1:  # nothing to change
        return

    M = (self.rect.tl + self.rect.br) / 2  # center of annot rect

    if rot == 0:  # undo rotations
        if abs(apnmat - Matrix(1, 1)) < 1e-5:
            return  # matrix already is a no-op

        quad = self.rect.morph(M, ~apnmat)  # derotate rect
        # Use set_rect like the rest of this method; the camelCase
        # 'setRect' spelling used here before is not the name under
        # which the method is defined.
        self.set_rect(quad.rect)
        self.set_apn_matrix(Matrix(1, 1))  # appearance matrix = no-op
        return

    mat = Matrix(rot)
    quad = self.rect.morph(M, mat)
    self.set_rect(quad.rect)
    self.set_apn_matrix(apnmat * mat)
def update_file(self, buffer_=None, filename=None, ufilename=None, desc=None):
    """Update the file attached to a 'FileAttachment' annotation.

    Args:
        buffer_: new file content; when it yields a usable buffer the
            embedded stream and its /DL and /Params/Size entries are
            replaced.
        filename: new value for /F and /UF (stream and file spec) and for
            the annotation's /Contents.
        ufilename: new value for /UF (stream and file spec).
        desc: new value for /Desc (stream and file spec).

    Raises:
        TypeError: if this is not a file attachment annotation.
        ValueError: if `buffer_` is truthy but produced no buffer.
    """
    CheckParent(self)
    this_annot = self.this
    annot_dict = mupdf.pdf_annot_obj(this_annot)
    owner_pdf = mupdf.pdf_get_bound_document(annot_dict)  # the owning PDF
    if mupdf.pdf_annot_type(this_annot) != mupdf.PDF_ANNOT_FILE_ATTACHMENT:
        raise TypeError(MSG_BAD_ANNOT_TYPE)

    # the object for file content
    ef_stream = mupdf.pdf_dict_getl(annot_dict, PDF_NAME('FS'), PDF_NAME('EF'), PDF_NAME('F'))
    if not ef_stream.m_internal:
        RAISEPY("bad PDF: no /EF object", JM_Exc_FileDataError)

    filespec = mupdf.pdf_dict_get(annot_dict, PDF_NAME('FS'))

    # file content given
    payload = JM_BufferFromBytes(buffer_)
    if buffer_ and not payload.m_internal:
        raise ValueError(MSG_BAD_BUFFER)
    if payload:
        JM_update_stream(owner_pdf, ef_stream, payload, 1)
        # adjust /DL and /Size parameters
        byte_count, _ = mupdf.fz_buffer_storage(payload)
        size_obj = mupdf.pdf_new_int(byte_count)
        mupdf.pdf_dict_put(ef_stream, PDF_NAME('DL'), size_obj)
        mupdf.pdf_dict_putl(ef_stream, size_obj, PDF_NAME('Params'), PDF_NAME('Size'))

    holders = (ef_stream, filespec)
    if filename:
        for key in ('F', 'UF'):
            for holder in holders:
                mupdf.pdf_dict_put_text_string(holder, PDF_NAME(key), filename)
        mupdf.pdf_dict_put_text_string(annot_dict, PDF_NAME('Contents'), filename)

    if ufilename:
        for holder in holders:
            mupdf.pdf_dict_put_text_string(holder, PDF_NAME('UF'), ufilename)

    if desc:
        for holder in holders:
            mupdf.pdf_dict_put_text_string(holder, PDF_NAME('Desc'), desc)
- @staticmethod
- def update_timing_test():
- total = 0
- for i in range( 30*1000):
- total += i
- return total
-
@property
def vertices(self):
    """Annotation vertex points.

    Returns:
        For /Vertices, /L, /QuadPoints or /CL (the first one present): a
        list of (x, y) tuples. Otherwise, for /InkList: a list of such
        lists, one per sub-array. None if none of these entries exists.
        Every point is transformed with the page transformation matrix
        concatenated with the page's derotation matrix.
    """
    CheckParent(self)
    this_annot = self.this
    assert isinstance(this_annot, mupdf.PdfAnnot)
    annot_dict = mupdf.pdf_annot_obj(this_annot)
    owner_page = _pdf_annot_page(this_annot)
    ctm = mupdf.FzMatrix()  # page transformation matrix
    unused_rect = mupdf.FzRect()  # Out-param for mupdf.pdf_page_transform().
    mupdf.pdf_page_transform(owner_page, unused_rect, ctm)
    ctm = mupdf.fz_concat(ctm, JM_derotate_page_matrix(owner_page))

    def transformed_pairs(array):
        """Read consecutive float pairs from `array` as transformed points."""
        pairs = []
        for pos in range(0, mupdf.pdf_array_len(array), 2):
            x = mupdf.pdf_to_real(mupdf.pdf_array_get(array, pos))
            y = mupdf.pdf_to_real(mupdf.pdf_array_get(array, pos + 1))
            moved = mupdf.fz_transform_point(mupdf.FzPoint(x, y), ctm)
            pairs.append((moved.x, moved.y))
        return pairs

    #----------------------------------------------------------------
    # The following objects occur in different annotation types, so the
    # first one found is the one to use.
    # Every pair of floats is one point, that needs to be separately
    # transformed with the page transformation matrix.
    #----------------------------------------------------------------
    flat = mupdf.pdf_dict_get(annot_dict, PDF_NAME('Vertices'))
    for key in ('L', 'QuadPoints'):
        if flat.m_internal:
            break
        flat = mupdf.pdf_dict_get(annot_dict, PDF_NAME(key))
    if not flat.m_internal:
        flat = mupdf.pdf_dict_gets(annot_dict, 'CL')

    if flat.m_internal:
        # lists with 1-level depth
        return transformed_pairs(flat)

    ink = mupdf.pdf_dict_gets(annot_dict, 'InkList')
    if ink.m_internal:
        # InkList has 2-level lists
        return [
            transformed_pairs(mupdf.pdf_array_get(ink, pos))
            for pos in range(mupdf.pdf_array_len(ink))
        ]
@property
def xref(self):
    """Object number (xref) of the annotation's PDF object."""
    CheckParent(self)
    annot_dict = mupdf.pdf_annot_obj(self.this)
    return mupdf.pdf_to_num(annot_dict)
class Archive:
    """A MuPDF multi-archive together with a Python-side list describing
    the sub-archives that were mounted into it."""

    def __init__( self, *args):
        '''
        Archive(dirname [, path]) - from folder
        Archive(file [, path]) - from file name or object
        Archive(data, name) - from memory item
        Archive() - empty archive
        Archive(archive [, path]) - from archive
        '''
        self._subarchives = list()
        self.this = mupdf.fz_new_multi_archive()
        if args:
            self.add( *args)

    def __repr__( self):
        return f'Archive, sub-archives: {len(self._subarchives)}'

    def _add_arch( self, subarch, path=None):
        """Mount `subarch` under the virtual path `path`."""
        mupdf.fz_mount_multi_archive( self.this, subarch, path)

    def _add_dir( self, folder, path=None):
        """Open directory `folder` as an archive and mount it under `path`."""
        sub = mupdf.fz_open_directory( folder)
        mupdf.fz_mount_multi_archive( self.this, sub, path)

    def _add_treeitem( self, memory, name, path=None):
        """Mount a new tree archive holding `memory` as entry `name`."""
        buff = JM_BufferFromBytes( memory)
        sub = mupdf.fz_new_tree_archive( mupdf.FzTree())
        mupdf.fz_tree_archive_add_buffer( sub, name, buff)
        mupdf.fz_mount_multi_archive( self.this, sub, path)

    def _add_ziptarfile( self, filepath, type_, path=None):
        """Mount a zip (`type_` == 1) or tar (otherwise) file given by name."""
        if type_ == 1:
            sub = mupdf.fz_open_zip_archive( filepath)
        else:
            sub = mupdf.fz_open_tar_archive( filepath)
        mupdf.fz_mount_multi_archive( self.this, sub, path)

    def _add_ziptarmemory( self, memory, type_, path=None):
        """Mount a zip (`type_` == 1) or tar (otherwise) archive held in memory."""
        buff = JM_BufferFromBytes( memory)
        stream = mupdf.fz_open_buffer( buff)
        if type_==1:
            sub = mupdf.fz_open_zip_archive_with_stream( stream)
        else:
            sub = mupdf.fz_open_tar_archive_with_stream( stream)
        mupdf.fz_mount_multi_archive( self.this, sub, path)

    def add( self, content, path=None):
        '''
        Add a sub-archive.

        Args:
            content:
                The content to be added. May be one of:
                    `str` - must be path of directory or file.
                    `bytes`, `bytearray`, `io.BytesIO` - raw data.
                    `zipfile.Zipfile`.
                    `tarfile.TarFile`.
                    `pymupdf.Archive`.
                    A two-item tuple `(data, name)`.
                    List or tuple (but not tuple with length 2) of the above.
            path: (str) a "virtual" path name, under which the elements
                of content can be retrieved. Use it to e.g. cope with
                duplicate element names. For a single file name or raw
                data it is the (required, non-empty) entry name instead.

        Raises:
            ValueError: `content` is a string naming neither a file nor a
                directory.
            TypeError: `content` has an unrecognised type.
            AssertionError: a file or raw data was given without a
                non-empty string `path`.
        '''
        def is_binary_data(x):
            return isinstance(x, (bytes, bytearray, io.BytesIO))

        def make_subarch(entries, mount, fmt):
            """Record a sub-archive; consecutive 'tree' items with the
            same mount path are merged into one record."""
            subarch = dict(fmt=fmt, entries=entries, path=mount)
            if fmt != "tree" or self._subarchives == []:
                self._subarchives.append(subarch)
            else:
                ltree = self._subarchives[-1]
                if ltree["fmt"] != "tree" or ltree["path"] != subarch["path"]:
                    self._subarchives.append(subarch)
                else:
                    ltree["entries"].extend(subarch["entries"])
                    self._subarchives[-1] = ltree

        if isinstance(content, pathlib.Path):
            content = str(content)

        if isinstance(content, str):
            if os.path.isdir(content):
                self._add_dir(content, path)
                return make_subarch(os.listdir(content), path, 'dir')
            elif os.path.isfile(content):
                assert isinstance(path, str) and path != '', \
                        f'Need name for binary content, but {path=}.'
                with io.open(content, 'rb') as f:
                    ff = f.read()
                self._add_treeitem(ff, path)
                return make_subarch([path], None, 'tree')
            else:
                raise ValueError(f'Not a file or directory: {content!r}')

        elif is_binary_data(content):
            # The comma before the message matters: without it the f-string
            # is concatenated onto '' and becomes part of the comparison,
            # which lets an empty name pass.
            assert isinstance(path, str) and path != '', \
                    f'Need name for binary content, but {path=}.'
            self._add_treeitem(content, path)
            return make_subarch([path], None, 'tree')

        elif isinstance(content, zipfile.ZipFile):
            filename = getattr(content, "filename", None)
            if filename is None:
                fp = content.fp.getvalue()
                self._add_ziptarmemory(fp, 1, path)
            else:
                self._add_ziptarfile(filename, 1, path)
            return make_subarch(content.namelist(), path, 'zip')

        elif isinstance(content, tarfile.TarFile):
            filename = getattr(content.fileobj, "name", None)
            if filename is None:
                fp = content.fileobj
                if not isinstance(fp, io.BytesIO):
                    fp = fp.fileobj
                self._add_ziptarmemory(fp.getvalue(), 0, path)
            else:
                self._add_ziptarfile(filename, 0, path)
            return make_subarch(content.getnames(), path, 'tar')

        elif isinstance(content, Archive):
            self._add_arch(content, path)
            return make_subarch([], path, 'multi')

        if isinstance(content, tuple) and len(content) == 2:
            # covers the tree item plus path
            data, name = content
            assert isinstance(name, str), f'Unexpected {type(name)=}'
            if is_binary_data(data):
                self._add_treeitem(data, name, path=path)
            elif isinstance(data, str):
                if os.path.isfile(data):
                    with io.open(data, 'rb') as f:
                        ff = f.read()
                    self._add_treeitem(ff, name, path=path)
            else:
                assert 0, f'Unexpected {type(data)=}.'
            return make_subarch([name], path, 'tree')

        elif hasattr(content, '__getitem__'):
            # Deal with sequence of disparate items.
            for item in content:
                self.add(item, path)
            return

        else:
            raise TypeError(f'Unrecognised type {type(content)}.')
        assert 0

    @property
    def entry_list( self):
        '''
        List of sub archives.
        '''
        return self._subarchives

    def has_entry( self, name):
        """Ask MuPDF whether the multi-archive has an entry called `name`."""
        return mupdf.fz_has_archive_entry( self.this, name)

    def read_entry( self, name):
        """Read entry `name` and return the result of `JM_BinFromBuffer`."""
        buff = mupdf.fz_read_archive_entry( self.this, name)
        return JM_BinFromBuffer( buff)
- class Xml:
- def __enter__(self):
- return self
- def __exit__(self, *args):
- pass
def __init__(self, rhs):
    """Wrap an existing `mupdf.FzXml`, or parse a string as HTML5.

    Any other argument type trips an assertion.
    """
    if isinstance(rhs, mupdf.FzXml):
        self.this = rhs
        return
    if isinstance(rhs, str):
        source = mupdf.fz_new_buffer_from_copied_data(rhs)
        self.this = mupdf.fz_parse_xml_from_html5(source)
        return
    assert 0, f'Unsupported type for rhs: {type(rhs)}'
-
- def _get_node_tree( self):
- def show_node(node, items, shift):
- while node is not None:
- if node.is_text:
- items.append((shift, f'"{node.text}"'))
- node = node.next
- continue
- items.append((shift, f"({node.tagname}"))
- for k, v in node.get_attributes().items():
- items.append((shift, f"={k} '{v}'"))
- child = node.first_child
- if child:
- items = show_node(child, items, shift + 1)
- items.append((shift, f"){node.tagname}"))
- node = node.next
- return items
- shift = 0
- items = []
- items = show_node(self, items, shift)
- return items
-
- def add_bullet_list(self):
- """Add bulleted list ("ul" tag)"""
- child = self.create_element("ul")
- self.append_child(child)
- return child
- def add_class(self, text):
- """Set some class via CSS. Replaces complete class spec."""
- cls = self.get_attribute_value("class")
- if cls is not None and text in cls:
- return self
- self.remove_attribute("class")
- if cls is None:
- cls = text
- else:
- cls += " " + text
- self.set_attribute("class", cls)
- return self
- def add_code(self, text=None):
- """Add a "code" tag"""
- child = self.create_element("code")
- if type(text) is str:
- child.append_child(self.create_text_node(text))
- prev = self.span_bottom()
- if prev is None:
- prev = self
- prev.append_child(child)
- return self
- def add_codeblock(self):
- """Add monospaced lines ("pre" node)"""
- child = self.create_element("pre")
- self.append_child(child)
- return child
- def add_description_list(self):
- """Add description list ("dl" tag)"""
- child = self.create_element("dl")
- self.append_child(child)
- return child
- def add_division(self):
- """Add "div" tag"""
- child = self.create_element("div")
- self.append_child(child)
- return child
- def add_header(self, level=1):
- """Add header tag"""
- if level not in range(1, 7):
- raise ValueError("Header level must be in [1, 6]")
- this_tag = self.tagname
- new_tag = f"h{level}"
- child = self.create_element(new_tag)
- if this_tag not in ("h1", "h2", "h3", "h4", "h5", "h6", "p"):
- self.append_child(child)
- return child
- self.parent.append_child(child)
- return child
- def add_horizontal_line(self):
- """Add horizontal line ("hr" tag)"""
- child = self.create_element("hr")
- self.append_child(child)
- return child
- def add_image(self, name, width=None, height=None, imgfloat=None, align=None):
- """Add image node (tag "img")."""
- child = self.create_element("img")
- if width is not None:
- child.set_attribute("width", f"{width}")
- if height is not None:
- child.set_attribute("height", f"{height}")
- if imgfloat is not None:
- child.set_attribute("style", f"float: {imgfloat}")
- if align is not None:
- child.set_attribute("align", f"{align}")
- child.set_attribute("src", f"{name}")
- self.append_child(child)
- return child
- def add_link(self, href, text=None):
- """Add a hyperlink ("a" tag)"""
- child = self.create_element("a")
- if not isinstance(text, str):
- text = href
- child.set_attribute("href", href)
- child.append_child(self.create_text_node(text))
- prev = self.span_bottom()
- if prev is None:
- prev = self
- prev.append_child(child)
- return self
- def add_list_item(self):
- """Add item ("li" tag) under a (numbered or bulleted) list."""
- if self.tagname not in ("ol", "ul"):
- raise ValueError("cannot add list item to", self.tagname)
- child = self.create_element("li")
- self.append_child(child)
- return child
- def add_number_list(self, start=1, numtype=None):
- """Add numbered list ("ol" tag)"""
- child = self.create_element("ol")
- if start > 1:
- child.set_attribute("start", str(start))
- if numtype is not None:
- child.set_attribute("type", numtype)
- self.append_child(child)
- return child
- def add_paragraph(self):
- """Add "p" tag"""
- child = self.create_element("p")
- if self.tagname != "p":
- self.append_child(child)
- else:
- self.parent.append_child(child)
- return child
- def add_span(self):
- child = self.create_element("span")
- self.append_child(child)
- return child
- def add_style(self, text):
- """Set some style via CSS style. Replaces complete style spec."""
- style = self.get_attribute_value("style")
- if style is not None and text in style:
- return self
- self.remove_attribute("style")
- if style is None:
- style = text
- else:
- style += ";" + text
- self.set_attribute("style", style)
- return self
- def add_subscript(self, text=None):
- """Add a subscript ("sub" tag)"""
- child = self.create_element("sub")
- if type(text) is str:
- child.append_child(self.create_text_node(text))
- prev = self.span_bottom()
- if prev is None:
- prev = self
- prev.append_child(child)
- return self
- def add_superscript(self, text=None):
- """Add a superscript ("sup" tag)"""
- child = self.create_element("sup")
- if type(text) is str:
- child.append_child(self.create_text_node(text))
- prev = self.span_bottom()
- if prev is None:
- prev = self
- prev.append_child(child)
- return self
- def add_text(self, text):
- """Add text. Line breaks are honored."""
- lines = text.splitlines()
- line_count = len(lines)
- prev = self.span_bottom()
- if prev is None:
- prev = self
- for i, line in enumerate(lines):
- prev.append_child(self.create_text_node(line))
- if i < line_count - 1:
- prev.append_child(self.create_element("br"))
- return self
def append_child(self, child):
    """Append the node wrapped by `child` as a child of this node."""
    own_node, new_node = self.this, child.this
    mupdf.fz_dom_append_child(own_node, new_node)
-
- def append_styled_span(self, style):
- span = self.create_element("span")
- span.add_style(style)
- prev = self.span_bottom()
- if prev is None:
- prev = self
- prev.append_child(span)
- return prev
def bodytag(self):
    """Return the result of `fz_dom_body` for this node, wrapped as `Xml`."""
    body = mupdf.fz_dom_body(self.this)
    return Xml(body)
-
def clone(self):
    """Return the result of `fz_dom_clone` for this node, wrapped as `Xml`."""
    return Xml(mupdf.fz_dom_clone(self.this))
-
- @staticmethod
- def color_text(color):
- if type(color) is str:
- return color
- if type(color) is int:
- return f"rgb({sRGB_to_rgb(color)})"
- if type(color) in (tuple, list):
- return f"rgb{tuple(color)}"
- return color
def create_element(self, tag):
    """Create a new element named `tag` and return it wrapped as `Xml`."""
    element = mupdf.fz_dom_create_element(self.this, tag)
    return Xml(element)
-
def create_text_node(self, text):
    """Create a new text node holding `text` and return it wrapped as `Xml`."""
    text_node = mupdf.fz_dom_create_text_node(self.this, text)
    return Xml(text_node)
-
def debug(self):
    """Print a list of the node tree below self.

    One line per entry of `_get_node_tree()`, indented by one space per
    depth level, with line breaks shown as a literal backslash-n.
    """
    for depth, label in self._get_node_tree():
        indent = " " * depth
        message(indent + label.replace("\n", "\\n"))
def find(self, tag, att, match):
    """Search via `fz_dom_find` with the given tag, attribute and value.

    Returns:
        The hit wrapped as `Xml`, or None if the result is empty.
    """
    hit = mupdf.fz_dom_find(self.this, tag, att, match)
    return Xml(hit) if hit.m_internal else None
-
def find_next(self, tag, att, match):
    """Continue a search via `fz_dom_find_next` with the given criteria.

    Returns:
        The hit wrapped as `Xml`, or None if the result is empty.
    """
    hit = mupdf.fz_dom_find_next(self.this, tag, att, match)
    return Xml(hit) if hit.m_internal else None
-
@property
def first_child(self):
    """First child of this node.

    Returns:
        The child wrapped as `Xml`; None for a text node or if there is
        no child.
    """
    if mupdf.fz_xml_text(self.this):
        # text node, has no child.
        return
    # Hand over the wrapped MuPDF node, as every other fz_dom_* call in
    # this class does, instead of the Python wrapper object itself.
    ret = mupdf.fz_dom_first_child(self.this)
    if ret.m_internal:
        return Xml(ret)
-
def get_attribute_value(self, key):
    """Return what `fz_dom_attribute` yields for `key` (must be truthy)."""
    assert key
    value = mupdf.fz_dom_attribute(self.this, key)
    return value
-
def get_attributes(self):
    """Collect the node's attributes into a dict.

    Returns:
        None for a text node, else a dict built from the pairs that
        `fz_dom_get_attribute` delivers for index 0, 1, 2, ... until a
        pair with a falsy key or value shows up.
    """
    if mupdf.fz_xml_text(self.this):
        # text node, has no attributes.
        return
    attributes = dict()
    index = 0
    while True:
        val, key = mupdf.fz_dom_get_attribute(self.this, index)
        if not (val and key):
            return attributes
        attributes[key] = val
        index += 1
-
def insert_after(self, node):
    """Forward this node and `node` to `fz_dom_insert_after`."""
    own_node, other_node = self.this, node.this
    mupdf.fz_dom_insert_after(own_node, other_node)
-
def insert_before(self, node):
    """Forward this node and `node` to `fz_dom_insert_before`."""
    own_node, other_node = self.this, node.this
    mupdf.fz_dom_insert_before(own_node, other_node)
-
- def insert_text(self, text):
- lines = text.splitlines()
- line_count = len(lines)
- for i, line in enumerate(lines):
- self.append_child(self.create_text_node(line))
- if i < line_count - 1:
- self.append_child(self.create_element("br"))
- return self
- @property
- def is_text(self):
- """Check if this is a text node."""
- return self.text is not None
- @property
- def last_child(self):
- """Return last child node."""
- child = self.first_child
- if child is None:
- return None
- while True:
- next = child.next
- if not next:
- return child
- child = next
@property
def next(self):
    """Result of `fz_dom_next` wrapped as `Xml`, or None if it is empty."""
    follower = mupdf.fz_dom_next(self.this)
    return Xml(follower) if follower.m_internal else None
-
@property
def parent(self):
    """Result of `fz_dom_parent` wrapped as `Xml`, or None if it is empty."""
    owner = mupdf.fz_dom_parent(self.this)
    return Xml(owner) if owner.m_internal else None
-
@property
def previous(self):
    """Result of `fz_dom_previous` wrapped as `Xml`, or None if it is empty."""
    predecessor = mupdf.fz_dom_previous(self.this)
    return Xml(predecessor) if predecessor.m_internal else None
-
def remove(self):
    """Forward this node to `fz_dom_remove`."""
    own_node = self.this
    mupdf.fz_dom_remove(own_node)
-
def remove_attribute(self, key):
    """Remove attribute `key` (must be truthy) via `fz_dom_remove_attribute`."""
    assert key
    own_node = self.this
    mupdf.fz_dom_remove_attribute(own_node, key)
-
@property
def root(self):
    """Result of `fz_xml_root` for this node, wrapped as `Xml`."""
    top = mupdf.fz_xml_root(self.this)
    return Xml(top)
-
- def set_align(self, align):
- """Set text alignment via CSS style"""
- text = "text-align: %s"
- if isinstance( align, str):
- t = align
- elif align == TEXT_ALIGN_LEFT:
- t = "left"
- elif align == TEXT_ALIGN_CENTER:
- t = "center"
- elif align == TEXT_ALIGN_RIGHT:
- t = "right"
- elif align == TEXT_ALIGN_JUSTIFY:
- t = "justify"
- else:
- raise ValueError(f"Unrecognised {align=}")
- text = text % t
- self.add_style(text)
- return self
def set_attribute(self, key, value):
    """Pass `key` (must be truthy) and `value` to `fz_dom_add_attribute`."""
    assert key
    own_node = self.this
    mupdf.fz_dom_add_attribute(own_node, key, value)
-
- def set_bgcolor(self, color):
- """Set background color via CSS style"""
- text = f"background-color: %s" % self.color_text(color)
- self.add_style(text) # does not work on span level
- return self
- def set_bold(self, val=True):
- """Set bold on / off via CSS style"""
- if val:
- val="bold"
- else:
- val="normal"
- text = "font-weight: %s" % val
- self.append_styled_span(text)
- return self
- def set_color(self, color):
- """Set text color via CSS style"""
- text = f"color: %s" % self.color_text(color)
- self.append_styled_span(text)
- return self
- def set_columns(self, cols):
- """Set number of text columns via CSS style"""
- text = f"columns: {cols}"
- self.append_styled_span(text)
- return self
- def set_font(self, font):
- """Set font-family name via CSS style"""
- text = "font-family: %s" % font
- self.append_styled_span(text)
- return self
- def set_fontsize(self, fontsize):
- """Set font size name via CSS style"""
- if type(fontsize) is str:
- px=""
- else:
- px="px"
- text = f"font-size: {fontsize}{px}"
- self.append_styled_span(text)
- return self
- def set_id(self, unique):
- """Set a unique id."""
- # check uniqueness
- root = self.root
- if root.find(None, "id", unique):
- raise ValueError(f"id '{unique}' already exists")
- self.set_attribute("id", unique)
- return self
- def set_italic(self, val=True):
- """Set italic on / off via CSS style"""
- if val:
- val="italic"
- else:
- val="normal"
- text = "font-style: %s" % val
- self.append_styled_span(text)
- return self
- def set_leading(self, leading):
- """Set inter-line spacing value via CSS style - block-level only."""
- text = f"-mupdf-leading: {leading}"
- self.add_style(text)
- return self
- def set_letter_spacing(self, spacing):
- """Set inter-letter spacing value via CSS style"""
- text = f"letter-spacing: {spacing}"
- self.append_styled_span(text)
- return self
- def set_lineheight(self, lineheight):
- """Set line height name via CSS style - block-level only."""
- text = f"line-height: {lineheight}"
- self.add_style(text)
- return self
- def set_margins(self, val):
- """Set margin values via CSS style"""
- text = "margins: %s" % val
- self.append_styled_span(text)
- return self
- def set_opacity(self, opacity):
- """Set opacity via CSS style"""
- text = f"opacity: {opacity}"
- self.append_styled_span(text)
- return self
- def set_pagebreak_after(self):
- """Insert a page break after this node."""
- text = "page-break-after: always"
- self.add_style(text)
- return self
- def set_pagebreak_before(self):
- """Insert a page break before this node."""
- text = "page-break-before: always"
- self.add_style(text)
- return self
def set_properties(
    self,
    align=None,
    bgcolor=None,
    bold=None,
    color=None,
    columns=None,
    font=None,
    fontsize=None,
    indent=None,
    italic=None,
    leading=None,
    letter_spacing=None,
    lineheight=None,
    margins=None,
    pagebreak_after=None,
    pagebreak_before=None,
    word_spacing=None,
    unqid=None,
    cls=None,
):
    """Set any or all properties of a node.

    To be used for existing nodes preferably.

    The requested styles are first applied to a temporary division added
    to the root node. The "style" values of that division and of its chain
    of first children are then joined with ";" and stored as this node's
    "style" attribute. The temporary division is always removed again.

    Args:
        align ... word_spacing: forwarded to the matching ``set_*`` method
            when not None.
        pagebreak_after, pagebreak_before: a page break is requested only
            for truthy values; None and False both mean "no page break".
        unqid: if not None, passed to ``set_id`` of this node.
        cls: if not None, passed to ``add_class`` of this node.

    Returns:
        ``self`` so calls can be chained.
    """
    root = self.root
    temp = root.add_division()
    try:
        if align is not None:
            temp.set_align(align)
        if bgcolor is not None:
            temp.set_bgcolor(bgcolor)
        if bold is not None:
            temp.set_bold(bold)
        if color is not None:
            temp.set_color(color)
        if columns is not None:
            temp.set_columns(columns)
        if font is not None:
            temp.set_font(font)
        if fontsize is not None:
            temp.set_fontsize(fontsize)
        if indent is not None:
            temp.set_text_indent(indent)
        if italic is not None:
            temp.set_italic(italic)
        if leading is not None:
            temp.set_leading(leading)
        if letter_spacing is not None:
            temp.set_letter_spacing(letter_spacing)
        if lineheight is not None:
            temp.set_lineheight(lineheight)
        if margins is not None:
            temp.set_margins(margins)
        # The page-break setters take no value, so an explicit False must
        # not trigger them (previously any non-None value did).
        if pagebreak_after:
            temp.set_pagebreak_after()
        if pagebreak_before:
            temp.set_pagebreak_before()
        if word_spacing is not None:
            temp.set_word_spacing(word_spacing)
        if unqid is not None:
            self.set_id(unqid)
        if cls is not None:
            self.add_class(cls)

        # Collect the division's own style plus the style of every node on
        # the first-child chain below it.
        styles = []
        top_style = temp.get_attribute_value("style")
        if top_style is not None:
            styles.append(top_style)
        child = temp.first_child
        while child:
            styles.append(child.get_attribute_value("style"))
            child = child.first_child
        self.set_attribute("style", ";".join(styles))
    finally:
        # Never leave the scratch division behind, even if a setter raised.
        temp.remove()
    return self
def set_text_indent(self, indent):
    """Set the CSS ``text-indent`` on this node's own style (block-level only).

    Returns ``self`` so calls can be chained.
    """
    self.add_style("text-indent: {}".format(indent))
    return self
def set_underline(self, val="underline"):
    """Set the CSS ``text-decoration`` via a styled span.

    ``val`` is the decoration value and defaults to "underline".
    Returns ``self`` so calls can be chained.
    """
    decoration = "text-decoration: %s" % val
    self.append_styled_span(decoration)
    return self
def set_word_spacing(self, spacing):
    """Set inter-word spacing via a CSS ``word-spacing`` span.

    Returns ``self`` so calls can be chained.
    """
    self.append_styled_span("word-spacing: {}".format(spacing))
    return self
def span_bottom(self):
    """Find deepest level in stacked spans.

    Starting at the last child, trailing text nodes are skipped. If the
    node reached is not a "span", None is returned. Otherwise the span
    nesting is followed downwards (stepping over text nodes and "a",
    "sub", "sup", "body" siblings) and the innermost span is returned.
    """
    node = self.last_child
    if node is None:
        return None
    # walk backwards over trailing text nodes
    while node is not None and node.is_text:
        node = node.previous
    if node is None or node.tagname != "span":
        return None

    deepest = self
    while node is not None:
        if node.tagname in ("a", "sub", "sup", "body") or node.is_text:
            node = node.next  # not a nesting level: try the next sibling
        elif node.tagname == "span":
            deepest = node  # one level deeper
            node = node.first_child
        else:
            break  # any other tag ends the span stack
    return deepest
@property
def tagname(self):
    """Tag of this node as reported by ``mupdf.fz_xml_tag``."""
    node = self.this
    return mupdf.fz_xml_tag(node)
-
@property
def text(self):
    """Text of this node as reported by ``mupdf.fz_xml_text``."""
    node = self.this
    return mupdf.fz_xml_text(node)
-
# Aliases: add_var, add_samp and add_kbd are bound to the same function
# object as add_code.
add_var = add_code
add_samp = add_code
add_kbd = add_code
class Colorspace:
    """Thin wrapper around a ``mupdf.FzColorspace`` stored in ``self.this``."""

    def __init__(self, type_):
        """Supported are GRAY, RGB and CMYK.

        Args:
            type_: either an existing ``mupdf.FzColorspace`` (wrapped as is)
                or one of CS_GRAY, CS_RGB, CS_CMYK. Any other value selects
                RGB.
        """
        if isinstance(type_, mupdf.FzColorspace):
            self.this = type_
        elif type_ == CS_GRAY:
            self.this = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_GRAY)
        elif type_ == CS_CMYK:
            self.this = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_CMYK)
        else:
            # CS_RGB and every unrecognised value map to RGB.
            self.this = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB)

    def __repr__(self):
        # Map the component count to the CS_* suffix. Counts other than
        # 1, 3, 4 give an empty suffix instead of raising IndexError, which
        # the former tuple lookup did for n > 4.
        x = {1: "GRAY", 3: "RGB", 4: "CMYK"}.get(self.n, "")
        return "Colorspace(CS_%s) - %s" % (x, self.name)

    def _name(self):
        """Return the colorspace name from ``mupdf.fz_colorspace_name``."""
        return mupdf.fz_colorspace_name(self.this)

    @property
    def n(self):
        """Size of one pixel."""
        return mupdf.fz_colorspace_n(self.this)

    @property
    def name(self):
        """Name of the Colorspace."""
        return self._name()
class DeviceWrapper:
    """Holds a MuPDF device in ``self.this``.

    Accepted positional argument patterns (checked with ``args_match``):
        (mupdf.FzDevice,)           - wrap the given device
        (Pixmap, clip)              - draw device for the pixmap, limited
                                      to ``clip`` unless it is infinite
        (mupdf.FzDisplayList,)      - list device for the display list
        (mupdf.FzStextPage, flags)  - structured-text device with options
                                      built from ``flags``
    Any other pattern raises ``Exception``.
    """

    def __init__(self, *args):
        if args_match(args, mupdf.FzDevice):
            (self.this,) = args
        elif args_match(args, Pixmap, None):
            pixmap, clip = args
            irect = JM_irect_from_py(clip)
            if mupdf.fz_is_infinite_irect(irect):
                self.this = mupdf.fz_new_draw_device(mupdf.FzMatrix(), pixmap)
            else:
                self.this = mupdf.fz_new_draw_device_with_bbox(mupdf.FzMatrix(), pixmap, irect)
        elif args_match(args, mupdf.FzDisplayList):
            (display_list,) = args
            self.this = mupdf.fz_new_list_device(display_list)
        elif args_match(args, mupdf.FzStextPage, None):
            stext_page, flags = args
            options = mupdf.FzStextOptions(flags)
            self.this = mupdf.fz_new_stext_device(stext_page, options)
        else:
            raise Exception(f'Unrecognised args for DeviceWrapper: {args!r}')
class DisplayList:
    """Wrapper around a ``mupdf.FzDisplayList`` stored in ``self.this``."""

    def __del__(self):
        if not type(self) is DisplayList: return
        self.thisown = False

    def __init__(self, *args):
        """Create from a ``mupdf.FzRect`` or wrap a ``mupdf.FzDisplayList``."""
        if len(args) == 1 and isinstance(args[0], mupdf.FzRect):
            self.this = mupdf.FzDisplayList(args[0])
        elif len(args) == 1 and isinstance(args[0], mupdf.FzDisplayList):
            self.this = args[0]
        else:
            assert 0, f'Unrecognised {args=}'

    def get_pixmap(self, matrix=None, colorspace=None, alpha=0, clip=None):
        """Render the display list into a pixmap.

        A ``Colorspace`` argument is unwrapped to its MuPDF object; any
        other value (including None) selects RGB.
        """
        if isinstance(colorspace, Colorspace):
            colorspace = colorspace.this
        else:
            colorspace = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB)
        val = JM_pixmap_from_display_list(self.this, matrix, colorspace, alpha, clip, None)
        val.thisown = True
        return val

    def get_textpage(self, flags=3):
        """Make a TextPage from a DisplayList."""
        stext_options = mupdf.FzStextOptions()
        stext_options.flags = flags
        val = mupdf.FzStextPage(self.this, stext_options)
        val.thisown = True
        return val

    @property
    def rect(self):
        """Bounding rectangle of the display list as a ``Rect``."""
        val = JM_py_from_rect(mupdf.fz_bound_display_list(self.this))
        val = Rect(val)
        return val

    def run(self, dw, m, area):
        """Run the display list through a device.

        Args:
            dw: device holder. Its ``device`` attribute is used when
                present; otherwise ``dw.this`` is used, which is where
                ``DeviceWrapper`` stores its MuPDF device.
            m: matrix, converted with ``JM_matrix_from_py``.
            area: rectangle, converted with ``JM_rect_from_py``.
        """
        # DeviceWrapper only defines ``.this``; reading ``dw.device``
        # unconditionally raised AttributeError for such objects.
        device = getattr(dw, "device", None)
        if device is None:
            device = dw.this
        mupdf.fz_run_display_list(
            self.this,
            device,
            JM_matrix_from_py(m),
            JM_rect_from_py(area),
            mupdf.FzCookie(),
        )
# Only when g_use_extra is set: bind a module-level alias for
# extra.FzDocument_insert_pdf (presumably an optimised implementation
# from the optional `extra` module; confirm against its definition).
if g_use_extra:
    extra_FzDocument_insert_pdf = extra.FzDocument_insert_pdf
- class Document:
- def __contains__(self, loc) -> bool:
- if type(loc) is int:
- if loc < self.page_count:
- return True
- return False
- if type(loc) not in (tuple, list) or len(loc) != 2:
- return False
- chapter, pno = loc
- if (0
- or not isinstance(chapter, int)
- or chapter < 0
- or chapter >= self.chapter_count
- ):
- return False
- if (0
- or not isinstance(pno, int)
- or pno < 0
- or pno >= self.chapter_page_count(chapter)
- ):
- return False
- return True
- def __delitem__(self, i)->None:
- if not self.is_pdf:
- raise ValueError("is no PDF")
- if type(i) is int:
- return self.delete_page(i)
- if type(i) in (list, tuple, range):
- return self.delete_pages(i)
- if type(i) is not slice:
- raise ValueError("bad argument type")
- pc = self.page_count
- start = i.start if i.start else 0
- stop = i.stop if i.stop else pc
- step = i.step if i.step else 1
- while start < 0:
- start += pc
- if start >= pc:
- raise ValueError("bad page number(s)")
- while stop < 0:
- stop += pc
- if stop > pc:
- raise ValueError("bad page number(s)")
- return self.delete_pages(range(start, stop, step))
- def __enter__(self):
- return self
- def __exit__(self, *args):
- self.close()
# Typing overloads: an int index yields a Page.
@typing.overload
def __getitem__(self, i: int = 0) -> Page:
    ...

# These overloads use subscripted builtin generics (list[...], tuple[...]),
# so they are only declared for Python 3.9 and later.
if sys.version_info >= (3, 9):
    @typing.overload
    def __getitem__(self, i: slice) -> list[Page]:
        ...

    @typing.overload
    def __getitem__(self, i: tuple[int, int]) -> Page:
        ...

def __getitem__(self, i=0):
    """Return the page(s) addressed by ``i``.

    Args:
        i: an int page number, a ``(chapter, pno)`` tuple of two ints,
            or a slice.

    Returns:
        For a slice, a list of the pages selected by applying the slice
        to ``len(self)``; otherwise the result of ``self.load_page(i)``.

    Raises:
        IndexError: if ``i`` is not contained in the document.
    """
    if isinstance(i, slice):
        # resolve the slice against the page count and load each page
        return [self[j] for j in range(*i.indices(len(self)))]
    assert isinstance(i, int) or (isinstance(i, tuple) and len(i) == 2 and all(isinstance(x, int) for x in i)), \
            f'Invalid item number: {i=}.'
    if i not in self:
        raise IndexError(f"page {i} not in document")
    return self.load_page(i)
def __init__(self, filename=None, stream=None, filetype=None, rect=None, width=0, height=0, fontsize=11):
    """Creates a document. Use 'open' as a synonym.

    Notes:
        Basic usages:
        open() - new PDF document
        open(filename) - string or pathlib.Path, must have supported
                file extension.
        open(type, buffer) - type: valid extension, buffer: bytes object.
        open(stream=buffer, filetype=type) - keyword version of previous.
        open(filename, filetype=type) - filename with unrecognized extension.
        rect, width, height, fontsize: layout reflowable document
        on open (e.g. EPUB). Ignored if n/a.

    Args:
        filename: a str, an object with an "absolute" attribute (converted
            with str()), an object with a "name" attribute, or an existing
            mupdf.PdfDocument which is then wrapped directly.
        stream: bytes, memoryview, bytearray or io.BytesIO.
        filetype: document type; if a stream is given without filetype,
            a given filename is used as the type.
        rect, width, height, fontsize: layout parameters; a finite rect
            overrides width and height.

    Raises:
        TypeError: unsupported stream or filename type.
        EmptyFileError: the stream or the file is empty.
        FileNotFoundError: the file does not exist.
        FileDataError: the path is no file, or the document cannot be opened.
    """
    # We temporarily set JM_mupdf_show_errors=0 while we are constructing,
    # then restore its original value in a `finally:` block.
    #
    global JM_mupdf_show_errors
    JM_mupdf_show_errors_old = JM_mupdf_show_errors
    JM_mupdf_show_errors = 0

    try:
        # initialise bookkeeping attributes before any document is opened
        self.is_closed = False
        self.is_encrypted = False
        self.is_encrypted = False
        self.metadata = None
        self.FontInfos = []
        self.Graftmaps = {}
        self.ShownPages = {}
        self.InsertedImages = {}
        self._page_refs = weakref.WeakValueDictionary()
        if isinstance(filename, mupdf.PdfDocument):
            # wrap an already opened MuPDF PDF document; nothing else is set up
            pdf_document = filename
            self.this = pdf_document
            self.this_is_pdf = True
            return

        # layout size: explicit width / height unless a finite rect is given
        w = width
        h = height
        r = JM_rect_from_py(rect)
        if not mupdf.fz_is_infinite_rect(r):
            w = r.x1 - r.x0
            h = r.y1 - r.y0
        self._name = filename
        self.stream = stream

        if stream is not None:
            if filename is not None and filetype is None:
                # 2025-05-06: Use <filename> as the filetype. This is
                # reversing precedence - we used to use <filename> if both
                # were set.
                filetype = filename
            # normalise the stream to bytes / memoryview
            if isinstance(stream, (bytes, memoryview)):
                pass
            elif isinstance(stream, bytearray):
                stream = bytes(stream)
            elif isinstance(stream, io.BytesIO):
                stream = stream.getvalue()
            else:
                raise TypeError(f"bad stream: {type(stream)=}.")
            self.stream = stream

            assert isinstance(stream, (bytes, memoryview))
            if len(stream) == 0:
                # MuPDF raise an exception for this but also generates
                # warnings, which is not very helpful for us. So instead we
                # raise a specific exception.
                raise EmptyFileError('Cannot open empty stream.')

            stream2 = mupdf.fz_open_memory(mupdf.python_buffer_data(stream), len(stream))
            try:
                doc = mupdf.fz_open_document_with_stream(filetype if filetype else '', stream2)
            except Exception as e:
                if g_exceptions_verbose > 1: exception_info()
                raise FileDataError('Failed to open stream') from e

        elif filename:
            assert not stream
            # normalise the filename to a str
            if isinstance(filename, str):
                pass
            elif hasattr(filename, "absolute"):
                filename = str(filename)
            elif hasattr(filename, "name"):
                filename = filename.name
            else:
                raise TypeError(f"bad filename: {type(filename)=} {filename=}.")
            self._name = filename

            # Generate our own specific exceptions. This avoids MuPDF
            # generating warnings etc.
            if not os.path.exists(filename):
                raise FileNotFoundError(f"no such file: '{filename}'")
            elif not os.path.isfile(filename):
                raise FileDataError(f"'{filename}' is no file")
            elif os.path.getsize(filename) == 0:
                raise EmptyFileError(f'Cannot open empty file: {filename=}.')

            if filetype:
                # Override the type implied by <filename>. MuPDF does not
                # have a way to do this directly so we open via a stream.
                try:
                    fz_stream = mupdf.fz_open_file(filename)
                    doc = mupdf.fz_open_document_with_stream(filetype, fz_stream)
                except Exception as e:
                    if g_exceptions_verbose > 1: exception_info()
                    raise FileDataError(f'Failed to open file {filename!r} as type {filetype!r}.') from e
            else:
                try:
                    doc = mupdf.fz_open_document(filename)
                except Exception as e:
                    if g_exceptions_verbose > 1: exception_info()
                    raise FileDataError(f'Failed to open file {filename!r}.') from e
        else:
            # neither stream nor filename: create a new PDF
            pdf = mupdf.PdfDocument()
            doc = mupdf.FzDocument(pdf)

        # apply the requested layout, or a 400 x 600 / 11pt default for
        # reflowable documents
        if w > 0 and h > 0:
            mupdf.fz_layout_document(doc, w, h, fontsize)
        elif mupdf.fz_is_document_reflowable(doc):
            mupdf.fz_layout_document(doc, 400, 600, 11)
        self.this = doc
        # fixme: not sure where self.thisown gets initialised in PyMuPDF.
        #
        self.thisown = True
        if self.thisown:
            self._graft_id = TOOLS.gen_id()
            if self.needs_pass:
                self.is_encrypted = True
            else: # we won't init until doc is decrypted
                self.init_doc()
            # the following hack detects invalid/empty SVG files, which else may lead
            # to interpreter crashes
            if filename and filename.lower().endswith("svg") or filetype and "svg" in filetype.lower():
                try:
                    _ = self.convert_to_pdf() # this seems to always work
                except Exception as e:
                    if g_exceptions_verbose > 1: exception_info()
                    raise FileDataError("cannot open broken document") from e
        if g_use_extra:
            # choose the page count function matching the document type
            self.this_is_pdf = isinstance( self.this, mupdf.PdfDocument)
            if self.this_is_pdf:
                self.page_count2 = extra.page_count_pdf
            else:
                self.page_count2 = extra.page_count_fz
    finally:
        JM_mupdf_show_errors = JM_mupdf_show_errors_old
-
- def __len__(self) -> int:
- return self.page_count
- def __repr__(self) -> str:
- m = "closed " if self.is_closed else ""
- if self.stream is None:
- if self.name == "":
- return m + "Document(<new PDF, doc# %i>)" % self._graft_id
- return m + "Document('%s')" % (self.name,)
- return m + "Document('%s', <memory, doc# %i>)" % (self.name, self._graft_id)
def _addFormFont(self, name, font):
    """Add new form font.

    Stores ``font`` (converted with ``JM_pdf_obj_from_str``) under key
    ``name`` in Root/AcroForm/DR/Font. Does nothing if the document is
    not a PDF.

    Raises:
        ValueError: document closed or encrypted.
        RuntimeError: the PDF has no form font dictionary.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return
    font_dict = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('AcroForm'),
        PDF_NAME('DR'),
        PDF_NAME('Font'),
    )
    if not font_dict.m_internal or not mupdf.pdf_is_dict(font_dict):
        raise RuntimeError("PDF has no form fonts yet")
    key = mupdf.pdf_new_name(name)
    value = JM_pdf_obj_from_str(pdf, font)
    mupdf.pdf_dict_put(font_dict, key, value)
def del_toc_item(
    self,
    idx: int,
) -> None:
    """Delete TOC / bookmark item by index.

    ``idx`` selects an entry of ``get_outline_xrefs()``; that xref is
    handed to ``_remove_toc_item``.
    """
    outline_xrefs = self.get_outline_xrefs()
    self._remove_toc_item(outline_xrefs[idx])
def _delToC(self):
    """Delete the TOC.

    Returns:
        A list of the deleted xrefs: the outline items followed by the
        outline root. The list is empty if the document is not a PDF or
        has no /Outlines entry.

    Raises:
        ValueError: document closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    xrefs = []
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return xrefs  # not a pdf
    catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    outlines = mupdf.pdf_dict_get(catalog, PDF_NAME('Outlines'))
    if not outlines.m_internal:
        return xrefs  # no outlines or some problem
    first_item = mupdf.pdf_dict_get(outlines, PDF_NAME('First'))
    xrefs = JM_outline_xrefs(first_item, xrefs)
    item_count = len(xrefs)
    # remove the outline root object and its catalog entry first
    outlines_xref = mupdf.pdf_to_num(outlines)
    mupdf.pdf_delete_object(pdf, outlines_xref)
    mupdf.pdf_dict_del(catalog, PDF_NAME('Outlines'))
    # then every collected outline item
    for pos in range(item_count):
        _, item_xref = JM_INT_ITEM(xrefs, pos)
        mupdf.pdf_delete_object(pdf, item_xref)
    xrefs.append(outlines_xref)
    self.init_doc()
    return xrefs
def _delete_page(self, pno):
    """Delete page ``pno`` from the PDF.

    If the document has a ``rev_page_map``, the page tree is dropped
    afterwards via ``ll_pdf_drop_page_tree``.
    """
    pdf = _as_pdf_document(self)
    mupdf.pdf_delete_page(pdf, pno)
    internal = pdf.m_internal
    if internal.rev_page_map:
        mupdf.ll_pdf_drop_page_tree(internal)
def _deleteObject(self, xref):
    """Delete object.

    Raises:
        ValueError: ``xref`` is not within 1 .. xref length - 1.
    """
    pdf = _as_pdf_document(self)
    highest = mupdf.pdf_xref_len(pdf) - 1
    if not _INRANGE(xref, 1, highest):
        raise ValueError(MSG_BAD_XREF)
    mupdf.pdf_delete_object(pdf, xref)
def _do_links(
    doc1: 'Document',
    doc2: 'Document',
    from_page: int = -1,
    to_page: int = -1,
    start_at: int = -1,
) -> None:
    """Insert links contained in copied page range into destination PDF.

    Parameter values **must** equal those of method insert_pdf(), which must
    have been previously executed.

    Args:
        doc1: destination document (its pages receive the links).
        doc2: source document (its pages are read for links).
        from_page: first source page; values < 0 mean 0, values beyond
            the last page are clamped to the last page.
        to_page: last source page; values < 0 or beyond the last page
            mean the last page. May be smaller than from_page, which
            gives a reversed page range.
        start_at: destination page number of the first copied page.

    Raises:
        ValueError: if start_at is negative.
    """
    #pymupdf.log( 'utils.do_links()')
    # --------------------------------------------------------------------------
    # internal function to create the actual "/Annots" object string
    # --------------------------------------------------------------------------
    def cre_annot(lnk, xref_dst, pno_src, ctm):
        """Create annotation object string for a passed-in link.

        Returns an empty string for link kinds other than GOTO, GOTOR,
        LAUNCH and URI.
        """
        r = lnk["from"] * ctm # rect in PDF coordinates
        rect = _format_g(tuple(r))
        if lnk["kind"] == LINK_GOTO:
            txt = annot_skel["goto1"] # annot_goto
            # position of the target page within the copied range
            idx = pno_src.index(lnk["page"])
            p = lnk["to"] * ctm # target point in PDF coordinates
            annot = txt(xref_dst[idx], p.x, p.y, lnk["zoom"], rect)
        elif lnk["kind"] == LINK_GOTOR:
            if lnk["page"] >= 0:
                txt = annot_skel["gotor1"] # annot_gotor
                pnt = lnk.get("to", Point(0, 0)) # destination point
                if type(pnt) is not Point:
                    pnt = Point(0, 0)
                annot = txt(
                    lnk["page"],
                    pnt.x,
                    pnt.y,
                    lnk["zoom"],
                    lnk["file"],
                    lnk["file"],
                    rect,
                )
            else:
                txt = annot_skel["gotor2"] # annot_gotor_n
                to = get_pdf_str(lnk["to"])
                to = to[1:-1]  # drop first and last character of the PDF string
                f = lnk["file"]
                annot = txt(to, f, rect)
        elif lnk["kind"] == LINK_LAUNCH:
            txt = annot_skel["launch"] # annot_launch
            annot = txt(lnk["file"], lnk["file"], rect)
        elif lnk["kind"] == LINK_URI:
            txt = annot_skel["uri"] # annot_uri
            annot = txt(lnk["uri"], rect)
        else:
            annot = ""
        return annot
    # --------------------------------------------------------------------------
    # validate & normalize parameters
    if from_page < 0:
        fp = 0
    elif from_page >= doc2.page_count:
        fp = doc2.page_count - 1
    else:
        fp = from_page
    if to_page < 0 or to_page >= doc2.page_count:
        tp = doc2.page_count - 1
    else:
        tp = to_page
    if start_at < 0:
        raise ValueError("'start_at' must be >= 0")
    sa = start_at
    incr = 1 if fp <= tp else -1 # page range could be reversed
    # lists of source / destination page numbers
    pno_src = list(range(fp, tp + incr, incr))
    pno_dst = [sa + i for i in range(len(pno_src))]
    # lists of source / destination page xrefs
    xref_src = []
    xref_dst = []
    for i in range(len(pno_src)):
        p_src = pno_src[i]
        p_dst = pno_dst[i]
        old_xref = doc2.page_xref(p_src)
        new_xref = doc1.page_xref(p_dst)
        xref_src.append(old_xref)
        xref_dst.append(new_xref)
    # create the links for each copied page in destination PDF
    for i in range(len(xref_src)):
        page_src = doc2[pno_src[i]] # load source page
        links = page_src.get_links() # get all its links
        #log( '{pno_src=}')
        #log( '{type(page_src)=}')
        #log( '{page_src=}')
        #log( '{=i len(links)}')
        if len(links) == 0: # no links there
            page_src = None
            continue
        ctm = ~page_src.transformation_matrix # calc page transformation matrix
        page_dst = doc1[pno_dst[i]] # load destination page
        link_tab = [] # store all link definitions here
        for l in links:
            if l["kind"] == LINK_GOTO and (l["page"] not in pno_src):
                continue # GOTO link target not in copied pages
            annot_text = cre_annot(l, xref_dst, pno_src, ctm)
            if annot_text:
                link_tab.append(annot_text)
        if link_tab != []:
            page_dst._addAnnot_FromString( tuple(link_tab))
    #log( 'utils.do_links() returning.')
def _do_widgets(
    tar: 'Document',
    src: 'Document',
    graftmap,
    from_page: int = -1,
    to_page: int = -1,
    start_at: int = -1,
    join_duplicates=0,
) -> None:
    """Insert widgets of copied page range into target PDF.

    Parameter values **must** equal those of method insert_pdf() which
    must have been previously executed.

    Args:
        tar: target document receiving the widgets.
        src: source document; nothing is done if it is no Form PDF.
        graftmap: passed to mupdf.pdf_graft_mapped_object for every
            object copied from source to target.
        from_page, to_page: source page range (may be reversed).
        start_at: target page number of the first copied page.
        join_duplicates: if true, fields with equal names are combined
            under a common parent; otherwise the second field is renamed.
    """
    if not src.is_form_pdf: # nothing to do: source PDF has no fields
        return
    def clean_kid_parents(acro_fields):
        """ Make sure all kids have correct "Parent" pointers."""
        for i in range(acro_fields.pdf_array_len()):
            parent = acro_fields.pdf_array_get(i)
            kids = parent.pdf_dict_get(PDF_NAME("Kids"))
            for j in range(kids.pdf_array_len()):
                kid = kids.pdf_array_get(j)
                kid.pdf_dict_put(PDF_NAME("Parent"), parent)
    def join_widgets(pdf, acro_fields, xref1, xref2, name):
        """Called for each pair of widgets having the same name.

        Args:
            pdf: target MuPDF document
            acro_fields: object Root/AcroForm/Fields
            xref1, xref2: widget xrefs having same names
            name: (str) the name
        Result:
            Defined or updated widget parent that points to both widgets.
        """
        def re_target(pdf, acro_fields, xref1, kids1, xref2, kids2):
            """Merge widget in xref2 into "Kids" list of widget xref1.

            Args:
                xref1, kids1: target widget and its "Kids" array.
                xref2, kids2: source widget and its "Kids" array (may be empty).
            """
            # make indirect objects from widgets
            w1_ind = mupdf.pdf_new_indirect(pdf, xref1, 0)
            w2_ind = mupdf.pdf_new_indirect(pdf, xref2, 0)
            # find source widget in "Fields" array
            idx = acro_fields.pdf_array_find(w2_ind)
            acro_fields.pdf_array_delete(idx)
            if not kids2.pdf_is_array(): # source widget has no kids
                widget = mupdf.pdf_load_object(pdf, xref2)
                # delete name from widget and insert target as parent
                widget.pdf_dict_del(PDF_NAME("T"))
                widget.pdf_dict_put(PDF_NAME("Parent"), w1_ind)
                # put in target Kids
                kids1.pdf_array_push(w2_ind)
            else: # copy source kids to target kids
                for i in range(kids2.pdf_array_len()):
                    kid = kids2.pdf_array_get(i)
                    kid.pdf_dict_put(PDF_NAME("Parent"), w1_ind)
                    kid_ind = mupdf.pdf_new_indirect(pdf, kid.pdf_to_num(), 0)
                    kids1.pdf_array_push(kid_ind)
        def new_target(pdf, acro_fields, xref1, w1, xref2, w2, name):
            """Make new "Parent" for two widgets with same name.

            Args:
                xref1, w1: first widget
                xref2, w2: second widget
                name: field name
            Result:
                Both widgets have no "Kids". We create a new object with the
                name and a "Kids" array containing the widgets.
                Original widgets must be removed from AcroForm/Fields.
            """
            # make new "Parent" object
            new = mupdf.pdf_new_dict(pdf, 5)
            new.pdf_dict_put_text_string(PDF_NAME("T"), name)
            kids = new.pdf_dict_put_array(PDF_NAME("Kids"), 2)
            new_obj = mupdf.pdf_add_object(pdf, new)
            new_obj_xref = new_obj.pdf_to_num()
            new_ind = mupdf.pdf_new_indirect(pdf, new_obj_xref, 0)
            # copy over some required source widget properties
            # (field type "FT" and additional actions "AA" move from the
            # first widget to the new parent)
            ft = w1.pdf_dict_get(PDF_NAME("FT"))
            w1.pdf_dict_del(PDF_NAME("FT"))
            new_obj.pdf_dict_put(PDF_NAME("FT"), ft)
            aa = w1.pdf_dict_get(PDF_NAME("AA"))
            w1.pdf_dict_del(PDF_NAME("AA"))
            new_obj.pdf_dict_put(PDF_NAME("AA"), aa)
            # remove name field, insert "Parent" field in source widgets
            w1.pdf_dict_del(PDF_NAME("T"))
            w1.pdf_dict_put(PDF_NAME("Parent"), new_ind)
            w2.pdf_dict_del(PDF_NAME("T"))
            w2.pdf_dict_put(PDF_NAME("Parent"), new_ind)
            # put source widgets in "kids" array
            ind1 = mupdf.pdf_new_indirect(pdf, xref1, 0)
            ind2 = mupdf.pdf_new_indirect(pdf, xref2, 0)
            kids.pdf_array_push(ind1)
            kids.pdf_array_push(ind2)
            # remove source widgets from "AcroForm/Fields"
            idx = acro_fields.pdf_array_find(ind1)
            acro_fields.pdf_array_delete(idx)
            idx = acro_fields.pdf_array_find(ind2)
            acro_fields.pdf_array_delete(idx)
            acro_fields.pdf_array_push(new_ind)
        w1 = mupdf.pdf_load_object(pdf, xref1)
        w2 = mupdf.pdf_load_object(pdf, xref2)
        kids1 = w1.pdf_dict_get(PDF_NAME("Kids"))
        kids2 = w2.pdf_dict_get(PDF_NAME("Kids"))
        # check which widget has a suitable "Kids" array
        if kids1.pdf_is_array():
            re_target(pdf, acro_fields, xref1, kids1, xref2, kids2) # pylint: disable=arguments-out-of-order
        elif kids2.pdf_is_array():
            re_target(pdf, acro_fields, xref2, kids2, xref1, kids1) # pylint: disable=arguments-out-of-order
        else:
            new_target(pdf, acro_fields, xref1, w1, xref2, w2, name) # pylint: disable=arguments-out-of-order
    def get_kids(parent, kids_list):
        """Return xref list of leaf kids for a parent.

        Call with an empty list.
        """
        kids = mupdf.pdf_dict_get(parent, PDF_NAME("Kids"))
        if not kids.pdf_is_array():
            return kids_list
        for i in range(kids.pdf_array_len()):
            kid = kids.pdf_array_get(i)
            # recurse only when the kid's "Kids" entry is a dict;
            # every other kid is recorded as a leaf
            if mupdf.pdf_is_dict(mupdf.pdf_dict_get(kid, PDF_NAME("Kids"))):
                kids_list = get_kids(kid, kids_list)
            else:
                kids_list.append(kid.pdf_to_num())
        return kids_list
    def kids_xrefs(widget):
        """Get the xref of top "Parent" and the list of leaf widgets."""
        kids_list = []
        parent = mupdf.pdf_dict_get(widget, PDF_NAME("Parent"))
        parent_xref = parent.pdf_to_num()
        if parent_xref == 0:
            # no parent: return xref 0 and an empty list
            return parent_xref, kids_list
        kids_list = get_kids(parent, kids_list)
        return parent_xref, kids_list
    def deduplicate_names(pdf, acro_fields, join_duplicates=False):
        """Handle any widget name duplicates caused by the merge."""
        names = {} # key is a widget name, value a list of widgets having it.
        # extract all names and widgets in "AcroForm/Fields"
        for i in range(mupdf.pdf_array_len(acro_fields)):
            wobject = mupdf.pdf_array_get(acro_fields, i)
            xref = wobject.pdf_to_num()
            # extract widget name and collect widget(s) using it
            T = mupdf.pdf_dict_get_text_string(wobject, PDF_NAME("T"))
            xrefs = names.get(T, [])
            xrefs.append(xref)
            names[T] = xrefs
        for name, xrefs in names.items():
            if len(xrefs) < 2:
                continue
            xref0, xref1 = xrefs[:2] # only exactly 2 should occur!
            if join_duplicates: # combine fields with equal names
                join_widgets(pdf, acro_fields, xref0, xref1, name)
            else: # make field names unique
                newname = name + f" [{xref1}]" # append this to the name
                wobject = mupdf.pdf_load_object(pdf, xref1)
                wobject.pdf_dict_put_text_string(PDF_NAME("T"), newname)
        clean_kid_parents(acro_fields)
    def get_acroform(doc):
        """Retrieve the AcroForm dictionary from a PDF."""
        pdf = mupdf.pdf_document_from_fz_document(doc)
        # AcroForm (= central form field info)
        return mupdf.pdf_dict_getp(mupdf.pdf_trailer(pdf), "Root/AcroForm")
    tarpdf = mupdf.pdf_document_from_fz_document(tar)
    srcpdf = mupdf.pdf_document_from_fz_document(src)
    if tar.is_form_pdf:
        # target is a Form PDF, so use it to include source fields
        acro = get_acroform(tar)
        # Important arrays in AcroForm
        acro_fields = acro.pdf_dict_get(PDF_NAME("Fields"))
        tar_co = acro.pdf_dict_get(PDF_NAME("CO"))
        if not tar_co.pdf_is_array():
            tar_co = acro.pdf_dict_put_array(PDF_NAME("CO"), 5)
    else:
        # target is no Form PDF, so copy over source AcroForm
        acro = mupdf.pdf_deep_copy_obj(get_acroform(src)) # make a copy
        # Clear "Fields" and "CO" arrays: will be populated by page fields.
        # This is required to avoid copying unneeded objects.
        acro.pdf_dict_del(PDF_NAME("Fields"))
        acro.pdf_dict_put_array(PDF_NAME("Fields"), 5)
        acro.pdf_dict_del(PDF_NAME("CO"))
        acro.pdf_dict_put_array(PDF_NAME("CO"), 5)
        # Enrich AcroForm for copying to target
        acro_graft = mupdf.pdf_graft_mapped_object(graftmap, acro)
        # Insert AcroForm into target PDF
        acro_tar = mupdf.pdf_add_object(tarpdf, acro_graft)
        acro_fields = acro_tar.pdf_dict_get(PDF_NAME("Fields"))
        tar_co = acro_tar.pdf_dict_get(PDF_NAME("CO"))
        # get its xref and insert it into target catalog
        tar_xref = acro_tar.pdf_to_num()
        acro_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0)
        root = mupdf.pdf_dict_get(mupdf.pdf_trailer(tarpdf), PDF_NAME("Root"))
        root.pdf_dict_put(PDF_NAME("AcroForm"), acro_tar_ind)
    # source page numbers in copy order (ascending or descending)
    if from_page <= to_page:
        src_range = range(from_page, to_page + 1)
    else:
        src_range = range(from_page, to_page - 1, -1)
    parents = {} # information about widget parents
    # remove "P" owning page reference from all widgets of all source pages
    for i in src_range:
        src_page = src[i]
        for xref in [
            xref
            for xref, wtype, _ in src_page.annot_xrefs()
            if wtype == mupdf.PDF_ANNOT_WIDGET # pylint: disable=no-member
        ]:
            w_obj = mupdf.pdf_load_object(srcpdf, xref)
            w_obj.pdf_dict_del(PDF_NAME("P"))
            # get the widget's parent structure
            parent_xref, old_kids = kids_xrefs(w_obj)
            if parent_xref:
                # "new_xref" / "new_kids" are filled in once the parent
                # has been copied to the target
                parents[parent_xref] = {
                    "new_xref": 0,
                    "old_kids": old_kids,
                    "new_kids": [],
                }
    # Copy over Parent widgets first - they are not page-dependent
    for xref in parents.keys(): # pylint: disable=consider-using-dict-items
        parent = mupdf.pdf_load_object(srcpdf, xref)
        parent_graft = mupdf.pdf_graft_mapped_object(graftmap, parent)
        parent_tar = mupdf.pdf_add_object(tarpdf, parent_graft)
        kids_xrefs_new = get_kids(parent_tar, [])
        parent_xref_new = parent_tar.pdf_to_num()
        parent_ind = mupdf.pdf_new_indirect(tarpdf, parent_xref_new, 0)
        acro_fields.pdf_array_push(parent_ind)
        parents[xref]["new_xref"] = parent_xref_new
        parents[xref]["new_kids"] = kids_xrefs_new
    for i in range(len(src_range)):
        # read first copied over page in target
        tar_page = tar[start_at + i]
        # read the original page in the source PDF
        src_page = src[src_range[i]]
        # now walk through source page widgets and copy over
        w_xrefs = [ # widget xrefs of the source page
            xref
            for xref, wtype, _ in src_page.annot_xrefs()
            if wtype == mupdf.PDF_ANNOT_WIDGET # pylint: disable=no-member
        ]
        if not w_xrefs: # no widgets on this source page
            continue
        # convert to formal PDF page
        tar_page_pdf = mupdf.pdf_page_from_fz_page(tar_page)
        # extract annotations array
        tar_annots = mupdf.pdf_dict_get(tar_page_pdf.obj(), PDF_NAME("Annots"))
        if not mupdf.pdf_is_array(tar_annots):
            tar_annots = mupdf.pdf_dict_put_array(
                tar_page_pdf.obj(), PDF_NAME("Annots"), 5
            )
        for xref in w_xrefs:
            w_obj = mupdf.pdf_load_object(srcpdf, xref)
            # check if field takes part in inter-field validations
            is_aac = mupdf.pdf_is_dict(mupdf.pdf_dict_getp(w_obj, "AA/C"))
            # check if parent of widget already in target
            parent_xref = mupdf.pdf_to_num(
                w_obj.pdf_dict_get(PDF_NAME("Parent"))
            )
            if parent_xref == 0: # parent not in target yet
                try:
                    w_obj_graft = mupdf.pdf_graft_mapped_object(graftmap, w_obj)
                except Exception as e:
                    # best effort: warn and skip a widget that cannot be copied
                    message_warning(f"cannot copy widget at {xref=}: {e}")
                    continue
                w_obj_tar = mupdf.pdf_add_object(tarpdf, w_obj_graft)
                tar_xref = w_obj_tar.pdf_to_num()
                w_obj_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0)
                mupdf.pdf_array_push(tar_annots, w_obj_tar_ind)
                mupdf.pdf_array_push(acro_fields, w_obj_tar_ind)
            else:
                # the widget was copied together with its parent: look up
                # its new xref by its position among the old kids
                parent = parents[parent_xref]
                idx = parent["old_kids"].index(xref) # search for xref in parent
                tar_xref = parent["new_kids"][idx]
                w_obj_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0)
                mupdf.pdf_array_push(tar_annots, w_obj_tar_ind)
            # Into "AcroForm/CO" if a computation field.
            if is_aac:
                mupdf.pdf_array_push(tar_co, w_obj_tar_ind)
    deduplicate_names(tarpdf, acro_fields, join_duplicates=join_duplicates)
def _embeddedFileGet(self, idx):
    """Return the content of embedded file number ``idx``.

    The entry is read from slot ``2*idx+1`` of the
    Root/Names/EmbeddedFiles/Names array; its EF/F stream is loaded and
    converted with ``JM_BinFromBuffer``.
    """
    pdf = _as_pdf_document(self)
    name_array = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    file_entry = mupdf.pdf_array_get(name_array, 2*idx+1)
    stream_obj = mupdf.pdf_dict_getl(file_entry, PDF_NAME('EF'), PDF_NAME('F'))
    return JM_BinFromBuffer(mupdf.pdf_load_stream(stream_obj))
- def _embeddedFileIndex(self, item: typing.Union[int, str]) -> int:
- filenames = self.embfile_names()
- msg = "'%s' not in EmbeddedFiles array." % str(item)
- if item in filenames:
- idx = filenames.index(item)
- elif item in range(len(filenames)):
- idx = item
- else:
- raise ValueError(msg)
- return idx
def _embfile_add(self, name, buffer_, filename=None, ufilename=None, desc=None):
    """Append an embedded file entry and return the xref of its EF/F object.

    The Root/Names/EmbeddedFiles/Names array is created if it is not an
    array yet; then ``name`` and the file entry built by
    ``JM_embed_file`` are pushed onto it.

    Raises:
        TypeError: ``buffer_`` cannot be converted to a buffer.
    """
    pdf = _as_pdf_document(self)
    data = JM_BufferFromBytes(buffer_)
    if not data.m_internal:
        raise TypeError(MSG_BAD_BUFFER)
    trailer = mupdf.pdf_trailer(pdf)
    name_array = mupdf.pdf_dict_getl(
        trailer,
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    if not mupdf.pdf_is_array(name_array):
        catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
        name_array = mupdf.pdf_new_array(pdf, 6)  # an even number!
        mupdf.pdf_dict_putl(
            catalog,
            name_array,
            PDF_NAME('Names'),
            PDF_NAME('EmbeddedFiles'),
            PDF_NAME('Names'),
        )
    file_entry = JM_embed_file(pdf, data, filename, ufilename, desc, 1)
    stream_obj = mupdf.pdf_dict_getl(file_entry, PDF_NAME('EF'), PDF_NAME('F'))
    xref = mupdf.pdf_to_num(stream_obj)
    mupdf.pdf_array_push(name_array, mupdf.pdf_new_text_string(name))
    mupdf.pdf_array_push(name_array, file_entry)
    return xref
def _embfile_del(self, idx):
    """Remove embedded file entry number ``idx`` from the names array.

    The Root/Names/EmbeddedFiles/Names array stores entries as
    consecutive (name, file spec) pairs, so entry ``idx`` occupies array
    slots ``2*idx`` and ``2*idx+1``. The other ``_embfile_*`` helpers
    address entries the same way.
    """
    pdf = _as_pdf_document(self)
    names = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    # Previously slots idx+1 and idx were deleted, which is only correct
    # for idx == 0 and removed the wrong slots for any later entry.
    # Delete the value slot first so the name slot keeps its position.
    mupdf.pdf_array_delete(names, 2 * idx + 1)
    mupdf.pdf_array_delete(names, 2 * idx)
def _embfile_info(self, idx, infodict):
    """Fill ``infodict`` with details of embedded file ``idx``.

    Stored keys: "collection" (xref of /CI or 0), file name, unicode
    file name, description, size (/DL, else /Params/Size, else -1) and
    length (/Length, else -1).

    Returns:
        The xref of the entry's EF/F object.
    """
    pdf = _as_pdf_document(self)
    name_array = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    entry = mupdf.pdf_array_get(name_array, 2*idx+1)

    collection_item = mupdf.pdf_dict_get(entry, PDF_NAME('CI'))
    infodict["collection"] = (
        mupdf.pdf_to_num(collection_item) if collection_item.m_internal else 0
    )

    text = mupdf.pdf_to_text_string(mupdf.pdf_dict_get(entry, PDF_NAME('F')))
    infodict[dictkey_filename] = JM_EscapeStrFromStr(text)
    text = mupdf.pdf_to_text_string(mupdf.pdf_dict_get(entry, PDF_NAME('UF')))
    infodict[dictkey_ufilename] = JM_EscapeStrFromStr(text)
    text = mupdf.pdf_to_text_string(mupdf.pdf_dict_get(entry, PDF_NAME('Desc')))
    infodict[dictkey_descr] = JM_UnicodeFromStr(text)

    stream_obj = mupdf.pdf_dict_getl(entry, PDF_NAME('EF'), PDF_NAME('F'))
    xref = mupdf.pdf_to_num(stream_obj)

    length = -1
    length_obj = mupdf.pdf_dict_get(stream_obj, PDF_NAME('Length'))
    if length_obj.m_internal:
        length = mupdf.pdf_to_int(length_obj)

    # size: prefer /DL, fall back to /Params/Size
    size = -1
    size_obj = mupdf.pdf_dict_get(stream_obj, PDF_NAME('DL'))
    if not size_obj.m_internal:
        size_obj = mupdf.pdf_dict_getl(stream_obj, PDF_NAME('Params'), PDF_NAME('Size'))
    if size_obj.m_internal:
        size = mupdf.pdf_to_int(size_obj)

    infodict[dictkey_size] = size
    infodict[dictkey_length] = length
    return xref
def _embfile_names(self, namelist):
    """Append the names of all embedded files to 'namelist'.

    The names are the items at the even positions of the EmbeddedFiles
    names array. Nothing is appended if that array does not exist.
    """
    pdf = _as_pdf_document(self)
    name_array = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    if not mupdf.pdf_is_array(name_array):
        return
    for position in range(0, mupdf.pdf_array_len(name_array), 2):
        raw_name = mupdf.pdf_to_text_string(mupdf.pdf_array_get(name_array, position))
        namelist.append(JM_EscapeStrFromStr(raw_name))
def _embfile_upd(self, idx, buffer_=None, filename=None, ufilename=None, desc=None):
    """Change content and / or properties of one embedded file entry.

    Args:
        idx: entry number; the file specification is read from position
            2*idx + 1 of the EmbeddedFiles names array.
        buffer_: new file content. None or empty content leaves the stored
            stream untouched.
        filename, ufilename, desc: new /F, /UF and /Desc texts; falsy values
            leave the respective item unchanged.
    Returns:
        xref number of the entry's /EF/F object.
    Raises:
        TypeError: if content was given but cannot be converted to a buffer.
    """
    pdf = _as_pdf_document(self)
    names = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    entry = mupdf.pdf_array_get(names, 2*idx+1)

    filespec = mupdf.pdf_dict_getl(entry, PDF_NAME('EF'), PDF_NAME('F'))
    if not filespec.m_internal:
        RAISEPY( "bad PDF: no /EF object", JM_Exc_FileDataError)

    # 'buffer_' is the caller's bytes-like object; it has no 'm_internal'
    # attribute. Only the converted buffer can be checked that way.
    if buffer_:
        res = JM_BufferFromBytes(buffer_)
        if not res.m_internal:
            raise TypeError( MSG_BAD_BUFFER)
        JM_update_stream(pdf, filespec, res, 1)
        # adjust /DL and /Size parameters
        size, _ = mupdf.fz_buffer_storage(res)
        size_obj = mupdf.pdf_new_int(size)
        mupdf.pdf_dict_put(filespec, PDF_NAME('DL'), size_obj)
        mupdf.pdf_dict_putl(filespec, size_obj, PDF_NAME('Params'), PDF_NAME('Size'))

    xref = mupdf.pdf_to_num(filespec)
    if filename:
        mupdf.pdf_dict_put_text_string(entry, PDF_NAME('F'), filename)
    if ufilename:
        mupdf.pdf_dict_put_text_string(entry, PDF_NAME('UF'), ufilename)
    if desc:
        mupdf.pdf_dict_put_text_string(entry, PDF_NAME('Desc'), desc)
    return xref
def _extend_toc_items(self, items):
    """Add color info to all items of an extended TOC list.

    The dictionary at index 3 of every item is extended by the xref of the
    outline item and, where present in the outline object, by the keys
    "italic", "bold", "collapse" and the color. "zoom" is always set.

    Raises:
        ValueError: if the document is closed or an item has no dictionary.
        IndexError: if the numbers of outline xrefs and items differ.
    """
    if self.is_closed:
        raise ValueError("document closed")
    if g_use_extra:
        return extra.Document_extend_toc_items( self.this, items)
    pdf = _as_pdf_document(self)

    catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    if not catalog.m_internal:
        return
    outlines = mupdf.pdf_dict_get(catalog, PDF_NAME('Outlines'))
    if not outlines.m_internal:
        return
    first_item = mupdf.pdf_dict_get(outlines, PDF_NAME('First'))
    if not first_item.m_internal:
        return

    outline_xrefs = JM_outline_xrefs(first_item, [])
    n_xrefs = len(outline_xrefs)
    n_items = len(items)
    if n_xrefs == 0:
        return
    if n_xrefs != n_items:
        raise IndexError( "internal error finding outline xrefs")

    # update all TOC item dictionaries
    for pos in range(n_xrefs):
        raw_xref = outline_xrefs[pos]
        xref = int(raw_xref)
        entry = items[pos]
        info = entry[3]
        if not isinstance(info, dict):
            raise ValueError( "need non-simple TOC format")
        info[dictkey_xref] = raw_xref
        bookmark = mupdf.pdf_load_object(pdf, xref)

        # /F: 1 = italic, 2 = bold, 3 = both
        style = mupdf.pdf_to_int(mupdf.pdf_dict_get(bookmark, PDF_NAME('F')))
        if style in (1, 3):
            info["italic"] = True
        if style in (2, 3):
            info["bold"] = True

        # /Count: a negative value means collapsed, a positive one expanded
        kid_count = mupdf.pdf_to_int(mupdf.pdf_dict_get(bookmark, PDF_NAME('Count')))
        if kid_count != 0:
            info["collapse"] = kid_count < 0

        rgb = mupdf.pdf_dict_get(bookmark, PDF_NAME('C'))
        if mupdf.pdf_is_array(rgb) and mupdf.pdf_array_len(rgb) == 3:
            info[dictkey_color] = tuple(
                mupdf.pdf_to_real(mupdf.pdf_array_get(rgb, k)) for k in range(3)
            )

        # the zoom value is item 4 of a 5-item destination array
        dest = mupdf.pdf_dict_get(bookmark, PDF_NAME('Dest'))
        if not (dest.m_internal and mupdf.pdf_is_array(dest)):
            dest = mupdf.pdf_dict_getl(bookmark, PDF_NAME('A'), PDF_NAME('D'))
        zoom_value = 0
        if mupdf.pdf_is_array(dest) and mupdf.pdf_array_len(dest) == 5:
            zoom_value = mupdf.pdf_to_real(mupdf.pdf_array_get(dest, 4))
        info["zoom"] = float(zoom_value)

        entry[3] = info
        items[pos] = entry
def _forget_page(self, page: Page):
    """Drop the given page from the document's page reference dict.

    Pages are registered under id(page); unknown pages are ignored.
    """
    key = id(page)
    refs = self._page_refs
    if key not in refs:
        return
    del refs[key]
def _get_char_widths(self, xref: int, bfname: str, ext: str, ordering: int, limit: int, idx: int = 0):
    """Return a list of (glyph, width) pairs for the leading code points of a font.

    The font comes from MuPDF's CJK fonts if ordering >= 0, otherwise from
    the Base-14 font named 'bfname', otherwise from the font buffer at 'xref'.
    At least 256 code points are processed, more if 'limit' is larger.
    'ext' is not used here.

    Raises:
        Exception: if no font buffer is available for 'xref'.
    """
    pdf = _as_pdf_document(self)
    n_codes = max(limit, 256)
    is_cjk = ordering >= 0

    if is_cjk:
        data, size, index = mupdf.fz_lookup_cjk_font(ordering)
        font = mupdf.fz_new_font_from_memory(None, data, size, index, 0)
    else:
        data, size = mupdf.fz_lookup_base14_font(bfname)
        if data:
            font = mupdf.fz_new_font_from_memory(bfname, data, size, 0, 0)
        else:
            fontbuffer = JM_get_fontbuffer(pdf, xref)
            if not fontbuffer.m_internal:
                raise Exception("font at xref %d is not supported" % xref)
            font = mupdf.fz_new_font_from_buffer(None, fontbuffer, idx, 0)

    widths = []
    for code in range(n_codes):
        gid = mupdf.fz_encode_character(font, code)
        advance = mupdf.fz_advance_glyph(font, gid, 0)
        # CJK fonts report the code point itself instead of the glyph id
        glyph = code if is_cjk else gid
        widths.append((glyph, advance if glyph > 0 else 0.0))
    return widths
def _get_page_labels(self):
    """Return the page label definitions found under the catalog's /PageLabels.

    Three layouts are tried in this order: a direct /Nums array, a /Nums
    reachable via /Kids/Nums, and a /Kids array whose items each have a
    /Nums. The list is filled by JM_get_page_labels and is empty when the
    document has no /PageLabels.
    """
    pdf = _as_pdf_document(self)
    labels = []
    key = mupdf.pdf_new_name("PageLabels")
    tree = mupdf.pdf_dict_getl(mupdf.pdf_trailer(pdf), PDF_NAME('Root'), key)
    if not tree.m_internal:
        return labels

    single_sources = (
        # simple case: direct /Nums object
        lambda: mupdf.pdf_dict_get(tree, PDF_NAME('Nums')),
        # case: /Kids/Nums
        lambda: mupdf.pdf_dict_getl(tree, PDF_NAME('Kids'), PDF_NAME('Nums')),
    )
    for lookup in single_sources:
        nums = mupdf.pdf_resolve_indirect(lookup())
        if nums.m_internal:
            JM_get_page_labels(labels, nums)
            return labels

    # case: /Kids is an array of multiple /Nums
    kids = mupdf.pdf_resolve_indirect(mupdf.pdf_dict_get(tree, PDF_NAME('Kids')))
    if not (kids.m_internal and mupdf.pdf_is_array(kids)):
        return labels
    for pos in range(mupdf.pdf_array_len(kids)):
        kid = mupdf.pdf_array_get(kids, pos)
        nums = mupdf.pdf_resolve_indirect(mupdf.pdf_dict_get(kid, PDF_NAME('Nums')))
        JM_get_page_labels(labels, nums)
    return labels
def _getMetadata(self, key):
    """Look up one metadata value; a failing lookup yields an empty string."""
    try:
        value = mupdf.fz_lookup_metadata2( self.this, key)
    except Exception:
        # only report the failure at high verbosity
        if g_exceptions_verbose > 2:
            exception_info()
        value = ''
    return value
def _getOLRootNumber(self):
    """Return the xref of the outline root, creating the object if missing.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    outlines = mupdf.pdf_dict_get(catalog, PDF_NAME('Outlines'))
    if outlines.m_internal:
        return mupdf.pdf_to_num(outlines)

    # no outline root yet: add a new /Type /Outlines dictionary to the PDF
    new_root = mupdf.pdf_new_dict(pdf, 4)
    mupdf.pdf_dict_put(new_root, PDF_NAME('Type'), PDF_NAME('Outlines'))
    reference = mupdf.pdf_add_object(pdf, new_root)
    mupdf.pdf_dict_put(catalog, PDF_NAME('Outlines'), reference)
    # re-read the entry from the catalog
    return mupdf.pdf_to_num(mupdf.pdf_dict_get(catalog, PDF_NAME('Outlines')))
def _getPDFfileid(self):
    """Return the items of the trailer's /ID array in hexadecimal form.

    Returns:
        None if the document is not a PDF, else a list with one item per
        entry of /ID (empty if there is no /ID).
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return
    hex_ids = []
    identity = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('ID'))
    if not identity.m_internal:
        return hex_ids
    for pos in range(mupdf.pdf_array_len(identity)):
        text = mupdf.pdf_to_text_string(mupdf.pdf_array_get(identity, pos))
        # NOTE(review): binascii.hexlify() needs a bytes-like argument; if
        # pdf_to_text_string() returns str this raises TypeError - confirm.
        hex_ids.append(binascii.hexlify(text))
    return hex_ids
def _getPageInfo(self, pno, what):
    """List fonts, images, XObjects used on a page.

    Args:
        pno: page number; negative values count from the end.
        what: selector handed through to JM_scan_resources.
    Returns:
        The list filled by JM_scan_resources; it is empty if the page has
        no (inherited) /Resources.
    Raises:
        ValueError: if the document is closed or encrypted, or if 'pno'
            does not identify a page.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    doc = self.this
    if isinstance(doc, mupdf.PdfDocument):
        pageCount = mupdf.pdf_count_pages(doc)
    else:
        pageCount = mupdf.fz_count_pages(doc)
    n = pno  # pno < 0 is allowed
    if n < 0:
        # Adding pageCount repeatedly would never end for a document
        # without pages, so reject that case up front.
        if pageCount <= 0:
            raise ValueError( MSG_BAD_PAGENO)
        n %= pageCount  # make it non-negative
    if n >= pageCount:
        raise ValueError( MSG_BAD_PAGENO)
    pdf = _as_pdf_document(self)
    pageref = mupdf.pdf_lookup_page_obj(pdf, n)
    rsrc = mupdf.pdf_dict_get_inheritable(pageref, mupdf.PDF_ENUM_NAME_Resources)
    liste = []
    tracer = []
    if rsrc.m_internal:
        JM_scan_resources(pdf, rsrc, liste, what, 0, tracer)
    return liste
def _insert_font(self, fontfile=None, fontbuffer=None):
    '''
    Utility: insert font from file or binary.

    Returns whatever JM_insert_font delivers. Raises ValueError if neither
    a font file nor a font buffer is given.
    '''
    pdf = _as_pdf_document(self)
    if not (fontfile or fontbuffer):
        raise ValueError( MSG_FILE_OR_BUFFER)
    return JM_insert_font(pdf, None, fontfile, fontbuffer, 0, 0, 0, 0, 0, -1)
def _loadOutline(self):
    """Load the first outline item.

    Returns:
        An Outline object, or None if MuPDF fails to load the outline.
    """
    doc = self.this
    assert isinstance( doc, mupdf.FzDocument)
    try:
        first = mupdf.fz_load_outline( doc)
    except Exception:
        if g_exceptions_verbose > 1:
            exception_info()
        return
    else:
        return Outline( first)
- def _make_page_map(self):
- """Make an array page number -> page object."""
- if self.is_closed:
- raise ValueError("document closed")
- assert 0, f'_make_page_map() is no-op'
def _move_copy_page(self, pno, nb, before, copy):
    """Move or copy a PDF page reference.

    Args:
        pno: number of the source page.
        nb: number of the target page next to which the source is put.
        before: truthy to insert in front of page 'nb', else behind it.
        copy: truthy to keep the source reference (copy), else it is
            removed from its old position (move).

    Only references in /Kids arrays are handled here; no page object is
    duplicated. Afterwards all Python page objects are invalidated.
    """
    pdf = _as_pdf_document(self)
    same = 0
    # get the two page objects -----------------------------------
    # locate the /Kids arrays and indices in each
    page1, parent1, i1 = pdf_lookup_page_loc( pdf, pno)
    kids1 = mupdf.pdf_dict_get( parent1, PDF_NAME('Kids'))
    page2, parent2, i2 = pdf_lookup_page_loc( pdf, nb)
    kids2 = mupdf.pdf_dict_get( parent2, PDF_NAME('Kids'))
    if before:  # calc index of source page in target /Kids
        pos = i2
    else:
        pos = i2 + 1
    # same /Kids array? ------------------------------------------
    # a result of 0 is treated as "both pages share one /Kids array"
    same = mupdf.pdf_objcmp( kids1, kids2)
    # put source page in target /Kids array ----------------------
    if not copy and same != 0:  # update parent in page object
        mupdf.pdf_dict_put( page1, PDF_NAME('Parent'), parent2)
    mupdf.pdf_array_insert( kids2, page1, pos)
    if same != 0:  # different /Kids arrays ----------------------
        parent = parent2
        while parent.m_internal:  # increase /Count objects in parents
            count = mupdf.pdf_dict_get_int( parent, PDF_NAME('Count'))
            mupdf.pdf_dict_put_int( parent, PDF_NAME('Count'), count + 1)
            parent = mupdf.pdf_dict_get( parent, PDF_NAME('Parent'))
        if not copy:  # delete original item
            mupdf.pdf_array_delete( kids1, i1)
            parent = parent1
            while parent.m_internal:  # decrease /Count objects in parents
                count = mupdf.pdf_dict_get_int( parent, PDF_NAME('Count'))
                mupdf.pdf_dict_put_int( parent, PDF_NAME('Count'), count - 1)
                parent = mupdf.pdf_dict_get( parent, PDF_NAME('Parent'))
    else:  # same /Kids array
        if copy:  # source page is copied
            parent = parent2
            while parent.m_internal:  # increase /Count object in parents
                count = mupdf.pdf_dict_get_int( parent, PDF_NAME('Count'))
                mupdf.pdf_dict_put_int( parent, PDF_NAME('Count'), count + 1)
                parent = mupdf.pdf_dict_get( parent, PDF_NAME('Parent'))
        else:
            # The reference was inserted at 'pos' above. If that is at or
            # before the old position, the old reference has shifted by one.
            if i1 < pos:
                mupdf.pdf_array_delete( kids1, i1)
            else:
                mupdf.pdf_array_delete( kids1, i1 + 1)
    if pdf.m_internal.rev_page_map:  # page map no longer valid: drop it
        mupdf.ll_pdf_drop_page_tree( pdf.m_internal)
    self._reset_page_refs()
def _newPage(self, pno=-1, width=595, height=842):
    """Insert a new, empty PDF page and return it.

    Args:
        pno: page number handed to the insertion call; in the pure Python
            path values below -1 are rejected.
        width, height: upper right corner of the page's mediabox.
    Returns:
        self[pno]
    Raises:
        ValueError: if the document is closed or encrypted, or (pure Python
            path) if pno < -1.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if not g_use_extra:
        pdf = _as_pdf_document(self)
        page_rect = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
        page_rect.x1 = width
        page_rect.y1 = height
        empty_contents = mupdf.FzBuffer()
        if pno < -1:
            raise ValueError( MSG_BAD_PAGENO)
        # create /Resources and /Contents objects
        page_resources = mupdf.pdf_add_new_dict(pdf, 1)
        new_page = mupdf.pdf_add_page(pdf, page_rect, 0, page_resources, empty_contents)
        mupdf.pdf_insert_page(pdf, pno, new_page)
    else:
        extra._newPage( self.this, pno, width, height)
    # fixme: pdf->dirty = 1;
    self._reset_page_refs()
    return self[pno]
def _remove_links_to(self, numbers):
    """Hand the given page numbers to _remove_dest_range for this PDF."""
    _remove_dest_range(_as_pdf_document(self), numbers)
def _remove_toc_item(self, xref):
    """ "Remove" a bookmark by letting it point to nowhere.

    The outline item at 'xref' loses its /Dest and /A entries and gets the
    color (0.8, 0.8, 0.8).
    """
    pdf = _as_pdf_document(self)
    item = mupdf.pdf_new_indirect(pdf, xref, 0)
    for key in (PDF_NAME('Dest'), PDF_NAME('A')):
        mupdf.pdf_dict_del(item, key)
    gray = mupdf.pdf_new_array(pdf, 3)
    for _ in range(3):
        mupdf.pdf_array_push_real(gray, 0.8)
    mupdf.pdf_dict_put(item, PDF_NAME('C'), gray)
- def _reset_page_refs(self):
- """Invalidate all pages in document dictionary."""
- if getattr(self, "is_closed", True):
- return
- pages = [p for p in self._page_refs.values()]
- for page in pages:
- if page:
- page._erase()
- page = None
- self._page_refs.clear()
def _set_page_labels(self, labels):
    """Replace the catalog's /PageLabels by a /Nums array with given content.

    An empty /PageLabels/Nums array is stored first. Then 'labels' is
    inserted between the brackets of "/Nums[]" in the catalog's compressed
    object source, which is written back.
    """
    pdf = _as_pdf_document(self)
    key = mupdf.pdf_new_name("PageLabels")
    catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    mupdf.pdf_dict_del(catalog, key)
    empty_nums = mupdf.pdf_new_array(pdf, 0)
    mupdf.pdf_dict_putl(catalog, empty_nums, key, PDF_NAME('Nums'))

    catalog_xref = self.pdf_catalog()
    source = self.xref_object(catalog_xref, compressed=True)
    replacement = "/Nums[%s]" % labels
    self.update_object(catalog_xref, source.replace("/Nums[]", replacement))
def _update_toc_item(self, xref, action=None, title=None, flags=0, collapse=None, color=None):
    '''
    Change properties of the outline (bookmark) item stored at xref.

    Title and action are replaced only when given; setting an action also
    removes /Dest. The flags /F are always written. A non-empty color
    replaces /C, an empty one removes /C, None leaves it alone. If collapse
    is True or False and the item has a /Count, the sign of /Count is
    flipped where needed (negative is treated as collapsed).
    '''
    pdf = _as_pdf_document(self)
    item = mupdf.pdf_new_indirect(pdf, xref, 0)

    if title:
        mupdf.pdf_dict_put_text_string(item, PDF_NAME('Title'), title)

    if action:
        mupdf.pdf_dict_del(item, PDF_NAME('Dest'))
        action_obj = JM_pdf_obj_from_str(pdf, action)
        mupdf.pdf_dict_put(item, PDF_NAME('A'), action_obj)

    mupdf.pdf_dict_put_int(item, PDF_NAME('F'), flags)

    if color is not None:
        if color:
            rgb = mupdf.pdf_new_array(pdf, 3)
            for k in range(3):
                mupdf.pdf_array_push_real(rgb, color[k])
            mupdf.pdf_dict_put(item, PDF_NAME('C'), rgb)
        else:
            mupdf.pdf_dict_del(item, PDF_NAME('C'))

    if collapse is None:
        return
    if not mupdf.pdf_dict_get(item, PDF_NAME('Count')).m_internal:
        return
    count = mupdf.pdf_dict_get_int(item, PDF_NAME('Count'))
    must_flip = (count < 0 and collapse is False) or (count > 0 and collapse is True)
    if must_flip:
        mupdf.pdf_dict_put_int(item, PDF_NAME('Count'), -count)
@property
def FormFonts(self):
    """Get list of field font resource names.

    Returns:
        None if the document is not a PDF, else the list of keys of the
        /AcroForm/DR/Font dictionary (empty if there is none).
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return
    fonts = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('AcroForm'),
        PDF_NAME('DR'),
        PDF_NAME('Font'),
    )
    if not (fonts.m_internal and mupdf.pdf_is_dict(fonts)):
        return []
    return [
        JM_UnicodeFromStr(mupdf.pdf_to_name(mupdf.pdf_dict_get_key(fonts, pos)))
        for pos in range(mupdf.pdf_dict_len(fonts))
    ]
def add_layer(self, name, creator=None, on=None):
    """Add a new OC layer.

    The configuration is created by JM_add_layer_config; afterwards MuPDF
    re-reads the optional content information.
    """
    pdf = _as_pdf_document(self)
    JM_add_layer_config(pdf, name, creator, on)
    mupdf.ll_pdf_read_ocg(pdf.m_internal)
def add_ocg(self, name, config=-1, on=1, intent=None, usage=None):
    """Add new optional content group.

    Args:
        name: text stored as the OCG's /Name.
        config: index into the /Configs array of the configuration that
            receives the OCG; values below 0 select the default one (/D).
        on: truthy to list the OCG under /ON, else under /OFF.
        intent: a name string, or a sequence of name strings, for /Intent.
            "View" is used if omitted or empty.
        usage: /Subtype name of the creator info; "Artwork" if omitted.
    Returns:
        xref number of the new OCG object.
    Raises:
        ValueError: if 'config' does not identify an existing configuration.
    """
    xref = 0
    pdf = _as_pdf_document(self)

    # make the OCG
    ocg = mupdf.pdf_add_new_dict(pdf, 3)
    mupdf.pdf_dict_put(ocg, PDF_NAME('Type'), PDF_NAME('OCG'))
    mupdf.pdf_dict_put_text_string(ocg, PDF_NAME('Name'), name)
    intents = mupdf.pdf_dict_put_array(ocg, PDF_NAME('Intent'), 2)
    if not intent:
        mupdf.pdf_array_push(intents, PDF_NAME('View'))
    elif isinstance(intent, str):
        mupdf.pdf_array_push(intents, mupdf.pdf_new_name(intent))
    else:
        # a sequence of intents: push one name per usable string item
        for item in intent:
            if isinstance(item, str) and item:
                mupdf.pdf_array_push(intents, mupdf.pdf_new_name(item))
    use_for = mupdf.pdf_dict_put_dict(ocg, PDF_NAME('Usage'), 3)
    ci_name = mupdf.pdf_new_name("CreatorInfo")
    cre_info = mupdf.pdf_dict_put_dict(use_for, ci_name, 2)
    mupdf.pdf_dict_put_text_string(cre_info, PDF_NAME('Creator'), "PyMuPDF")
    if usage:
        mupdf.pdf_dict_put_name(cre_info, PDF_NAME('Subtype'), usage)
    else:
        mupdf.pdf_dict_put_name(cre_info, PDF_NAME('Subtype'), "Artwork")
    indocg = mupdf.pdf_add_object(pdf, ocg)

    # Insert OCG in the right config
    ocp = JM_ensure_ocproperties(pdf)
    obj = mupdf.pdf_dict_get(ocp, PDF_NAME('OCGs'))
    mupdf.pdf_array_push(obj, indocg)

    if config > -1:
        obj = mupdf.pdf_dict_get(ocp, PDF_NAME('Configs'))
        if not mupdf.pdf_is_array(obj):
            raise ValueError( MSG_BAD_OC_CONFIG)
        cfg = mupdf.pdf_array_get(obj, config)
        if not cfg.m_internal:
            raise ValueError( MSG_BAD_OC_CONFIG)
    else:
        cfg = mupdf.pdf_dict_get(ocp, PDF_NAME('D'))

    # register the OCG in the config's /Order array (created if missing)
    obj = mupdf.pdf_dict_get(cfg, PDF_NAME('Order'))
    if not obj.m_internal:
        obj = mupdf.pdf_dict_put_array(cfg, PDF_NAME('Order'), 1)
    mupdf.pdf_array_push(obj, indocg)

    # ... and in either /ON or /OFF (created if missing)
    state_key = PDF_NAME('ON') if on else PDF_NAME('OFF')
    obj = mupdf.pdf_dict_get(cfg, state_key)
    if not obj.m_internal:
        obj = mupdf.pdf_dict_put_array(cfg, state_key, 1)
    mupdf.pdf_array_push(obj, indocg)

    # let MuPDF take note: re-read OCProperties
    mupdf.ll_pdf_read_ocg(pdf.m_internal)

    xref = mupdf.pdf_to_num(indocg)
    return xref
def authenticate(self, password):
    """Decrypt document.

    Returns:
        The result of MuPDF's password check. If it is truthy, the document
        is flagged as not encrypted and init_doc() is executed.
    Raises:
        ValueError: if the document is closed.
    """
    if self.is_closed:
        raise ValueError("document closed")
    outcome = mupdf.fz_authenticate_password(self.this, password)
    if not outcome:
        return outcome
    # the doc is decrypted successfully and we init the outline
    self.is_encrypted = False
    self.init_doc()
    self.thisown = True
    return outcome
def can_save_incrementally(self):
    """Check whether incremental saves are possible.

    Always False for documents that are not PDFs.
    """
    pdf = _as_pdf_document(self, required=0)
    return mupdf.pdf_can_be_saved_incrementally(pdf) if pdf.m_internal else False
def bake(self, *, annots: bool = True, widgets: bool = True) -> None:
    """Convert annotations or fields to permanent content.

    Notes:
        Annotations and / or widgets become permanent page content, like
        text and vector graphics, as appropriate.
        Pages look the same afterwards, but no longer have annotations,
        respectively fields.
        If widgets are selected the PDF will no longer be a Form PDF.

    Args:
        annots: convert annotations
        widgets: convert form fields
    """
    mupdf.pdf_bake_document(_as_pdf_document(self), int(annots), int(widgets))
@property
def chapter_count(self):
    """Number of chapters.

    Raises:
        ValueError: if the document is closed.
    """
    if not self.is_closed:
        return mupdf.fz_count_chapters( self.this)
    raise ValueError("document closed")
def chapter_page_count(self, chapter):
    """Page count of chapter.

    Raises:
        ValueError: if the document is closed or 'chapter' is not a valid
            chapter number.
    """
    if self.is_closed:
        raise ValueError("document closed")
    n_chapters = mupdf.fz_count_chapters( self.this)
    if chapter < 0 or chapter >= n_chapters:
        raise ValueError( "bad chapter number")
    return mupdf.fz_count_chapter_pages( self.this, chapter)
def close(self):
    """Close document.

    Drops the outline, invalidates all page objects, empties the graft
    maps and releases the underlying document object.

    Raises:
        ValueError: if the document is closed already.
    """
    if getattr(self, "is_closed", True):
        raise ValueError("document closed")
    if getattr(self, "_outline", None):
        self._outline = None
    # pages must be invalidated while the document still counts as open
    self._reset_page_refs()
    self.is_closed = True
    self.Graftmaps = {}  # Fixes test_3140().
    self.this = None
def convert_to_pdf(self, from_page=0, to_page=-1, rotate=0):
    """Convert document to a PDF, selecting page range and optional rotation. Output bytes object.

    Page numbers are clamped: a negative 'from_page' becomes 0, a negative
    or too large 'to_page' becomes the last page. MuPDF warnings collected
    during the conversion are passed to message().
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    fz_doc = self.this
    last_page = mupdf.fz_count_pages(fz_doc) - 1
    first = min(max(from_page, 0), last_page)
    last = last_page if (to_page < 0 or to_page > last_page) else to_page

    warnings_before = len(JM_mupdf_warnings_store)
    result = JM_convert_to_pdf(fz_doc, first, last, rotate)
    warnings_after = len(JM_mupdf_warnings_store)
    for warning in JM_mupdf_warnings_store[warnings_before:warnings_after]:
        message(f'{warning}')
    return result
def copy_page(self, pno: int, to: int =-1):
    """Copy a page within a PDF document.

    Only a second reference to the same page object is created.

    Args:
        pno: source page number
        to: put before this page, '-1' means after last page.
    Raises:
        ValueError: if the document is closed or a page number is invalid.
    """
    if self.is_closed:
        raise ValueError("document closed")
    page_count = len(self)
    if not (pno in range(page_count) and to in range(-1, page_count)):
        raise ValueError("bad page number(s)")
    if to == -1:
        # append: insert behind the current last page
        return self._move_copy_page(pno, page_count - 1, 0, 1)
    return self._move_copy_page(pno, to, 1, 1)
def del_xml_metadata(self):
    """Delete XML metadata.

    Removes the /Metadata entry from the PDF catalog, if there is a catalog.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    if not catalog.m_internal:
        return
    mupdf.pdf_dict_del(catalog, PDF_NAME('Metadata'))
def delete_page(self, pno: int =-1):
    """Delete one page from a PDF.

    Delegates to delete_pages() with the single page number and returns
    its result.
    """
    outcome = self.delete_pages(pno)
    return outcome
def delete_pages(self, *args, **kw):
    """Delete pages from a PDF.

    Args:
        Either keywords 'from_page'/'to_page', or two integers to
        specify the first/last page to delete.
        Or a list/tuple/range object, which can contain arbitrary
        page numbers.
        Or a single integer page number.

    Notes:
        Negative numbers given as keywords or as a single integer count
        from the end. TOC items pointing to deleted pages are neutralized
        and links to deleted pages are removed before the pages are
        deleted, last to first.

    Raises:
        ValueError: if the document is not a PDF or is closed, if the
            arguments are malformed, or if a page number is invalid.
    """
    if not self.is_pdf:
        raise ValueError("is no PDF")
    if self.is_closed:
        raise ValueError("document closed")

    page_count = self.page_count  # page count of document

    def from_end(number):
        # Map a negative number to its non-negative equivalent. A document
        # without pages has no valid page number, and repeatedly adding a
        # page count of 0 would never terminate.
        if number >= 0:
            return number
        if page_count <= 0:
            raise ValueError("bad page number(s)")
        return number % page_count

    if kw:  # check if keywords were used
        if args:  # then no positional args are allowed
            raise ValueError("cannot mix keyword and positional argument")
        f = from_end(kw.get("from_page", -1))  # first page to delete
        t = from_end(kw.get("to_page", -1))  # last page to delete
        if not f <= t < page_count:
            raise ValueError("bad page number(s)")
        numbers = tuple(range(f, t + 1))
    else:
        # 'args' is a tuple, so it must be checked by length
        if len(args) not in (1, 2):
            raise ValueError("need 1 or 2 positional arguments")
        if len(args) == 2:
            f, t = args
            if not (type(f) is int and type(t) is int):
                raise ValueError("both arguments must be int")
            if f > t:
                f, t = t, f
            if not f <= t < page_count:
                raise ValueError("bad page number(s)")
            numbers = tuple(range(f, t + 1))
        elif isinstance(args[0], int):
            numbers = (from_end(args[0]),)
        else:
            numbers = tuple(args[0])

    numbers = list(map(int, set(numbers)))  # ensure unique integers
    if numbers == []:
        message("nothing to delete")
        return
    numbers.sort()
    if numbers[0] < 0 or numbers[-1] >= page_count:
        raise ValueError("bad page number(s)")
    frozen_numbers = frozenset(numbers)
    toc = self.get_toc()
    for i, xref in enumerate(self.get_outline_xrefs()):
        # TOC page numbers are 1-based
        if toc[i][2] - 1 in frozen_numbers:
            self._remove_toc_item(xref)  # remove target in PDF object

    self._remove_links_to(frozen_numbers)

    for i in reversed(numbers):  # delete pages, last to first
        self._delete_page(i)

    self._reset_page_refs()
def embfile_add(self,
        name: str,
        buffer_: ByteString,
        filename: OptStr =None,
        ufilename: OptStr =None,
        desc: OptStr =None,
        ) -> int:
    """Add an item to the EmbeddedFiles array.

    Args:
        name: name of the new item, must not already exist.
        buffer_: (binary data) the file content.
        filename: (str) the file name, default: the name
        ufilename: (unicode) the file name, default: filename
        desc: (str) the description, default: the name
    Returns:
        xref number of the stored file stream. Its /Type is set to
        /EmbeddedFile, its creation and modification dates to now.
    Raises:
        ValueError: if an item with this name exists already.
    """
    if name in self.embfile_names():
        raise ValueError("Name '%s' already exists." % str(name))

    if filename is None:
        filename = name
    if ufilename is None:
        ufilename = filename
    if desc is None:
        desc = name
    xref = self._embfile_add(
            name,
            buffer_=buffer_,
            filename=filename,
            ufilename=ufilename,
            desc=desc,
            )
    date = get_pdf_now()
    self.xref_set_key(xref, "Type", "/EmbeddedFile")
    self.xref_set_key(xref, "Params/CreationDate", get_pdf_str(date))
    self.xref_set_key(xref, "Params/ModDate", get_pdf_str(date))
    return xref
def embfile_count(self) -> int:
    """Get number of EmbeddedFiles.

    This is the length of the list returned by embfile_names().
    """
    names = self.embfile_names()
    return len(names)
def embfile_del(self, item: typing.Union[int, str]):
    """Delete an entry from EmbeddedFiles.

    Notes:
        'item' must be the name or the index of an EmbeddedFiles entry.
        The data is physically removed only when saving to a new file
        with an appropriate garbage option.

    Args:
        item: name or number of item.
    Returns:
        None
    """
    entry_number = self._embeddedFileIndex(item)
    return self._embfile_del(entry_number)
def embfile_get(self, item: typing.Union[int, str]) -> bytes:
    """Get the content of an item in the EmbeddedFiles array.

    Args:
        item: number or name of item.
    Returns:
        (bytes) The file content.
    """
    entry_number = self._embeddedFileIndex(item)
    return self._embeddedFileGet(entry_number)
def embfile_info(self, item: typing.Union[int, str]) -> dict:
    """Get information of an item in the EmbeddedFiles array.

    Args:
        item: number or name of item.
    Returns:
        Information dictionary. Besides "name" and the items filled in by
        _embfile_info() it contains "creationDate", "modDate" and
        "checksum" (hexadecimal) where the file stream has these values.
    """
    idx = self._embeddedFileIndex(item)
    info = {"name": self.embfile_names()[idx]}
    xref = self._embfile_info(idx, info)

    for pdf_key, info_key in (
        ("Params/CreationDate", "creationDate"),
        ("Params/ModDate", "modDate"),
    ):
        kind, value = self.xref_get_key(xref, pdf_key)
        if kind != "null":
            info[info_key] = value

    kind, md5 = self.xref_get_key(xref, "Params/CheckSum")
    if kind != "null":
        info["checksum"] = binascii.hexlify(md5.encode()).decode()
    return info
def embfile_names(self) -> list:
    """Get list of names of EmbeddedFiles.

    A new list is created on every call and filled by _embfile_names().
    """
    collected = list()
    self._embfile_names(collected)
    return collected
def embfile_upd(self,
        item: typing.Union[int, str],
        buffer_: OptBytes =None,
        filename: OptStr =None,
        ufilename: OptStr =None,
        desc: OptStr =None,
        ) -> int:
    """Change an item of the EmbeddedFiles array.

    Notes:
        Only provided parameters are changed. The modification date of
        the file stream is set to now in every case.

    Args:
        item: number or name of item.
        buffer_: (binary data) the new file content.
        filename: (str) the new file name.
        ufilename: (unicode) the new file name.
        desc: (str) the new description.
    Returns:
        xref number of the file stream.
    """
    idx = self._embeddedFileIndex(item)
    xref = self._embfile_upd(
            idx,
            buffer_=buffer_,
            filename=filename,
            ufilename=ufilename,
            desc=desc,
            )
    date = get_pdf_now()
    self.xref_set_key(xref, "Params/ModDate", get_pdf_str(date))
    return xref
def extract_font(self, xref=0, info_only=0, named=None):
    '''
    Get a font by xref. Returns a tuple or dictionary.

    The result has the items name, extension, subtype and content, either
    as a tuple in this order or, if 'named' is truthy, as a dictionary.
    All items are empty if the object is not a font or is a CIDFontType
    font. The content is only extracted if 'info_only' is falsy and the
    font extension is not 'n/a'.
    '''
    pdf = _as_pdf_document(self)
    obj = mupdf.pdf_load_object(pdf, xref)
    type_ = mupdf.pdf_dict_get(obj, PDF_NAME('Type'))
    subtype = mupdf.pdf_dict_get(obj, PDF_NAME('Subtype'))

    is_simple_font = (
        mupdf.pdf_name_eq(type_, PDF_NAME('Font'))
        and not mupdf.pdf_to_name(subtype).startswith('CIDFontType')
    )
    if is_simple_font:
        # prefer /BaseFont, fall back to /Name
        name_obj = mupdf.pdf_dict_get(obj, PDF_NAME('BaseFont'))
        if not name_obj.m_internal or mupdf.pdf_is_null(name_obj):
            name_obj = mupdf.pdf_dict_get(obj, PDF_NAME('Name'))
        ext = JM_get_fontextension(pdf, xref)
        if ext != 'n/a' and not info_only:
            content = JM_BinFromBuffer(JM_get_fontbuffer(pdf, xref))
        else:
            content = b''
        font_name = JM_EscapeStrFromStr(mupdf.pdf_to_name(name_obj))
        font_ext = JM_UnicodeFromStr(ext)
        font_type = JM_UnicodeFromStr(mupdf.pdf_to_name(subtype))
    else:
        font_name, font_ext, font_type, content = '', '', '', b''

    if not named:
        return (font_name, font_ext, font_type, content)
    return {
        dictkey_name: font_name,
        dictkey_ext: font_ext,
        dictkey_type: font_type,
        dictkey_content: content,
    }
def extract_image(self, xref):
    """Get image by xref. Returns a dictionary.

    The dictionary is filled by _make_image_dict and extended by the xref
    of the image's /SMask or /Mask (0 if there is none) and the name of
    its colorspace.

    Raises:
        ValueError: if the document is closed or encrypted, if 'xref' is
            out of range, or if the object is not an image.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    if not _INRANGE(xref, 1, mupdf.pdf_xref_len(pdf)-1):
        raise ValueError( MSG_BAD_XREF)

    image_obj = mupdf.pdf_new_indirect(pdf, xref, 0)
    if not mupdf.pdf_name_eq(
        mupdf.pdf_dict_get(image_obj, PDF_NAME('Subtype')),
        PDF_NAME('Image'),
    ):
        raise ValueError( "not an image")

    mask_obj = mupdf.pdf_dict_geta(image_obj, PDF_NAME('SMask'), PDF_NAME('Mask'))
    mask_xref = mupdf.pdf_to_num(mask_obj) if mask_obj.m_internal else 0

    # load the image
    image = mupdf.pdf_load_image(pdf, image_obj)
    result = {}
    _make_image_dict(image, result)
    result[dictkey_smask] = mask_xref
    result[dictkey_cs_name] = mupdf.fz_colorspace_name(image.colorspace())
    return result
def ez_save(
        self,
        filename,
        garbage=3,
        clean=False,
        deflate=True,
        deflate_images=True,
        deflate_fonts=True,
        incremental=False,
        ascii=False,
        expand=False,
        linear=False,
        pretty=False,
        encryption=1,
        permissions=4095,
        owner_pw=None,
        user_pw=None,
        no_new_id=True,
        preserve_metadata=1,
        use_objstms=1,
        compression_effort=0,
        ):
    '''
    Save PDF using some different defaults

    All arguments are forwarded to save() as given; only the default
    values defined here differ. Returns the result of save().
    '''
    options = dict(
        garbage=garbage,
        clean=clean,
        deflate=deflate,
        deflate_images=deflate_images,
        deflate_fonts=deflate_fonts,
        incremental=incremental,
        ascii=ascii,
        expand=expand,
        linear=linear,
        pretty=pretty,
        encryption=encryption,
        permissions=permissions,
        owner_pw=owner_pw,
        user_pw=user_pw,
        no_new_id=no_new_id,
        preserve_metadata=preserve_metadata,
        use_objstms=use_objstms,
        compression_effort=compression_effort,
    )
    return self.save(filename, **options)
def find_bookmark(self, bm):
    """Find new location after layouting a document.

    Returns:
        The pair (chapter, page) that MuPDF reports for bookmark 'bm'.
    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    where = mupdf.fz_lookup_bookmark2( self.this, bm)
    return (where.chapter, where.page)
def fullcopy_page(self, pno, to=-1):
    """Make a full page duplicate.

    Unlike a page reference copy, the page object itself is duplicated and
    receives its own annotation objects and its own /Contents stream.

    Args:
        pno: number of the source page.
        to: page number handed to the page insertion; must be in the range
            -1 to page_count - 1.
    Raises:
        ValueError: if a page number is out of range.
    """
    pdf = _as_pdf_document(self)
    page_count = mupdf.pdf_count_pages( pdf)
    try:
        if (not _INRANGE(pno, 0, page_count - 1)
            or not _INRANGE(to, -1, page_count - 1)
            ):
            raise ValueError( MSG_BAD_PAGENO)

        page1 = mupdf.pdf_resolve_indirect( mupdf.pdf_lookup_page_obj( pdf, pno))

        page2 = mupdf.pdf_deep_copy_obj( page1)
        old_annots = mupdf.pdf_dict_get( page2, PDF_NAME('Annots'))

        # copy annotations, but remove Popup and IRT types
        if old_annots.m_internal:
            n = mupdf.pdf_array_len( old_annots)
            new_annots = mupdf.pdf_new_array( pdf, n)
            for i in range(n):
                o = mupdf.pdf_array_get( old_annots, i)
                subtype = mupdf.pdf_dict_get( o, PDF_NAME('Subtype'))
                if mupdf.pdf_name_eq( subtype, PDF_NAME('Popup')):
                    continue
                if mupdf.pdf_dict_gets( o, "IRT").m_internal:
                    continue
                # store the copy as a new object and use a reference to it
                copy_o = mupdf.pdf_deep_copy_obj( mupdf.pdf_resolve_indirect( o))
                xref = mupdf.pdf_create_object( pdf)
                mupdf.pdf_update_object( pdf, xref, copy_o)
                copy_o = mupdf.pdf_new_indirect( pdf, xref, 0)
                # the copy must not keep its /Popup and /P entries
                mupdf.pdf_dict_del( copy_o, PDF_NAME('Popup'))
                mupdf.pdf_dict_del( copy_o, PDF_NAME('P'))
                mupdf.pdf_array_push( new_annots, copy_o)
            mupdf.pdf_dict_put( page2, PDF_NAME('Annots'), new_annots)

        # copy the old contents stream(s)
        res = JM_read_contents( page1)

        # create new /Contents object for page2
        if res and res.m_internal:
            #contents = mupdf.pdf_add_stream( pdf, mupdf.fz_new_buffer_from_copied_data( b"  ", 1), NULL, 0)
            contents = mupdf.pdf_add_stream( pdf, mupdf.fz_new_buffer_from_copied_data( b" "), mupdf.PdfObj(), 0)
            JM_update_stream( pdf, contents, res, 1)
            mupdf.pdf_dict_put( page2, PDF_NAME('Contents'), contents)

        # now insert target page, making sure it is an indirect object
        xref = mupdf.pdf_create_object( pdf)  # get new xref
        mupdf.pdf_update_object( pdf, xref, page2)  # store new page

        page2 = mupdf.pdf_new_indirect( pdf, xref, 0)  # reread object
        mupdf.pdf_insert_page( pdf, to, page2)  # and store the page
    finally:
        # executed on failure, too
        mupdf.ll_pdf_drop_page_tree( pdf.m_internal)

    self._reset_page_refs()
def get_char_widths(
    doc: 'Document',
    xref: int,
    limit: int = 256,
    idx: int = 0,
    fontdict: OptDict = None,
) -> list:
    """Return the glyph list of a font, recording it in 'doc.FontInfos'.

    Notes:
        The font is identified by its xref. If 'CheckFontInfo()' already
        has an entry for it, that entry is reused; otherwise a new entry is
        appended to 'doc.FontInfos'.
        Fonts named 'ZapfDingbats' or 'Symbol' start with the module tables
        'zapf_glyphs' / 'symbol_glyphs'.
    Args:
        xref: (int) xref of the font.
        limit: (int) number of glyphs wanted; at least 256 is used.
        idx: (int) passed through to 'doc._get_char_widths()'.
        fontdict: (dict) optional font properties. If given for a font not
            yet recorded, it must have the keys "name", "ext", "type",
            "ordering" and "simple".
    Returns:
        The recorded glyph list if it is long enough, else the result of
        'doc._get_char_widths()', or None for fonts recognized as CJK.
    Raises:
        ValueError: if the font's "ext" value is the empty string.
    """
    cjk_families = (
        ("Fangti", "Ming"),
        ("Heiti", "Song"),
        ("Gothic", "Mincho"),
        ("Dotum", "Batang"),
    )
    fontinfo = CheckFontInfo(doc, xref)
    if fontinfo is not None:
        # font already known: take over what was recorded
        fontdict = fontinfo[1]
        glyphs = fontdict["glyphs"]
        simple = fontdict["simple"]
        ordering = fontdict["ordering"]
    else:
        if fontdict is not None:
            name = fontdict["name"]
            ext = fontdict["ext"]
            stype = fontdict["type"]
            ordering = fontdict["ordering"]
            simple = fontdict["simple"]
        else:
            name, ext, stype, asc, dsc = utils._get_font_properties(doc, xref)
            fontdict = {
                "name": name,
                "type": stype,
                "ext": ext,
                "ascender": asc,
                "descender": dsc,
            }

        if ext == "":
            raise ValueError("xref is not a font")

        # 'simple' fonts are identified by their font type
        simple = stype in ("Type1", "MMType1", "TrueType")

        # CJK fonts get an ordering number 0..3, all others -1
        ordering = -1
        for code, family in enumerate(cjk_families):
            if name in family:
                ordering = code
                break

        fontdict["simple"] = simple

        if name == "ZapfDingbats":
            glyphs = zapf_glyphs
        elif name == "Symbol":
            glyphs = symbol_glyphs
        else:
            glyphs = None

        fontdict["glyphs"] = glyphs
        fontdict["ordering"] = ordering
        fontinfo = [xref, fontdict]
        doc.FontInfos.append(fontinfo)

    oldlimit = 0 if glyphs is None else len(glyphs)
    mylimit = max(256, limit)
    if mylimit <= oldlimit:
        # the glyphs we have already cover the request
        return glyphs

    if ordering < 0:  # not a CJK font
        glyphs = doc._get_char_widths(
            xref, fontdict["name"], fontdict["ext"], fontdict["ordering"], mylimit, idx
        )
    else:  # CJK fonts use char codes and width = 1
        glyphs = None

    fontdict["glyphs"] = glyphs
    fontinfo[1] = fontdict
    UpdateFontInfo(doc, fontinfo)
    return glyphs
def get_layer(self, config=-1):
    """Content of ON, OFF, RBGroups of an OC layer.

    Args:
        config: (int) index into the /Configs array of /OCProperties;
            -1 (default) selects the /D entry instead.
    Returns:
        The result of 'JM_get_ocg_arrays()' for the selected configuration,
        or None if the PDF catalog has no /OCProperties.
    Raises:
        ValueError: if the selected configuration does not exist.
    """
    pdf = _as_pdf_document(self)
    ocprops = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('OCProperties'),
    )
    if not ocprops.m_internal:
        return None

    if config != -1:
        configs = mupdf.pdf_dict_get(ocprops, PDF_NAME('Configs'))
        layer = mupdf.pdf_array_get(configs, config)
    else:
        layer = mupdf.pdf_dict_get(ocprops, PDF_NAME('D'))

    if not layer.m_internal:
        raise ValueError(MSG_BAD_OC_CONFIG)
    return JM_get_ocg_arrays(layer)
def get_layers(self):
    """Show optional OC layers.

    Returns:
        A list of dictionaries with keys "number", "name" and "creator",
        one per layer configuration.
    """
    pdf = _as_pdf_document(self)
    count = mupdf.pdf_count_layer_configs(pdf)
    if count == 1:
        # A count of 1 is only accepted if /Configs really is an array.
        configs = mupdf.pdf_dict_getl(
            mupdf.pdf_trailer(pdf),
            PDF_NAME('Root'),
            PDF_NAME('OCProperties'),
            PDF_NAME('Configs'),
        )
        if not mupdf.pdf_is_array(configs):
            count = 0

    layers = []
    info = mupdf.PdfLayerConfig()
    for number in range(count):
        mupdf.pdf_layer_config_info(pdf, number, info)
        layers.append(
            {
                "number": number,
                "name": info.name,
                "creator": info.creator,
            }
        )
    return layers
def get_new_xref(self):
    """Create a new, empty PDF object and return its xref number.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    ENSURE_OPERATION(pdf)
    return mupdf.pdf_create_object(pdf)
def get_oc(doc: 'Document', xref: int) -> int:
    """Return optional content object xref for an image or form xobject.

    Args:
        xref: (int) xref number of an image or form xobject.
    Returns:
        The xref number referenced by the object's /OC key, or 0 if the
        key is not an indirect reference.
    Raises:
        ValueError: if the document is closed or encrypted, or if the
            object's /Subtype is neither /Image nor /Form.
    """
    if doc.is_closed or doc.is_encrypted:
        raise ValueError("document closed or encrypted")
    t, name = doc.xref_get_key(xref, "Subtype")
    if t != "name" or name not in ("/Image", "/Form"):
        raise ValueError("bad object type at xref %i" % xref)
    t, oc = doc.xref_get_key(xref, "OC")
    if t != "xref":
        return 0
    # An indirect reference reads "<num> <gen> R": the object number is the
    # first token, whatever the generation number is.
    return int(oc.split()[0])
-
def get_ocgs(self):
    """Show existing optional content groups.

    Returns:
        A dictionary keyed by OCG xref. Each value is a dictionary with
        keys "name", "intent" (list of names), "on" (bool) and "usage"
        (name of /Usage/CreatorInfo/Subtype, or None).
    """
    creator_info = mupdf.pdf_new_name("CreatorInfo")
    pdf = _as_pdf_document(self)
    ocg_array = mupdf.pdf_dict_getl(
        mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root')),
        PDF_NAME('OCProperties'),
        PDF_NAME('OCGs'),
    )
    result = dict()
    if not mupdf.pdf_is_array(ocg_array):
        return result

    for i in range(mupdf.pdf_array_len(ocg_array)):
        ocg = mupdf.pdf_array_get(ocg_array, i)
        xref = mupdf.pdf_to_num(ocg)
        name = mupdf.pdf_to_text_string(mupdf.pdf_dict_get(ocg, PDF_NAME('Name')))

        usage_obj = mupdf.pdf_dict_getl(
            ocg, PDF_NAME('Usage'), creator_info, PDF_NAME('Subtype')
        )
        usage = mupdf.pdf_to_name(usage_obj) if usage_obj.m_internal else None

        # /Intent is either a single name or an array of names
        intents = list()
        intent = mupdf.pdf_dict_get(ocg, PDF_NAME('Intent'))
        if intent.m_internal:
            if mupdf.pdf_is_name(intent):
                intents.append(mupdf.pdf_to_name(intent))
            elif mupdf.pdf_is_array(intent):
                for j in range(mupdf.pdf_array_len(intent)):
                    entry = mupdf.pdf_array_get(intent, j)
                    if mupdf.pdf_is_name(entry):
                        intents.append(mupdf.pdf_to_name(entry))

        # The second argument of pdf_is_ocg_hidden depends on the MuPDF version.
        if mupdf_version_tuple >= (1, 26, 11):
            resources = mupdf.PdfResourceStack()
        else:
            resources = mupdf.PdfObj()
        hidden = mupdf.pdf_is_ocg_hidden(pdf, resources, usage, ocg)

        result[xref] = {
            "name": name,
            "intent": intents,
            "on": not hidden,
            "usage": usage,
        }
    return result
def get_ocmd(doc: 'Document', xref: int) -> dict:
    """Return the definition of an OCMD (optional content membership dictionary).

    Recognizes PDF dict keys /OCGs (PDF array of OCGs), /P (policy string) and
    /VE (visibility expression, PDF array). Via string manipulation, this
    info is converted to a Python dictionary with keys "xref", "ocgs", "policy"
    and "ve" - ready to recycle as input for 'set_ocmd()'.

    Args:
        xref: (int) xref number of the OCMD object.
    Returns:
        Dictionary {"xref": int, "ocgs": list of int or None,
        "policy": str or None, "ve": nested list or None}.
    Raises:
        ValueError: for an xref outside the document, an object that is no
            OCMD, an unknown /P policy name or an unbalanced /VE array.
    """
    import json
    import re

    if xref not in range(doc.xref_length()):
        raise ValueError("bad xref")
    text = doc.xref_object(xref, compressed=True)
    if "/Type/OCMD" not in text:
        raise ValueError("bad object type")
    textlen = len(text)

    p0 = text.find("/OCGs[")  # look for /OCGs key
    p1 = text.find("]", p0)
    if p0 < 0 or p1 < 0:  # no OCGs found
        ocgs = None
    else:
        ocgs = text[p0 + 6 : p1].replace("0 R", " ").split()
        ocgs = list(map(int, ocgs))

    # Look for the /P policy key. The name is matched as a whole: a
    # case-sensitive search for the lowercase ending "on" never matches
    # "AnyOn" or "AllOn".
    if text.find("/P/") < 0:
        policy = None
    else:
        found = re.search(r"/P/(AllOff|AllOn|AnyOff|AnyOn)(?![A-Za-z])", text)
        if found is None:  # some irregular syntax
            raise ValueError("bad object at xref")
        policy = found.group(1)

    p0 = text.find("/VE[")  # look for /VE visibility expression key
    if p0 < 0:  # no visibility expression found
        ve = None
    else:
        lp = rp = 0  # find end of /VE by finding the matching last ']'.
        p1 = p0
        while lp < 1 or lp != rp:
            p1 += 1
            if not p1 < textlen:  # some irregular syntax
                raise ValueError("bad object at xref")
            if text[p1] == "[":
                lp += 1
            if text[p1] == "]":
                rp += 1
        # p1 now positioned at the last "]"
        ve = text[p0 + 3 : p1 + 1]  # the PDF /VE array
        # Rewrite the PDF array as JSON: operators become strings,
        # references "n 0 R" become plain numbers.
        ve = (
            ve.replace("/And", '"and",')
            .replace("/Not", '"not",')
            .replace("/Or", '"or",')
        )
        ve = ve.replace(" 0 R]", "]").replace(" 0 R", ",").replace("][", "],[")
        try:
            ve = json.loads(ve)
        except Exception:
            exception_info()
            message(f"bad /VE key: {ve!r}")
            raise
    return {"xref": xref, "ocgs": ocgs, "policy": policy, "ve": ve}
def get_outline_xrefs(self):
    """Get list of outline xref numbers.

    Returns:
        The list produced by 'JM_outline_xrefs()' starting at the first
        outline item. The list is empty if the document is no PDF or has
        no /Root, /Outlines or /First entry.
    """
    xrefs = []
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return xrefs
    # Walk trailer -> /Root -> /Outlines -> /First, giving up at the
    # first missing link.
    node = mupdf.pdf_trailer(pdf)
    for key in ('Root', 'Outlines', 'First'):
        node = mupdf.pdf_dict_get(node, PDF_NAME(key))
        if not node.m_internal:
            return xrefs
    return JM_outline_xrefs(node, xrefs)
def get_page_fonts(self, pno: int, full: bool =False) -> list:
    """Retrieve a list of fonts used on a page.

    Args:
        pno: (int) page number, or an object with a 'number' attribute.
        full: (bool) if false, the last item of each entry is dropped.
    Returns:
        The entries delivered by '_getPageInfo(pno, 1)'; an empty tuple
        if the document is not a PDF.
    Raises:
        ValueError: if the document is closed or encrypted, or if 'pno'
            is neither an int nor has a 'number' attribute.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if not self.is_pdf:
        return ()
    if type(pno) is not int:
        # accept a Page-like object in place of the number
        try:
            pno = pno.number
        except Exception:
            exception_info()
            raise ValueError("need a Page or page number")
    entries = self._getPageInfo(pno, 1)
    if full:
        return entries
    return [entry[:-1] for entry in entries]
def get_page_images(self, pno: int, full: bool =False) -> list:
    """Retrieve a list of images used on a page.

    Args:
        pno: (int) page number.
        full: (bool) if false, the last item of each entry is dropped.
    Returns:
        The entries delivered by '_getPageInfo(pno, 2)'; an empty tuple
        if the document is not a PDF.
    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if not self.is_pdf:
        return ()
    entries = self._getPageInfo(pno, 2)
    if full:
        return entries
    return [entry[:-1] for entry in entries]
def get_page_labels(self):
    """Return page label definitions in PDF document.

    Returns:
        A list of dictionaries with the following format:
        {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int}.
    """
    rules = []
    for item in self._get_page_labels():
        rules.append(utils.rule_dict(item))
    return rules
def get_page_numbers(doc, label, only_one=False):
    """Return a list of page numbers with the given label.

    Args:
        doc: PDF document object (resp. 'self').
        label: (str) label.
        only_one: (bool) stop searching after first hit.
    Returns:
        List of page numbers having this label.
    """
    hits = []
    if not label:
        return hits
    labels = doc._get_page_labels()
    if labels == []:
        return hits
    for pno in range(doc.page_count):
        if utils.get_label_pno(pno, labels) != label:
            continue
        hits.append(pno)
        if only_one:
            break
    return hits
-
def get_page_pixmap(
    doc: 'Document',
    pno: int,
    *,
    matrix: matrix_like = None,
    dpi=None,
    colorspace: Colorspace = None,
    clip: rect_like = None,
    alpha: bool = False,
    annots: bool = True,
) -> 'Pixmap':
    """Create pixmap of document page by page number.

    Notes:
        Convenience function calling page.get_pixmap.
    Args:
        pno: (int) page number
        matrix: transformation matrix, default is 'Identity'.
        dpi: passed through to page.get_pixmap unchanged.
        colorspace: colorspace of the pixmap, default is 'csRGB'.
        clip: (irect-like) restrict rendering to this area.
        alpha: (bool) include alpha channel
        annots: (bool) also render annotations
    Returns:
        Whatever page.get_pixmap returns for these arguments.
    """
    options = {
        "matrix": Identity if matrix is None else matrix,
        "dpi": dpi,
        "colorspace": csRGB if colorspace is None else colorspace,
        "clip": clip,
        "alpha": alpha,
        "annots": annots,
    }
    return doc[pno].get_pixmap(**options)
-
def get_page_text(
    doc: 'Document',
    pno: int,
    option: str = "text",
    clip: rect_like = None,
    flags: OptInt = None,
    textpage: 'TextPage' = None,
    sort: bool = False,
) -> typing.Any:
    """Extract a document page's text by page number.

    Notes:
        Convenience function calling page.get_text().
    Args:
        pno: page number
        option: (str) text, words, blocks, html, dict, json, rawdict, xhtml or xml.
        clip: (rect-like) passed through to page.get_text().
        flags: (int) passed through to page.get_text().
        textpage: a TextPage to reuse, passed through to page.get_text().
        sort: (bool) passed through to page.get_text().
    Returns:
        The output of page.get_text() for the chosen option.
    """
    # 'textpage' must be forwarded too, otherwise a caller-supplied
    # TextPage is silently ignored.
    return doc[pno].get_text(
        option,
        clip=clip,
        flags=flags,
        textpage=textpage,
        sort=sort,
    )
-
def get_page_xobjects(self, pno: int) -> list:
    """Retrieve a list of XObjects used on a page.

    Args:
        pno: (int) page number.
    Returns:
        The entries delivered by '_getPageInfo(pno, 3)'; an empty tuple
        if the document is not a PDF.
    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    return self._getPageInfo(pno, 3) if self.is_pdf else ()
def get_sigflags(self):
    """Get the /SigFlags value.

    Returns:
        The integer value of /Root/AcroForm/SigFlags, or -1 if the
        document is not a PDF or the key does not exist.
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return -1  # not a PDF
    flags_obj = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('AcroForm'),
        PDF_NAME('SigFlags'),
    )
    if not flags_obj.m_internal:
        return -1
    return mupdf.pdf_to_int(flags_obj)
def get_toc(
    doc: 'Document',
    simple: bool = True,
) -> list:
    """Create a table of contents.

    Args:
        simple: a bool to control output. If true, each entry is
            [level, title, page]; otherwise a link destination dictionary
            is appended as fourth item.
    Returns:
        A list of entries in outline order, empty if the document has no
        outline.
    Raises:
        ValueError: if the document is closed.
    """

    def recurse(item, entries, level):
        """Follow the chain of outline items, descending into children first."""
        while item and item.this.m_internal:
            title = item.title or " "

            # Only internal items with a URI have a page number.
            page = -1
            if not item.is_external and item.uri:
                if item.page == -1:
                    page = doc.resolve_link(item.uri)[0] + 1
                else:
                    page = item.page + 1

            if simple:
                entries.append([level, title, page])
            else:
                entries.append([level, title, page, utils.getLinkDict(item, doc)])

            if item.down:
                entries = recurse(item.down, entries, level + 1)
            item = item.next
        return entries

    # ensure document is open
    if doc.is_closed:
        raise ValueError("document closed")
    doc.init_doc()
    first = doc.outline
    if not first:
        return []
    toc = recurse(first, [], 1)
    if doc.is_pdf and not simple:
        doc._extend_toc_items(toc)
    return toc
-
def get_xml_metadata(self):
    """Get document XML metadata.

    Returns:
        The stream of /Root/Metadata converted by 'JM_UnicodeFromBuffer()',
        or the empty string if the document is no PDF or has no such object.
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return ''
    xml = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('Metadata'),
    )
    if xml is None or not xml.m_internal:
        return ''
    return JM_UnicodeFromBuffer(mupdf.pdf_load_stream(xml))
def has_annots(doc: 'Document') -> bool:
    """Check whether there are annotations on any page.

    Notes:
        Entries of type link or widget do not count as annotations here.
    Raises:
        ValueError: if the document is closed or not a PDF.
    """
    if doc.is_closed:
        raise ValueError("document closed")
    if not doc.is_pdf:
        raise ValueError("is no PDF")
    # pylint: disable=no-member
    return any(
        item[1] not in (mupdf.PDF_ANNOT_LINK, mupdf.PDF_ANNOT_WIDGET)
        for pno in range(doc.page_count)
        for item in doc.page_annot_xrefs(pno)
    )
-
def has_links(doc: 'Document') -> bool:
    """Check whether there are links on any page.

    Raises:
        ValueError: if the document is closed or not a PDF.
    """
    if doc.is_closed:
        raise ValueError("document closed")
    if not doc.is_pdf:
        raise ValueError("is no PDF")
    # pylint: disable=no-member
    return any(
        item[1] == mupdf.PDF_ANNOT_LINK
        for pno in range(doc.page_count)
        for item in doc.page_annot_xrefs(pno)
    )
-
def init_doc(self):
    """Load the outline and the metadata dictionary of the document.

    Sets 'self._outline' from '_loadOutline()' and 'self.metadata' from
    '_getMetadata()'. The "encryption" entry is None if the lookup
    delivers the string 'None'.

    Raises:
        ValueError: if the document is still encrypted.
    """
    if self.is_encrypted:
        raise ValueError("cannot initialize - document still encrypted")
    self._outline = self._loadOutline()
    # metadata key -> lookup key understood by _getMetadata()
    lookup_keys = {
        'format': 'format',
        'title': 'info:Title',
        'author': 'info:Author',
        'subject': 'info:Subject',
        'keywords': 'info:Keywords',
        'creator': 'info:Creator',
        'producer': 'info:Producer',
        'creationDate': 'info:CreationDate',
        'modDate': 'info:ModDate',
        'trapped': 'info:Trapped',
    }
    self.metadata = {k: self._getMetadata(v) for k, v in lookup_keys.items()}
    # Look up the encryption info once instead of twice.
    encryption = self._getMetadata('encryption')
    self.metadata['encryption'] = None if encryption == 'None' else encryption
def insert_file(self,
        infile,
        from_page=-1,
        to_page=-1,
        start_at=-1,
        rotate=-1,
        links=True,
        annots=True,
        show_progress=0,
        final=1,
        ):
    '''
    Insert an arbitrary supported document to an existing PDF.

    The infile may be given as a filename, a Document or a Pixmap. Other
    parameters - where applicable - equal those of insert_pdf().

    A Pixmap is converted to a PNG document first; a source that is no
    PDF is converted with convert_to_pdf() before insertion.
    Raises ValueError("bad infile parameter") if the source is falsy.
    '''
    if isinstance(infile, Document):
        src = infile
    elif isinstance(infile, Pixmap):
        # more than 3 colorspace components: convert to RGB first
        if infile.colorspace.n > 3:
            infile = Pixmap(csRGB, infile)
        src = Document("png", infile.tobytes())
    else:
        src = Document(infile)

    if not src:
        raise ValueError("bad infile parameter")
    if not src.is_pdf:
        src = Document("pdf", src.convert_to_pdf())

    return self.insert_pdf(
        src,
        from_page=from_page,
        to_page=to_page,
        start_at=start_at,
        rotate=rotate,
        links=links,
        annots=annots,
        show_progress=show_progress,
        final=final,
    )
def insert_page(
    doc: 'Document',
    pno: int,
    text: typing.Union[str, list, None] = None,
    fontsize: float = 11,
    width: float = 595,
    height: float = 842,
    fontname: str = "helv",
    fontfile: OptStr = None,
    color: OptSeq = (0,),
) -> int:
    """Create a new PDF page and insert some text.

    Notes:
        Function combining pymupdf.Document.new_page() and pymupdf.Page.insert_text().
        For parameter details see these methods.
    Returns:
        0 if no text was given, otherwise the result of insert_text(),
        which starts writing at point (50, 72).
    """
    page = doc.new_page(pno=pno, width=width, height=height)
    if text:
        return page.insert_text(
            (50, 72),
            text,
            fontsize=fontsize,
            fontname=fontname,
            fontfile=fontfile,
            color=color,
        )
    return 0
-
def insert_pdf(
        self,
        docsrc,
        *,
        from_page=-1,
        to_page=-1,
        start_at=-1,
        rotate=-1,
        links=1,
        annots=1,
        widgets=1,
        join_duplicates=0,
        show_progress=0,
        final=1,
        _gmap=None,
        ):
    """Insert a page range from another PDF.

    Args:
        docsrc: PDF to copy from. Must be different object, but may be same file.
        from_page: (int) first source page to copy, 0-based, default 0.
        to_page: (int) last source page to copy, 0-based, default last page.
        start_at: (int) from_page will become this page number in target.
        rotate: (int) rotate copied pages, default -1 is no change.
        links: (int/bool) whether to also copy links.
        annots: (int/bool) whether to also copy annotations.
        widgets: (int/bool) whether to also copy form fields.
        join_duplicates: (int/bool) join or rename duplicate widget names.
        show_progress: (int) progress message interval, 0 is no messages.
        final: (bool) indicates last insertion from this source PDF.
        _gmap: internal use only
    Raises:
        ValueError: if this document is closed or encrypted, or if source
            and target have the same graft id.
        TypeError: if source or target is not a PDF.

    Copy sequence reversed if from_page > to_page."""
    # Insert pages from a source PDF into this PDF.
    # For reconstructing the links (_do_links method), we must save the
    # insertion point (start_at) if it was specified as -1.
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if self._graft_id == docsrc._graft_id:
        raise ValueError("source and target cannot be same object")
    outCount = self.page_count
    srcCount = docsrc.page_count

    # local copies of page numbers
    fp = from_page
    tp = to_page
    sa = start_at

    # normalize page numbers
    fp = max(fp, 0)  # -1 = first page
    fp = min(fp, srcCount - 1)  # but do not exceed last page

    if tp < 0:
        tp = srcCount - 1  # -1 = last page
    tp = min(tp, srcCount - 1)  # but do not exceed last page

    if sa < 0:
        sa = outCount  # -1 = behind last page
    sa = min(sa, outCount)  # but that is also the limit

    if len(docsrc) > show_progress > 0:
        inname = os.path.basename(docsrc.name)
        if not inname:
            inname = "memory PDF"
        outname = os.path.basename(self.name)
        if not outname:
            outname = "memory PDF"
        message("Inserting '%s' at '%s'" % (inname, outname))

    # retrieve / make a Graftmap to avoid duplicate objects
    isrt = docsrc._graft_id
    _gmap = self.Graftmaps.get(isrt, None)
    if _gmap is None:
        _gmap = Graftmap(self)
        self.Graftmaps[isrt] = _gmap

    if g_use_extra:
        # the extra module receives the page numbers as given by the caller
        extra_FzDocument_insert_pdf(
            self.this,
            docsrc.this,
            from_page,
            to_page,
            start_at,
            rotate,
            links,
            annots,
            show_progress,
            final,
            _gmap,
        )
    else:
        pdfout = _as_pdf_document(self)
        pdfsrc = _as_pdf_document(docsrc)
        if not pdfout.m_internal or not pdfsrc.m_internal:
            raise TypeError("source or target not a PDF")
        ENSURE_OPERATION(pdfout)
        JM_merge_range(pdfout, pdfsrc, fp, tp, sa, rotate, links, annots, show_progress, _gmap)

    self._reset_page_refs()
    if links:
        self._do_links(docsrc, from_page=fp, to_page=tp, start_at=sa)
    if widgets:
        self._do_widgets(docsrc, _gmap, from_page=fp, to_page=tp, start_at=sa, join_duplicates=join_duplicates)
    if final == 1:
        # last insertion from this source: release its Graftmap
        self.Graftmaps[isrt] = None
@property
def is_dirty(self):
    """True if the PDF has unsaved changes, False otherwise or if no PDF."""
    pdf = _as_pdf_document(self, required=0)
    if pdf.m_internal:
        return bool(mupdf.pdf_has_unsaved_changes(pdf))
    return False
@property
def is_fast_webaccess(self):
    '''
    Check whether we have a linearized PDF.

    Returns the value of 'pdf_doc_was_linearized()' for a PDF and False
    for any other document type.
    '''
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return False  # gracefully handle non-PDF
    return mupdf.pdf_doc_was_linearized(pdf)
@property
def is_form_pdf(self):
    """Either False or PDF field count.

    The count is the length of the array /Root/AcroForm/Fields. False is
    returned if the document is no PDF, if that key is no array, or if
    the lookup raises an exception.
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return False
    field_count = -1
    try:
        fields = mupdf.pdf_dict_getl(
            mupdf.pdf_trailer(pdf),
            mupdf.PDF_ENUM_NAME_Root,
            mupdf.PDF_ENUM_NAME_AcroForm,
            mupdf.PDF_ENUM_NAME_Fields,
        )
        if mupdf.pdf_is_array(fields):
            field_count = mupdf.pdf_array_len(fields)
    except Exception:
        if g_exceptions_verbose: exception_info()
        return False
    return field_count if field_count >= 0 else False
@property
def is_pdf(self):
    """Check for PDF."""
    if isinstance(self.this, mupdf.PdfDocument):
        return True
    # Use the low-level ll_pdf_specifics() here: mupdf.pdf_specifics()
    # would end up creating a new PdfDocument, which is ok but a little
    # unnecessary.
    return bool(mupdf.ll_pdf_specifics(self.this.m_internal))
@property
def is_reflowable(self):
    """Check if document is layoutable.

    Raises:
        ValueError: if the document is closed.
    """
    if self.is_closed:
        raise ValueError("document closed")
    # MuPDF must be handed the underlying document, not this Python
    # wrapper object.
    return bool(mupdf.fz_is_document_reflowable(_as_fz_document(self)))
@property
def is_repaired(self):
    """Check whether PDF was repaired (always False for non-PDF)."""
    pdf = _as_pdf_document(self, required=0)
    if pdf.m_internal:
        return bool(mupdf.pdf_was_repaired(pdf))
    return False
def journal_can_do(self):
    """Show if undo and / or redo are possible.

    Returns:
        Dictionary {'undo': bool, 'redo': bool}.
    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    can_undo = mupdf.pdf_can_undo(pdf)
    can_redo = mupdf.pdf_can_redo(pdf)
    return dict(undo=bool(can_undo), redo=bool(can_redo))
def journal_enable(self):
    """Activate document journalling.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    mupdf.pdf_enable_journal(_as_pdf_document(self))
def journal_is_enabled(self):
    """Check if journalling is enabled.

    Returns:
        True if the PDF has a journal, else False.
    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    # Convert to a real bool instead of handing out the journal object
    # (or None).
    return bool(pdf.m_internal and pdf.m_internal.journal)
def journal_load(self, filename):
    """Load a journal from a file.

    Args:
        filename: a file name (str); any other value is converted with
            'JM_BufferFromBytes()' and deserialised from memory.
    Raises:
        ValueError: if the document is closed or encrypted.
        JM_Exc_FileDataError (via RAISEPY): if the PDF has no journal
            after loading.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    if not isinstance(filename, str):
        stream = mupdf.fz_open_buffer(JM_BufferFromBytes(filename))
        mupdf.pdf_deserialise_journal(pdf, stream)
    else:
        mupdf.pdf_load_journal(pdf, filename)
    if not pdf.m_internal.journal:
        RAISEPY("Journal and document do not match", JM_Exc_FileDataError)
def journal_op_name(self, step):
    """Show operation name for given step.

    Args:
        step: journal step, passed to 'pdf_undoredo_step()'.
    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    return mupdf.pdf_undoredo_step(_as_pdf_document(self), step)
def journal_position(self):
    """Show journalling state.

    Returns:
        The two values delivered by 'pdf_undoredo_state()' as a tuple.
    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    position, step_count = mupdf.pdf_undoredo_state(pdf)
    return position, step_count
def journal_redo(self):
    """Move forward in the journal.

    Returns:
        True.
    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    mupdf.pdf_redo(_as_pdf_document(self))
    return True
def journal_save(self, filename):
    """Save journal to a file.

    Args:
        filename: a file name (str); any other value is wrapped with
            'JM_new_output_fileptr()' and written to.
    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    if not isinstance(filename, str):
        output = JM_new_output_fileptr(filename)
        mupdf.pdf_write_journal(pdf, output)
        output.fz_close_output()
    else:
        mupdf.pdf_save_journal(pdf, filename)
def journal_start_op(self, name=None):
    """Begin a journalling operation.

    Args:
        name: operation name; if empty or None, an implicit operation
            is started.
    Raises:
        ValueError: if the document is closed or encrypted.
        RuntimeError: if the PDF has no journal.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    if not pdf.m_internal.journal:
        raise RuntimeError("Journalling not enabled")
    if not name:
        mupdf.pdf_begin_implicit_operation(pdf)
    else:
        mupdf.pdf_begin_operation(pdf, name)
def journal_stop_op(self):
    """End a journalling operation.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    mupdf.pdf_end_operation(_as_pdf_document(self))
def journal_undo(self):
    """Move backwards in the journal.

    Returns:
        True.
    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    mupdf.pdf_undo(_as_pdf_document(self))
    return True
@property
def language(self):
    """Document language.

    Returns:
        The language string for the PDF's language value, or None if the
        document is no PDF or the language is unset.
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return None
    lang = mupdf.pdf_document_language(pdf)
    if lang == mupdf.FZ_LANG_UNSET:
        return None
    return mupdf.fz_string_from_text_language2(lang)
@property
def last_location(self):
    """Id (chapter, page) of last page.

    Raises:
        ValueError: if the document is closed.
    """
    if self.is_closed:
        raise ValueError("document closed")
    loc = mupdf.fz_last_page(self.this)
    return (loc.chapter, loc.page)
def layer_ui_configs(self):
    """Show OC visibility status modifiable by user.

    Returns:
        A list of dictionaries with keys "number", "text", "depth",
        "type", "on" and "locked". "type" is "checkbox" for type value 1,
        "radiobox" for 2 and "label" for anything else.
    """
    type_names = {1: "checkbox", 2: "radiobox"}
    pdf = _as_pdf_document(self)
    info = mupdf.PdfLayerConfigUi()
    configs = []
    for number in range(mupdf.pdf_count_layer_config_ui(pdf)):
        mupdf.pdf_layer_config_ui_info(pdf, number, info)
        kind = type_names.get(info.type, "label")
        configs.append(
            {
                "number": number,
                "text": info.text,
                "depth": info.depth,
                "type": kind,
                "on": info.selected,
                "locked": info.locked,
            }
        )
    return configs
def layout(self, rect=None, width=0, height=0, fontsize=11):
    """Re-layout a reflowable document.

    Args:
        rect: page rectangle; unless it converts to an infinite rectangle,
            its width and height override the two following parameters.
        width: page width.
        height: page height.
        fontsize: font size handed to 'fz_layout_document()'.
    Notes:
        Does nothing if the document is not reflowable. Afterwards, page
        references are reset and 'init_doc()' is called again.
    Raises:
        ValueError: if the document is closed or encrypted, or if the
            resulting width or height is not positive.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    doc = self.this
    if not mupdf.fz_is_document_reflowable(doc):
        return
    page_w, page_h = width, height
    box = JM_rect_from_py(rect)
    if not mupdf.fz_is_infinite_rect(box):
        page_w = box.x1 - box.x0
        page_h = box.y1 - box.y0
    if page_w <= 0.0 or page_h <= 0.0:
        raise ValueError("bad page size")
    mupdf.fz_layout_document(doc, page_w, page_h, fontsize)
    self._reset_page_refs()
    self.init_doc()
def load_page(self, page_id):
    """Load a page.

    'page_id' is either a 0-based page number or a tuple (chapter, pno),
    with chapter number and page number within that chapter.
    None is treated as page number 0; a negative page number counts
    from the end of the document.

    Returns:
        The Page object, registered in 'self._page_refs'.
    Raises:
        ValueError: if the document is closed or encrypted, or if the
            page is not in the document.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if page_id is None:
        page_id = 0
    if page_id not in self:
        raise ValueError("page not in document")
    if type(page_id) is int and page_id < 0:
        np = self.page_count
        if np <= 0:
            # Adding a page count of 0 over and over would never end.
            raise ValueError("page not in document")
        page_id %= np  # same result as adding np until non-negative
    if isinstance(page_id, int):
        page = mupdf.fz_load_page(self.this, page_id)
    else:
        chapter, pagenum = page_id
        page = mupdf.fz_load_chapter_page(self.this, chapter, pagenum)
    val = Page(page, self)
    val.thisown = True
    val.parent = self
    self._page_refs[id(val)] = val
    val._annot_refs = weakref.WeakValueDictionary()
    val.number = page_id
    return val
def location_from_page_number(self, pno):
    """Convert pno to (chapter, page).

    Args:
        pno: (int) page number; negative values count from the end.
    Returns:
        Tuple (chapter, page).
    Raises:
        ValueError: if the document is closed or 'pno' is not a page of
            the document.
    """
    if self.is_closed:
        raise ValueError("document closed")
    this_doc = self.this
    page_count = mupdf.fz_count_pages(this_doc)
    if pno < 0 and page_count > 0:
        # same result as adding page_count until non-negative
        pno %= page_count
    # Also rejects a negative pno for a document without pages, where
    # adding a page count of 0 repeatedly would never end.
    if not 0 <= pno < page_count:
        raise ValueError(MSG_BAD_PAGENO)
    loc = mupdf.fz_location_from_page_number(this_doc, pno)
    return loc.chapter, loc.page
def make_bookmark(self, loc):
    """Make a page pointer before layouting document.

    Args:
        loc: sequence of values accepted by 'mupdf.FzLocation()'.
    Returns:
        The bookmark value created by MuPDF.
    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    location = mupdf.FzLocation(*loc)
    return mupdf.ll_fz_make_bookmark2(self.this.m_internal, location.internal())
@property
def markinfo(self) -> dict:
    """Return the PDF MarkInfo value.

    Returns:
        None if there is no PDF catalog. An empty dictionary if the
        catalog has no usable /MarkInfo dictionary. Otherwise a dictionary
        with the keys "Marked", "UserProperties" and "Suspects" (default
        False), where every key found with value "true" is set to True.
    """
    catalog = self.pdf_catalog()
    if catalog == 0:
        return None
    entry = self.xref_get_key(catalog, "MarkInfo")
    kind = entry[0]
    if kind == "null":
        return {}
    # /MarkInfo is either an indirect reference or an inline dictionary
    if kind == "xref":
        target = int(entry[1].split()[0])
        source = self.xref_object(target, compressed=True)
    elif kind == "dict":
        source = entry[1]
    else:
        source = None
    if source is None or not (source[:2] == "<<" and source[-2:] == ">>"):
        return {}
    flags = {"Marked": False, "UserProperties": False, "Suspects": False}
    # Items look like "Key value"; the text before the first "/" is skipped.
    for part in source[2:-2].split("/")[1:]:
        try:
            key, value = part.split()
        except Exception:
            if g_exceptions_verbose > 1: exception_info()
            return flags
        if value == "true":
            flags[key] = True
    return flags
def move_page(self, pno: int, to: int =-1):
    """Move a page within a PDF document.

    Args:
        pno: source page number.
        to: put before this page, '-1' means after last page.
    Returns:
        The result of '_move_copy_page()'.
    Raises:
        ValueError: if the document is closed or a page number is bad.
    """
    if self.is_closed:
        raise ValueError("document closed")
    page_count = len(self)
    if (pno not in range(page_count) or to not in range(-1, page_count)):
        raise ValueError("bad page number(s)")
    if to == -1:
        # -1: place behind the last page
        target, before = page_count - 1, 0
    else:
        target, before = to, 1
    return self._move_copy_page(pno, target, before, 0)
@property
def name(self):
    """Value of the '_name' attribute of this document."""
    return self._name
-
def need_appearances(self, value=None):
    """Get/set the NeedAppearances value.

    Args:
        value: None (default) only queries the current value. Any other
            value sets /NeedAppearances to true or false according to
            its truth value.
    Returns:
        None if the document is no Form PDF. For a query: the current
        bool value of the key, or None if it does not exist as a bool.
        For an update: 'value' as passed in.
    """
    if not self.is_form_pdf:
        return None

    pdf = _as_pdf_document(self)
    oldval = -1
    appkey = "NeedAppearances"

    form = mupdf.pdf_dict_getp(
        mupdf.pdf_trailer(pdf),
        "Root/AcroForm",
    )
    app = mupdf.pdf_dict_gets(form, appkey)
    if mupdf.pdf_is_bool(app):
        oldval = mupdf.pdf_to_bool(app)

    if value is None:
        # A query must not modify the PDF; report the stored value.
        return bool(oldval) if oldval >= 0 else None

    if value:
        mupdf.pdf_dict_puts(form, appkey, mupdf.PDF_TRUE)
    else:
        mupdf.pdf_dict_puts(form, appkey, mupdf.PDF_FALSE)
    return value
@property
def needs_pass(self):
    """Indicate password required.

    Raises:
        ValueError: if the document is closed.
    """
    if self.is_closed:
        raise ValueError("document closed")
    # fz_needs_password() wants an FzDocument; anything else is
    # converted via its super() method.
    if isinstance(self.this, mupdf.FzDocument):
        document = self.this
    else:
        document = self.this.super()
    return mupdf.fz_needs_password(document)
def new_page(
    doc: 'Document',
    pno: int = -1,
    width: float = 595,
    height: float = 842,
) -> Page:
    """Create and return a new page object.

    Args:
        pno: (int) insert before this page. Default: after last page.
        width: (float) page width in points. Default: 595 (ISO A4 width).
        height: (float) page height in points. Default 842 (ISO A4 height).
    Returns:
        A pymupdf.Page object.
    """
    doc._newPage(pno, width=width, height=height)
    created = doc[pno]
    return created
-
def next_location(self, page_id):
    """Get (chapter, page) of next page.

    Args:
        page_id: a page number (taken as page of chapter 0) or a
            (chapter, page) pair.
    Returns:
        The (chapter, page) tuple of the following page, or an empty
        tuple if 'page_id' is the last location.
    Raises:
        ValueError: if the document is closed or encrypted, or if
            'page_id' is not in the document.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if type(page_id) is int:
        page_id = (0, page_id)
    if page_id not in self:
        raise ValueError("page id not in document")
    if tuple(page_id) == self.last_location:
        return ()
    fz_doc = _as_fz_document(self)
    chapter = page_id[0]
    if not isinstance(chapter, int):
        RAISEPY(MSG_BAD_PAGEID, PyExc_ValueError)
    pno = page_id[1]
    following = mupdf.fz_next_page(fz_doc, mupdf.fz_make_location(chapter, pno))
    return following.chapter, following.page
def page_annot_xrefs(self, n):
    """Return the annotation xref list of page number `n`.

    The page itself is not loaded; only its PDF page object is looked up.

    Args:
        n: (int) page number. Negative values count from the end and wrap
            around by multiples of the page count.
    Returns:
        The result of `JM_get_annot_xref_list()` for the page object (or
        of `extra.page_annot_xrefs()` when the optimized extension is used).
    Raises:
        ValueError: if `n` does not address an existing page. This includes
            `n == page_count` and any negative `n` on a zero-page document.
    """
    if g_use_extra:
        return extra.page_annot_xrefs( self.this, n)

    if isinstance(self.this, mupdf.PdfDocument):
        page_count = mupdf.pdf_count_pages(self.this)
        pdf_document = self.this
    else:
        page_count = mupdf.fz_count_pages(self.this)
        pdf_document = _as_pdf_document(self)
    if n < 0 and page_count > 0:
        # Same result as repeatedly adding page_count, but cannot loop
        # forever when the document has no pages.
        n %= page_count
    if n < 0 or n >= page_count:
        raise ValueError( MSG_BAD_PAGENO)
    page_obj = mupdf.pdf_lookup_page_obj(pdf_document, n)
    return JM_get_annot_xref_list(page_obj)
@property
def page_count(self):
    """Number of pages.

    Raises:
        ValueError: if the document is closed.
    """
    if self.is_closed:
        raise ValueError('document closed')
    if g_use_extra:
        # optimized extension path
        return self.page_count2(self)
    if not isinstance( self.this, mupdf.FzDocument):
        return mupdf.pdf_count_pages( self.this)
    return mupdf.fz_count_pages( self.this)
def page_cropbox(self, pno):
    """Get CropBox of page number (without loading page).

    Args:
        pno: (int) page number. Negative values count from the end and
            wrap around by multiples of the page count.
    Returns:
        A `Rect` built from the page object's crop box.
    Raises:
        ValueError: if the document is closed or `pno` does not address
            an existing page (including any negative `pno` on a zero-page
            document).
    """
    if self.is_closed:
        raise ValueError("document closed")
    page_count = mupdf.fz_count_pages( self.this)
    n = pno
    if n < 0 and page_count > 0:
        # Same result as repeatedly adding page_count, but cannot loop
        # forever when the document has no pages.
        n %= page_count
    pdf = _as_pdf_document(self)
    if n < 0 or n >= page_count:
        raise ValueError( MSG_BAD_PAGENO)
    pageref = mupdf.pdf_lookup_page_obj( pdf, n)
    cropbox = JM_cropbox(pageref)
    return Rect(JM_py_from_rect(cropbox))
def page_number_from_location(self, page_id):
    """Convert (chapter, pno) to page number.

    Args:
        page_id: a (chapter, pno) sequence, or an int which is taken as a
            page of chapter 0. A negative int counts from the end and
            wraps around by multiples of the page count.
    Returns:
        The result of `mupdf.fz_page_number_from_location()`.
    Raises:
        ValueError: if `page_id` is not contained in the document
            (including any negative int on a zero-page document).
    """
    if type(page_id) is int:
        np = self.page_count
        if page_id < 0:
            if np < 1:
                # No pages: adding the page count would never terminate.
                raise ValueError("page id not in document")
            page_id %= np
        page_id = (0, page_id)
    if page_id not in self:
        raise ValueError("page id not in document")
    chapter, pno = page_id
    loc = mupdf.fz_make_location( chapter, pno)
    return mupdf.fz_page_number_from_location( self.this, loc)
def page_xref(self, pno):
    """Get xref of page number.

    Args:
        pno: (int) page number. Negative values count from the end and
            wrap around by multiples of the page count.
    Returns:
        The xref number of the page object (or the result of
        `extra.page_xref()` when the optimized extension is used).
    Raises:
        ValueError: if the document is closed or `pno` does not address
            an existing page (including any negative `pno` on a zero-page
            document).
    """
    if g_use_extra:
        return extra.page_xref( self.this, pno)
    if self.is_closed:
        raise ValueError("document closed")
    page_count = mupdf.fz_count_pages(self.this)
    n = pno
    if n < 0 and page_count > 0:
        # Same result as repeatedly adding page_count, but cannot loop
        # forever when the document has no pages.
        n %= page_count
    pdf = _as_pdf_document(self)
    if n < 0 or n >= page_count:
        raise ValueError( MSG_BAD_PAGENO)
    return mupdf.pdf_to_num(mupdf.pdf_lookup_page_obj(pdf, n))
@property
def pagelayout(self) -> str:
    """Return the PDF PageLayout value.

    Returns None when `pdf_catalog()` yields xref 0. Otherwise the name
    stored under the catalog key "PageLayout" without its leading slash,
    or "SinglePage" if the key is absent or not a PDF name.
    """
    catalog_xref = self.pdf_catalog()
    if catalog_xref == 0:
        return None
    entry = self.xref_get_key(catalog_xref, "PageLayout")
    if entry[0] != "null" and entry[0] == "name":
        # strip the leading "/" of the PDF name
        return entry[1][1:]
    return "SinglePage"
@property
def pagemode(self) -> str:
    """Return the PDF PageMode value.

    Returns None when `pdf_catalog()` yields xref 0. Otherwise the name
    stored under the catalog key "PageMode" without its leading slash,
    or "UseNone" if the key is absent or not a PDF name.
    """
    catalog_xref = self.pdf_catalog()
    if catalog_xref == 0:
        return None
    entry = self.xref_get_key(catalog_xref, "PageMode")
    if entry[0] != "null" and entry[0] == "name":
        # strip the leading "/" of the PDF name
        return entry[1][1:]
    return "UseNone"
# Return annotation used by `pages()`. Before version 3.9, subscripting with
# `[Page]` causes `TypeError: 'ABCMeta' object is not subscriptable`, so the
# plain `Iterable` is the fallback.
_pages_ret = collections.abc.Iterable
if sys.implementation.version >= (3, 9):
    _pages_ret = collections.abc.Iterable[Page]
def pages(self, start: OptInt =None, stop: OptInt =None, step: OptInt =None) -> _pages_ret:
    """Return a generator iterator over a page range.

    Arguments have the same meaning as for the range() built-in.

    Notes:
        * Nothing is yielded for a document without pages.
        * A negative `start` counts from the end of the document.
        * A missing `stop`, or one above the page count, is replaced by
          the page count.
        * A missing `step` becomes -1 if `start` is larger than `stop`,
          else 1.
    Raises:
        ValueError: for a `start` outside the document or a zero `step`.
    """
    if not self.page_count:
        return
    # normalise the start value
    start = start or 0
    while start < 0:
        start += self.page_count
    if start not in range(self.page_count):
        raise ValueError("bad start page number")
    # clamp the stop value
    if stop is None or stop > self.page_count:
        stop = self.page_count
    # determine the step value
    if step == 0:
        raise ValueError("arg 3 must not be zero")
    if step is None:
        step = -1 if start > stop else 1
    for pno in range(start, stop, step):
        yield self.load_page(pno)
def pdf_catalog(self):
    """Get xref of PDF catalog.

    Returns:
        The xref number of the trailer's /Root entry, or 0 if the
        document has no underlying PDF document.
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return 0
    trailer = mupdf.pdf_trailer(pdf)
    root = mupdf.pdf_dict_get(trailer, PDF_NAME('Root'))
    return mupdf.pdf_to_num(root)
def pdf_trailer(self, compressed=0, ascii=0):
    """Get PDF trailer as a string.

    Delegates to `xref_object()` with xref -1, passing `compressed` and
    `ascii` through unchanged.
    """
    trailer_source = self.xref_object(-1, compressed=compressed, ascii=ascii)
    return trailer_source
@property
def permissions(self):
    """Document permissions.

    Returns:
        0 while the document is encrypted. For a PDF, the result of
        `mupdf.pdf_document_permissions()`. For other document types a
        PDF-style value is simulated: starting from 0xFFFFFFFC, the bit of
        every permission that `mupdf.fz_has_permission()` denies is
        switched off.
    """
    if self.is_encrypted:
        return 0
    fz_doc = self.this
    pdf = mupdf.pdf_document_from_fz_document(fz_doc)
    if pdf.m_internal:
        # PDF: return result of the standard function
        return mupdf.pdf_document_permissions(pdf)
    perm = 0xFFFFFFFC  # all permissions granted
    flag_pairs = (
            (mupdf.FZ_PERMISSION_PRINT, mupdf.PDF_PERM_PRINT),
            (mupdf.FZ_PERMISSION_EDIT, mupdf.PDF_PERM_MODIFY),
            (mupdf.FZ_PERMISSION_COPY, mupdf.PDF_PERM_COPY),
            (mupdf.FZ_PERMISSION_ANNOTATE, mupdf.PDF_PERM_ANNOTATE),
            )
    for fz_flag, pdf_bit in flag_pairs:
        if not mupdf.fz_has_permission(fz_doc, fz_flag):
            perm ^= pdf_bit
    return perm
def prev_location(self, page_id):
    """Get (chapter, page) of previous page.

    Args:
        page_id: either an int page number (taken as page of chapter 0)
            or a (chapter, page) sequence (tuple or list).
    Returns:
        Tuple (chapter, page) of the preceding page, or the empty tuple
        if `page_id` denotes the very first page (0, 0).
    Raises:
        ValueError: if the document is closed or encrypted, or if
            `page_id` is not contained in the document.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if type(page_id) is int:
        page_id = (0, page_id)
    if page_id not in self:
        raise ValueError("page id not in document")
    # Normalise to a tuple so that a list like [0, 0] is also recognised
    # as the first page (mirrors the handling in next_location()).
    if tuple(page_id) == (0, 0):
        return ()
    chapter, pno = page_id
    loc = mupdf.fz_make_location(chapter, pno)
    prev_loc = mupdf.fz_previous_page(self.this, loc)
    return prev_loc.chapter, prev_loc.page
def reload_page(self, page: Page) -> Page:
    """Make a fresh copy of a page.

    The passed-in page object is erased and the page with the same number
    is loaded again. The annotation references of the old page object are
    copied over to the new one.

    Returns:
        The newly loaded page.
    """
    pno = page.number  # remember which page to load again
    # copy of the annot dictionary, to be restored on the new page object
    old_annots = {k: v for k, v in page._annot_refs.items()}

    # `self.load_page()` below ends up in fz_load_chapter_page(), which
    # returns any matching page from the document's list of
    # non-ref-counted loaded pages instead of actually reloading it.
    #
    # We want to be sure that we return a genuinely reloaded fz_page and
    # not the same `fz_page*` taken from that list.
    #
    # Therefore our reference to the `fz_page*` is dropped first. This
    # decrements .refs; if .refs was 1, the `fz_page*` is guaranteed to be
    # freed and removed from the document's list, so whatever is loaded
    # afterwards is a genuine reload - even if it happens to reuse the
    # same block of memory.
    #
    # If .refs was greater than one, other references to the fz_page
    # exist somewhere, and we require that these do not keep the page in
    # the document's list. This is checked by asserting that the new
    # `fz_page*` differs from the old one: the old page was not freed, so
    # a new fz_page cannot occupy the same memory.
    #
    # NOTE: do not bind `page.this` to a local name here - that would be
    # an additional reference and defeat the logic described above.

    refs_old = page.this.m_internal.refs
    m_internal_old = page.this.m_internal_value()

    page.this = None
    page._erase()  # remove the page
    page = None
    TOOLS.store_shrink(100)
    page = self.load_page(pno)  # reload the page

    # put the saved annot references into the new page's dictionary
    for k, annot in old_annots.items():
        page._annot_refs[k] = annot

    if refs_old != 1:
        # Other references existed: the new `fz_page*` must be different
        # from the original one.
        m_internal_new = page.this.m_internal_value()
        assert m_internal_new != m_internal_old, \
                f'{refs_old=} {m_internal_old=:#x} {m_internal_new=:#x}'
    # else: `page.this = None` took the ref count to zero, so the new
    # fz_page is a new page even if it reuses the same block of memory.
    return page
def resolve_link(self, uri=None, chapters=0):
    """Calculate internal link destination.

    Args:
        uri: (str) some Link.uri
        chapters: (bool) whether to use (chapter, page) format
    Returns:
        (page_id, x, y) where x, y are point coordinates on the page.
        page_id is either page number (if chapters=0), or (chapter, pno).
        If `uri` is empty or cannot be resolved, page_id is -1
        (respectively (-1, -1)) and x, y are 0.
    """
    # result for "no destination"
    unresolved = ((-1, -1), 0, 0) if chapters else (-1, 0, 0)
    if not uri:
        return unresolved
    try:
        loc, xp, yp = mupdf.fz_resolve_link(self.this, uri)
    except Exception:
        if g_exceptions_verbose:    exception_info()
        return unresolved
    if not chapters:
        return mupdf.fz_page_number_from_location(self.this, loc), xp, yp
    return (loc.chapter, loc.page), xp, yp
def rewrite_images(
        self,
        dpi_threshold=None,
        dpi_target=0,
        quality=0,
        lossy=True,
        lossless=True,
        bitonal=True,
        color=True,
        gray=True,
        set_to_gray=False,
        options=None,
        ):
    """Rewrite images in a PDF document.

    The typical use case is to reduce the size of the PDF by recompressing
    images. Default parameters will convert all images to JPEG where
    possible, using the specified resolutions and quality. Exclude
    undesired images by setting parameters to False.

    Args:
        dpi_threshold: look at images with a larger DPI only. If omitted
            or zero, `dpi_target` is ignored as well (both become 0).
        dpi_target: change eligible images to this DPI.
        quality: Quality of the recompressed images (0-100).
        lossy: process lossy image types (e.g. JPEG).
        lossless: process lossless image types (e.g. PNG).
        bitonal: process black-and-white images (e.g. FAX)
        color: process colored images.
        gray: process gray images.
        set_to_gray: whether to change the PDF to gray at process start.
        options: (PdfImageRewriterOptions) Custom options for image
            rewriting (optional). Expert use only. If provided, other
            parameters are ignored, except set_to_gray.
    Raises:
        ValueError: if a positive `dpi_target` is not smaller than
            `dpi_threshold`, or if `options` carries attributes unknown
            to a fresh PdfImageRewriterOptions object.
    """
    quality_str = str(quality)
    if not dpi_threshold:
        dpi_threshold = dpi_target = 0
    if dpi_target > 0 and dpi_target >= dpi_threshold:
        raise ValueError(f"{dpi_target=} must be less than {dpi_threshold=}")
    template_opts = mupdf.PdfImageRewriterOptions()
    dir1 = set(dir(template_opts))  # for checking that only existing options are set
    if not options:
        opts = mupdf.PdfImageRewriterOptions()

        def configure(prefix, method):
            """Set the five rewriter fields of one image category."""
            setattr(opts, f"{prefix}_image_recompress_method", method)
            setattr(opts, f"{prefix}_image_subsample_method", mupdf.FZ_SUBSAMPLE_AVERAGE)
            setattr(opts, f"{prefix}_image_subsample_to", dpi_target)
            setattr(opts, f"{prefix}_image_subsample_threshold", dpi_threshold)
            setattr(opts, f"{prefix}_image_recompress_quality", quality_str)

        if bitonal:
            configure("bitonal", mupdf.FZ_RECOMPRESS_FAX)
        for enabled, tone in ((color, "color"), (gray, "gray")):
            if not enabled:
                continue
            if lossless:
                configure(f"{tone}_lossless", mupdf.FZ_RECOMPRESS_JPEG)
            if lossy:
                configure(f"{tone}_lossy", mupdf.FZ_RECOMPRESS_JPEG)
    else:
        opts = options
    dir2 = set(dir(opts))  # checking that only possible options were used
    invalid_options = dir2 - dir1
    if invalid_options:
        raise ValueError(f"Invalid options: {invalid_options}")
    if set_to_gray:
        self.recolor(1)
    pdf = _as_pdf_document(self)
    mupdf.pdf_rewrite_images(pdf, opts)
def recolor(self, components=1):
    """Change the color component count on all pages.

    Args:
        components: (int) desired color component count, one of 1, 3, 4.

    Invokes the same-named method for all pages.

    Raises:
        ValueError: if the document is not a PDF.
    """
    if not self.is_pdf:
        raise ValueError("is no PDF")
    pno = 0
    total = self.page_count
    while pno < total:
        page = self.load_page(pno)
        page.recolor(components)
        pno += 1
def resolve_names(self):
    """Convert the PDF's destination names into a Python dict.

    All names found in the catalog under keys "/Dests" and "/Names/Dests"
    are included. The result is cached in `self._resolved_names` and
    returned unchanged by subsequent calls.

    Returns:
        A dictionary with the following layout:
        - key: (str) the name
        - value: (dict) with the following layout:
            * "page":  target page number (0-based). If no page number found -1.
            * "to": (x, y) target point on page - currently in PDF coordinates,
                    i.e. point (0,0) is the bottom-left of the page.
            * "zoom": (float) the zoom factor
            * "dest": (str) only occurs if the target location on the page has
                    not been provided as "/XYZ" or if no page number was found.
    Examples:
        {'__bookmark_1': {'page': 0, 'to': (0.0, 541.0), 'zoom': 0.0},
         '__bookmark_2': {'page': 0, 'to': (0.0, 481.45), 'zoom': 0.0}}

        or

        '21154a7c20684ceb91f9c9adc3b677c40': {'page': -1, 'dest': '/XYZ 15.75 1486 0'}, ...
    """
    if hasattr(self, "_resolved_names"):  # computed before: reuse
        return self._resolved_names

    # reverse lookup: page xref -> page number
    page_xrefs = {}
    for pno in range(self.page_count):
        page_xrefs[self.page_xref(pno)] = pno

    def obj_string(obj):
        """Return string version of a PDF object definition."""
        buffer = mupdf.fz_new_buffer(512)
        output = mupdf.FzOutput(buffer)
        mupdf.pdf_print_obj(output, obj, 1, 0)
        output.fz_close_output()
        return JM_UnicodeFromBuffer(buffer)

    def get_array(val):
        """Generate value of one item of the names dictionary."""
        item = {"page": -1, "dest": ""}  # value template
        if val.pdf_is_indirect():
            val = mupdf.pdf_resolve_indirect(val)
        if val.pdf_is_array():
            text = obj_string(val)
        elif val.pdf_is_dict():
            text = obj_string(mupdf.pdf_dict_gets(val, "D"))
        else:  # neither array nor dict: return the empty template
            return item

        # replace PDF "null" by zero, omit the square brackets
        text = text.replace("null", "0")[1:-1]

        # split at the first "/"
        slash = text.find("/")
        if slash < 1:  # this has no target page spec
            item["dest"] = text  # return the orig. string
            return item

        page_spec = text[:slash].strip()  # stuff before "/"
        text = text[slash:]  # stuff from "/" onwards
        item["dest"] = text
        # if we start with /XYZ: extract x, y, zoom
        # 1, 2 or 3 of these values may actually be supplied
        if text.startswith("/XYZ"):
            del item["dest"]  # don't return orig string in this case

            # the (up to) 3 tokens following "/XYZ", padded with "0"
            tokens = text.split()[1:4]
            tokens.extend(["0"] * (3 - len(tokens)))

            # x, y and zoom as floats
            x, y, zoom = [float(token) for token in tokens]
            item["to"] = (x, y)
            item["zoom"] = zoom

        # extract page number
        if not page_spec.endswith("0 R"):  # naked page number given
            item["page"] = int(page_spec)
        else:  # page xref given
            item["page"] = page_xrefs.get(int(page_spec.split()[0]),-1)
        return item

    def fill_dict(dest_dict, pdf_dict):
        """Generate name resolution items for pdf_dict.

        This may be either "/Names/Dests" or just "/Dests"
        """
        for i in range(mupdf.pdf_dict_len(pdf_dict)):
            key = mupdf.pdf_dict_get_key(pdf_dict, i)
            val = mupdf.pdf_dict_get_val(pdf_dict, i)
            if not key.pdf_is_name():  # should never happen
                message(f"key {i} is no /Name")
                continue
            dict_key = key.pdf_to_name()
            if dict_key:
                dest_dict[dict_key] = get_array(val)  # store key/value in dict

    # access underlying PDF document of fz Document
    pdf = mupdf.pdf_document_from_fz_document(self)

    # access PDF catalog
    catalog = mupdf.pdf_dict_gets(mupdf.pdf_trailer(pdf), "Root")

    dest_dict = {}

    # make PDF_NAME(Dests)
    dests = mupdf.pdf_new_name("Dests")

    # extract destinations old style (PDF 1.1)
    old_dests = mupdf.pdf_dict_get(catalog, dests)
    if old_dests.pdf_is_dict():
        fill_dict(dest_dict, old_dests)

    # extract destinations new style (PDF 1.2+)
    tree = mupdf.pdf_load_name_tree(pdf, dests)
    if tree.pdf_is_dict():
        fill_dict(dest_dict, tree)

    self._resolved_names = dest_dict  # store result for reuse
    return dest_dict
def save(
        self,
        filename,
        garbage=0,
        clean=0,
        deflate=0,
        deflate_images=0,
        deflate_fonts=0,
        incremental=0,
        ascii=0,
        expand=0,
        linear=0,
        no_new_id=0,
        appearance=0,
        pretty=0,
        encryption=1,
        permissions=4095,
        owner_pw=None,
        user_pw=None,
        preserve_metadata=1,
        use_objstms=0,
        compression_effort=0,
        ):
    # From %pythonprepend save
    #
    """Save PDF to file, pathlib.Path or file pointer.

    `filename` may be a str, an object with an `open` attribute (assumed
    to be a pathlib.Path and converted via `str()`), an object with a
    `name` attribute (assumed to be a file object; its name is used) or
    an object with a `seek` attribute, which is written to directly.
    The remaining arguments are copied into a `mupdf.PdfWriteOptions`
    object; `clean` sets both `do_clean` and `do_sanitize`.

    Raises:
        ValueError: for a closed or encrypted document, an unsupported
            `filename`, a non-incremental save to the original file,
            `linear` combined with `use_objstms`, a document without
            pages, an incremental save to anything but the original file,
            or a password longer than 40 characters.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if type(filename) is not str:
        if hasattr(filename, "open"):  # assume: pathlib.Path
            filename = str(filename)
        elif hasattr(filename, "name"):  # assume: file object
            filename = filename.name
        elif not hasattr(filename, "seek"):  # assume file object
            raise ValueError("filename must be str, Path or file object")
    if filename == self.name and not incremental:
        raise ValueError("save to original must be incremental")
    if linear and use_objstms:
        raise ValueError("'linear' and 'use_objstms' cannot both be requested")
    if self.page_count < 1:
        raise ValueError("cannot save with zero pages")
    if incremental and (self.name != filename or self.stream):
        raise ValueError("incremental needs original file")
    if any(pw and len(pw) > 40 for pw in (user_pw, owner_pw)):
        raise ValueError("password length must not exceed 40")

    pdf = _as_pdf_document(self)
    opts = mupdf.PdfWriteOptions()
    # structure / cleaning options
    opts.do_incremental = incremental
    opts.do_garbage = garbage
    opts.do_clean = clean
    opts.do_sanitize = clean
    opts.do_linear = linear
    opts.do_use_objstms = use_objstms
    opts.dont_regenerate_id = no_new_id
    opts.do_preserve_metadata = preserve_metadata
    opts.do_appearance = appearance
    # compression / output format options
    opts.do_ascii = ascii
    opts.do_pretty = pretty
    opts.do_compress = deflate
    opts.do_compress_images = deflate_images
    opts.do_compress_fonts = deflate_fonts
    opts.do_decompress = expand
    opts.compression_effort = compression_effort
    # encryption options
    opts.do_encrypt = encryption
    opts.permissions = permissions
    # the owner password falls back to the user password
    effective_owner_pw = owner_pw if owner_pw is not None else user_pw
    if effective_owner_pw is not None:
        opts.opwd_utf8_set_value(effective_owner_pw)
    if user_pw is not None:
        opts.upwd_utf8_set_value(user_pw)

    pdf.m_internal.resynth_required = 0
    JM_embedded_clean(pdf)
    if no_new_id == 0:
        JM_ensure_identity(pdf)
    if not isinstance(filename, str):
        # file pointer: write through an output wrapper
        out = JM_new_output_fileptr(filename)
        #log( f'{type(out)=} {type(out.this)=}')
        mupdf.pdf_write_document(pdf, out, opts)
        out.fz_close_output()
    else:
        #log( 'calling mupdf.pdf_save_document()')
        mupdf.pdf_save_document(pdf, filename, opts)
- def save_snapshot(self, filename):
- """Save a file snapshot suitable for journalling."""
- if self.is_closed:
- raise ValueError("doc is closed")
- if type(filename) is str:
- pass
- elif hasattr(filename, "open"): # assume: pathlib.Path
- filename = str(filename)
- elif hasattr(filename, "name"): # assume: file object
- filename = filename.name
- else:
- raise ValueError("filename must be str, Path or file object")
- if filename == self.name:
- raise ValueError("cannot snapshot to original")
- pdf = _as_pdf_document(self)
- mupdf.pdf_save_snapshot(pdf, filename)
def saveIncr(self):
    """ Save PDF incrementally

    Calls `save()` on the document's own name with `incremental=True` and
    `encryption=mupdf.PDF_ENCRYPT_KEEP`, returning its result.
    """
    target = self.name
    keep_encryption = mupdf.PDF_ENCRYPT_KEEP
    return self.save(target, incremental=True, encryption=keep_encryption)
- # ------------------------------------------------------------------------------
- # Remove potentially sensitive data from a PDF. Similar to the Adobe
- # Acrobat 'sanitize' function
- # ------------------------------------------------------------------------------
def scrub(
        doc: 'Document',
        attached_files: bool = True,
        clean_pages: bool = True,
        embedded_files: bool = True,
        hidden_text: bool = True,
        javascript: bool = True,
        metadata: bool = True,
        redactions: bool = True,
        redact_images: int = 0,
        remove_links: bool = True,
        reset_fields: bool = True,
        reset_responses: bool = True,
        thumbnails: bool = True,
        xml_metadata: bool = True,
        ) -> None:
    """Remove potentially sensitive data from a PDF.

    Args:
        attached_files: replace the content of file attachment annotations.
        clean_pages: clean the page /Contents. If False, `hidden_text` and
            `redactions` are switched off as well.
        embedded_files: delete all embedded files.
        hidden_text: remove text written with render mode 3 ("3 Tr").
        javascript: replace /JavaScript action objects by empty scripts.
        metadata: remove the standard metadata.
        redactions: apply redaction annotations found on a page.
        redact_images: passed as `images` to `page.apply_redactions()`.
        remove_links: delete all links on every page.
        reset_fields: reset all form fields (widgets).
        reset_responses: delete the responses of every annotation.
        thumbnails: remove the /Thumb entry of every page. This happens
            independently of `clean_pages` and of the page's contents.
        xml_metadata: delete XML metadata.
    Raises:
        ValueError: if the document is no PDF, is closed or encrypted, or
            if an xref without object definition is encountered.
    """

    def remove_hidden(cont_lines):
        """Remove hidden text from a PDF page.

        Args:
            cont_lines: list of lines with /Contents content. Should have status
                from after page.cleanContents().

        Returns:
            List of /Contents lines from which hidden text has been removed,
            or None if no "3 Tr" operator was found.

        Notes:
            The input must have been created after the page's /Contents object(s)
            have been cleaned with page.cleanContents(). This ensures a standard
            formatting: one command per line, single spaces between operators.
            This allows for drastic simplification of this code.
        """
        out_lines = []  # will return this
        in_text = False  # indicate if within BT/ET object
        suppress = False  # indicate text suppression active
        make_return = False
        for line in cont_lines:
            if line == b"BT":  # start of text object
                in_text = True  # switch on
                out_lines.append(line)  # output it
                continue
            if line == b"ET":  # end of text object
                in_text = False  # switch off
                out_lines.append(line)  # output it
                continue
            if line == b"3 Tr":  # text suppression operator
                suppress = True  # switch on
                make_return = True
                continue
            # Slice instead of index: indexing bytes yields an int, which
            # never equals b"3".
            if line[-2:] == b"Tr" and line[:1] != b"3":
                suppress = False  # text rendering changed
                out_lines.append(line)
                continue
            if line == b"Q":  # unstack command also switches off
                suppress = False
                out_lines.append(line)
                continue
            if suppress and in_text:  # suppress hidden lines
                continue
            out_lines.append(line)
        if make_return:
            return out_lines
        return None

    if not doc.is_pdf:  # only works for PDF
        raise ValueError("is no PDF")
    if doc.is_encrypted or doc.is_closed:
        raise ValueError("closed or encrypted doc")

    if not clean_pages:
        hidden_text = False
        redactions = False

    if metadata:
        doc.set_metadata({})  # remove standard metadata

    for page in doc:
        if reset_fields:
            # reset form fields (widgets)
            for widget in page.widgets():
                widget.reset()

        if remove_links:
            links = page.get_links()  # list of all links on page
            for link in links:  # remove all links
                page.delete_link(link)

        found_redacts = False
        for annot in page.annots():
            if annot.type[0] == mupdf.PDF_ANNOT_FILE_ATTACHMENT and attached_files:
                annot.update_file(buffer_=b" ")  # set file content to empty
            if reset_responses:
                annot.delete_responses()
            if annot.type[0] == mupdf.PDF_ANNOT_REDACT:  # pylint: disable=no-member
                found_redacts = True

        if redactions and found_redacts:
            page.apply_redactions(images=redact_images)

        if clean_pages:
            page.clean_contents()
            xrefs = page.get_contents()
            if hidden_text and xrefs:
                assert len(xrefs) == 1  # only one because of cleaning.
                xref = xrefs[0]
                cont = doc.xref_stream(xref)
                cont_lines = remove_hidden(cont.splitlines())  # remove hidden text
                if cont_lines:  # something was actually removed
                    cont = b"\n".join(cont_lines)
                    doc.update_stream(xref, cont)  # rewrite the page /Contents

        # Thumbnail removal must not depend on page cleaning or on the page
        # having any contents.
        if thumbnails:  # remove page thumbnails?
            if doc.xref_get_key(page.xref, "Thumb")[0] != "null":
                doc.xref_set_key(page.xref, "Thumb", "null")

    # pages are scrubbed, now perform document-wide scrubbing
    # remove embedded files
    if embedded_files:
        for name in doc.embfile_names():
            doc.embfile_del(name)

    if xml_metadata:
        doc.del_xml_metadata()
    if not (xml_metadata or javascript):
        xref_limit = 0
    else:
        xref_limit = doc.xref_length()
    for xref in range(1, xref_limit):
        if not doc.xref_object(xref):
            msg = "bad xref %i - clean PDF before scrubbing" % xref
            raise ValueError(msg)
        if javascript and doc.xref_get_key(xref, "S")[1] == "/JavaScript":
            # a /JavaScript action object
            obj = "<</S/JavaScript/JS()>>"  # replace with a null JavaScript
            doc.update_object(xref, obj)  # update this object
            continue  # no further handling

        if not xml_metadata:
            continue

        if doc.xref_get_key(xref, "Type")[1] == "/Metadata":
            # delete any metadata object directly
            doc.update_object(xref, "<<>>")
            doc.update_stream(xref, b"deleted", new=True)
            continue

        if doc.xref_get_key(xref, "Metadata")[0] != "null":
            doc.xref_set_key(xref, "Metadata", "null")
-
def search_page_for(
        doc: 'Document',
        pno: int,
        text: str,
        quads: bool = False,
        clip: rect_like = None,
        flags: int = None,
        textpage: 'TextPage' = None,
        ) -> list:
    """Search for a string on a page.

    Args:
        pno: page number
        text: string to be searched for
        clip: restrict search to this rectangle
        quads: (bool) return quads instead of rectangles
        flags: bit switches, default: join hyphened words
        textpage: reuse a prepared textpage
    Returns:
        a list of rectangles or quads, each containing an occurrence.
    """
    if flags is None:
        # default bit switches
        flags = 0
        flags |= TEXT_DEHYPHENATE
        flags |= TEXT_PRESERVE_LIGATURES
        flags |= TEXT_PRESERVE_WHITESPACE
        flags |= TEXT_MEDIABOX_CLIP
    page = doc[pno]
    search_args = dict(quads=quads, clip=clip, flags=flags, textpage=textpage)
    return page.search_for(text, **search_args)
-
def select(self, pyliste):
    """Build sub-pdf with page numbers in the list.

    Args:
        pyliste: non-empty sequence of page numbers, each within
            `range(len(self))`.
    Raises:
        ValueError: if the document is closed, encrypted or no PDF, if
            `pyliste` has no `__getitem__`, is empty, or its smallest or
            largest entry is not a valid page number.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if not self.is_pdf:
        raise ValueError("is no PDF")
    if not hasattr(pyliste, "__getitem__"):
        raise ValueError("sequence required")

    page_numbers = range(len(self))
    if len(pyliste) == 0:
        raise ValueError("bad page number(s)")
    if min(pyliste) not in page_numbers or max(pyliste) not in page_numbers:
        raise ValueError("bad page number(s)")

    # get underlying pdf document,
    pdf = _as_pdf_document(self)
    # create page sub-pdf via pdf_rearrange_pages2().
    #
    if mupdf_version_tuple < (1, 25, 3):
        mupdf.pdf_rearrange_pages2(pdf, pyliste)
    else:
        # We use PDF_CLEAN_STRUCTURE_KEEP otherwise we lose structure tree
        # which, for example, breaks test_3705.
        mupdf.pdf_rearrange_pages2(pdf, pyliste, mupdf.PDF_CLEAN_STRUCTURE_KEEP)

    # remove any existing pages with their kids
    self._reset_page_refs()
def set_language(self, language=None):
    """Set the PDF document language.

    Args:
        language: language string which is converted with
            `mupdf.fz_text_language_from_string()`. A false value (None,
            empty string) selects `mupdf.FZ_LANG_UNSET`.
    Returns:
        True.
    """
    pdf = _as_pdf_document(self)
    if language:
        lang = mupdf.fz_text_language_from_string(language)
    else:
        lang = mupdf.FZ_LANG_UNSET
    mupdf.pdf_set_document_language(pdf, lang)
    return True
def set_layer(self, config, basestate=None, on=None, off=None, rbgroups=None, locked=None):
    """Set the PDF keys /ON, /OFF, /RBGroups of an OC layer.

    Args:
        config: -1 selects the default configuration (/D); any other
            value is used as index into the /Configs array.
        basestate: one of "ON", "OFF", "Unchanged" (any casing).
        on, off, locked: list or tuple of OCG xrefs.
        rbgroups: list or tuple of lists / tuples of OCG xrefs.
    Raises:
        ValueError: if the document is closed or has no optional content,
            if an argument has a bad type or names unknown OCGs, for a
            bad `basestate`, or if the configuration does not exist.
    """
    if self.is_closed:
        raise ValueError("document closed")
    ocgs = set(self.get_ocgs().keys())
    if ocgs == set():
        raise ValueError("document has no optional content")

    # 'on', 'off' and 'locked' share the same validation
    for label, xrefs in (("on", on), ("off", off), ("locked", locked)):
        if not xrefs:
            continue
        if type(xrefs) not in (list, tuple):
            raise ValueError("bad type: '%s'" % label)
        unknown = set(xrefs).difference(ocgs)
        if unknown != set():
            raise ValueError("bad OCGs in '%s': %s" % (label, unknown))

    if rbgroups:
        if type(rbgroups) not in (list, tuple):
            raise ValueError("bad type: 'rbgroups'")
        for group in rbgroups:
            if type(group) not in (list, tuple):
                raise ValueError("bad RBGroup '%s'" % group)
            unknown = set(group).difference(ocgs)
            if unknown != set():
                raise ValueError("bad OCGs in RBGroup: %s" % unknown)

    if basestate:
        basestate = str(basestate).upper()
        if basestate == "UNCHANGED":
            basestate = "Unchanged"
        if basestate not in ("ON", "OFF", "Unchanged"):
            raise ValueError("bad 'basestate'")

    pdf = _as_pdf_document(self)
    ocp = mupdf.pdf_dict_getl(
            mupdf.pdf_trailer( pdf),
            PDF_NAME('Root'),
            PDF_NAME('OCProperties'),
            )
    if not ocp.m_internal:
        return
    if config != -1:
        configs = mupdf.pdf_dict_get( ocp, PDF_NAME('Configs'))
        obj = mupdf.pdf_array_get( configs, config)
    else:
        obj = mupdf.pdf_dict_get( ocp, PDF_NAME('D'))
    if not obj.m_internal:
        raise ValueError( MSG_BAD_OC_CONFIG)
    JM_set_ocg_arrays( obj, basestate, on, off, rbgroups, locked)
    mupdf.ll_pdf_read_ocg( pdf.m_internal)
def set_layer_ui_config(self, number, action=0):
    """Set / unset OC intent configuration.

    Args:
        number: sequence number of the UI configuration, or its text.
            A str is looked up in `self.layer_ui_configs()`.
        action: 1 toggles, 2 deselects, any other value selects.
    Raises:
        ValueError: if `number` is a str that matches no configuration.
    """
    if isinstance(number, str):
        # a name was given: translate it to the first matching number
        matches = []
        for ui in self.layer_ui_configs():
            if ui["text"] == number:
                matches.append(ui["number"])
        if matches == []:
            raise ValueError(f"bad OCG '{number}'.")
        number = matches[0]
    pdf = _as_pdf_document(self)
    if action == 1:
        mupdf.pdf_toggle_layer_config_ui(pdf, number)
        return
    if action == 2:
        mupdf.pdf_deselect_layer_config_ui(pdf, number)
        return
    mupdf.pdf_select_layer_config_ui(pdf, number)
def set_markinfo(self, markinfo: dict) -> bool:
    """Set the PDF MarkInfo values.

    Args:
        markinfo: dict with any of the keys "Marked", "UserProperties",
            "Suspects". Keys not provided are written as false.
    Returns:
        False if `markinfo` is empty or not a dict, else True.
    Raises:
        ValueError: if the document has no PDF catalog, for unknown keys,
            or for values whose lowercased str() is not "true" / "false".
    """
    xref = self.pdf_catalog()
    if xref == 0:
        raise ValueError("not a PDF")
    if not markinfo or not isinstance(markinfo, dict):
        return False
    valid = {"Marked": False, "UserProperties": False, "Suspects": False}
    if not set(valid.keys()).issuperset(markinfo.keys()):
        badkeys = f"bad MarkInfo key(s): {set(markinfo.keys()).difference(valid.keys())}"
        raise ValueError(badkeys)
    valid.update(markinfo)
    # build the PDF dictionary source
    entries = []
    for key, value in valid.items():
        value = str(value).lower()
        if value not in ("true", "false"):
            raise ValueError(f"bad key value '{key}': '{value}'")
        entries.append(f"/{key} {value}")
    pdfdict = "<<" + "".join(entries) + ">>"
    self.xref_set_key(xref, "MarkInfo", pdfdict)
    return True
def set_metadata(doc: 'Document', m: dict = None) -> None:
    """Update the PDF /Info object.

    Args:
        m: a dictionary like doc.metadata. None or an empty dict removes
            the /Info entry from the trailer. The keys "format" and
            "encryption" are accepted but not written. Values that are
            false, "none" or "null" are stored as PDF null.
    Raises:
        ValueError: if the document is no PDF, is closed or encrypted,
            if `m` is no dict, or if it contains unknown keys.
    """
    if not doc.is_pdf:
        raise ValueError("is no PDF")
    if doc.is_closed or doc.is_encrypted:
        raise ValueError("document closed or encrypted")
    if m is None:
        m = {}
    elif type(m) is not dict:
        raise ValueError("bad metadata")

    # metadata key -> /Info key (None: not stored in /Info)
    keymap = {
        "author": "Author",
        "producer": "Producer",
        "creator": "Creator",
        "title": "Title",
        "format": None,
        "encryption": None,
        "creationDate": "CreationDate",
        "modDate": "ModDate",
        "subject": "Subject",
        "keywords": "Keywords",
        "trapped": "Trapped",
    }
    unknown_keys = set(m.keys()).difference(set(keymap.keys()))
    if unknown_keys != set():
        raise ValueError("bad dict key(s): %s" % unknown_keys)

    # locate the current /Info object, if any
    t, temp = doc.xref_get_key(-1, "Info")
    info_xref = int(temp.replace("0 R", "")) if t == "xref" else 0

    if m == {}:
        if info_xref != 0:  # remove existing metadata
            doc.xref_set_key(-1, "Info", "null")
            doc.init_doc()
        return  # otherwise nothing to do

    if info_xref == 0:  # no prev metadata: get new xref
        info_xref = doc.get_new_xref()
        doc.update_object(info_xref, "<<>>")  # fill it with empty object
        doc.xref_set_key(-1, "Info", "%i 0 R" % info_xref)

    for key, val in list(m.items()):
        pdf_key = keymap[key]
        if pdf_key is None:
            continue
        if not bool(val) or val in ("none", "null"):
            val = "null"
        else:
            val = get_pdf_str(val)
        doc.xref_set_key(info_xref, pdf_key, val)
    doc.init_doc()
    return
def set_oc(doc: 'Document', xref: int, oc: int) -> None:
    """Attach optional content object to image or form xobject.

    Args:
        xref: (int) xref number of an image or form xobject
        oc: (int) xref number of an OCG or OCMD. Zero removes an existing
            /OC entry and is a no-op if there is none.

    Raises:
        ValueError: if the document is closed or encrypted, if 'xref' is
            neither an image nor a form xobject, if 'oc' is negative, or if
            'oc' is neither an OCG nor an OCMD.
    """
    if doc.is_closed or doc.is_encrypted:
        raise ValueError("document closed or encrypted")
    t, name = doc.xref_get_key(xref, "Subtype")
    if t != "name" or name not in ("/Image", "/Form"):
        raise ValueError("bad object type at xref %i" % xref)
    if oc < 0:
        # A negative number can never be a valid object reference.
        raise ValueError("bad 'oc' value %i" % oc)
    if oc == 0:
        # Only remove an existing entry. Never write the invalid
        # reference "0 0 R" when there is nothing to remove.
        if "OC" in doc.xref_get_keys(xref):
            doc.xref_set_key(xref, "OC", "null")
        return None
    t, name = doc.xref_get_key(oc, "Type")
    if t != "name" or name not in ("/OCG", "/OCMD"):
        raise ValueError("bad object type at xref %i" % oc)
    doc.xref_set_key(xref, "OC", "%i 0 R" % oc)
    return None
def set_ocmd(
    doc: 'Document',
    xref: int = 0,
    ocgs: typing.Union[list, None] = None,
    policy: OptStr = None,
    ve: typing.Union[list, None] = None,
) -> int:
    """Create a new OCMD object or replace an existing one.

    Args:
        xref: (int) 0 for creating a new object, otherwise update existing one.
        ocgs: (list) OCG xref numbers, which shall be subject to 'policy'.
        policy: one of 'AllOn', 'AllOff', 'AnyOn', 'AnyOff' (any casing).
        ve: (list) visibility expression. Use instead of 'ocgs' with 'policy'.

    Returns:
        Xref of the created or updated OCMD.

    Raises:
        ValueError: for unknown OCG numbers, a bad policy, a malformed
            visibility expression, or if 'xref' is not an OCMD.
    """
    known_ocgs = set(doc.get_ocgs().keys())

    def ve_maker(ve):
        """Recursively convert a visibility expression to a PDF array string."""
        if type(ve) not in (list, tuple) or len(ve) < 2:
            raise ValueError("bad 've' format: %s" % ve)
        operator = ve[0].lower()
        if operator not in ("and", "or", "not"):
            raise ValueError("bad operand: %s" % ve[0])
        if operator == "not" and len(ve) != 2:
            raise ValueError("bad 've' format: %s" % ve)
        parts = ["[/%s" % ve[0].title()]
        for operand in ve[1:]:
            if type(operand) is int:  # must be the xref of a known OCG
                if operand not in known_ocgs:
                    raise ValueError("bad OCG %i" % operand)
                parts.append(" %i 0 R" % operand)
            else:  # anything else is treated as a nested expression
                parts.append(" %s" % ve_maker(operand))
        parts.append("]")
        return "".join(parts)

    pieces = ["<</Type/OCMD"]

    if ocgs and type(ocgs) in (list, tuple):  # some OCGs are provided
        illegal = set(ocgs).difference(known_ocgs)  # contains illegal xrefs
        if illegal != set():
            raise ValueError("bad OCGs: %s" % illegal)
        pieces.append("/OCGs[" + " ".join(["%i 0 R" % x for x in ocgs]) + "]")

    if policy:
        policy = str(policy).lower()
        pols = {
            "anyon": "AnyOn",
            "allon": "AllOn",
            "anyoff": "AnyOff",
            "alloff": "AllOff",
        }
        if policy not in pols:
            raise ValueError("bad policy: %s" % policy)
        pieces.append("/P/%s" % pols[policy])

    if ve:
        pieces.append("/VE%s" % ve_maker(ve))

    pieces.append(">>")
    text = "".join(pieces)

    # make new object or replace old OCMD (check type first)
    if xref == 0:
        xref = doc.get_new_xref()
    elif "/Type/OCMD" not in doc.xref_object(xref, compressed=True):
        raise ValueError("bad xref or not an OCMD")
    doc.update_object(xref, text)
    return xref
def set_pagelayout(self, pagelayout: str):
    """Set the PDF PageLayout value.

    The value is matched case-insensitively against the supported names; a
    leading "/" is ignored. Returns True on success.

    Raises:
        ValueError: if the document has no PDF catalog or the value is not
            one of the supported names.
    """
    valid = ("SinglePage", "OneColumn", "TwoColumnLeft", "TwoColumnRight", "TwoPageLeft", "TwoPageRight")
    xref = self.pdf_catalog()
    if xref == 0:
        raise ValueError("not a PDF")
    if not pagelayout:
        raise ValueError("bad PageLayout value")
    if pagelayout[0] == "/":
        pagelayout = pagelayout[1:]
    wanted = pagelayout.lower()
    canonical = next((v for v in valid if v.lower() == wanted), None)
    if canonical is None:
        raise ValueError("bad PageLayout value")
    self.xref_set_key(xref, "PageLayout", f"/{canonical}")
    return True
def set_pagemode(self, pagemode: str):
    """Set the PDF PageMode value.

    The value is matched case-insensitively against the supported names; a
    leading "/" is ignored. Returns True on success.

    Raises:
        ValueError: if the document has no PDF catalog or the value is not
            one of the supported names.
    """
    valid = ("UseNone", "UseOutlines", "UseThumbs", "FullScreen", "UseOC", "UseAttachments")
    xref = self.pdf_catalog()
    if xref == 0:
        raise ValueError("not a PDF")
    if not pagemode:
        raise ValueError("bad PageMode value")
    if pagemode[0] == "/":
        pagemode = pagemode[1:]
    wanted = pagemode.lower()
    canonical = next((v for v in valid if v.lower() == wanted), None)
    if canonical is None:
        raise ValueError("bad PageMode value")
    self.xref_set_key(xref, "PageMode", f"/{canonical}")
    return True
def set_page_labels(doc, labels):
    """Add / replace page label definitions in PDF document.

    Args:
        doc: PDF document (resp. 'self').
        labels: list of label dictionaries like:
            {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int},
            as returned by get_page_labels(). The sequence is not modified.
    """
    # William Chapman, 2021-01-06

    def create_label_str(label):
        """Convert Python label dict to corresponding PDF rule string.

        Args:
            label: (dict) build rule for the label.
        Returns:
            PDF label rule string wrapped in "<<", ">>".
        """
        s = "%i<<" % label["startpage"]
        if label.get("prefix", "") != "":
            s += "/P(%s)" % label["prefix"]
        if label.get("style", "") != "":
            s += "/S/%s" % label["style"]
        if label.get("firstpagenum", 1) > 1:
            s += "/St %i" % label["firstpagenum"]
        s += ">>"
        return s

    def create_nums(labels):
        """Return concatenated string of all label rules.

        Args:
            labels: (list) label dictionaries as described for the outer
                function.
        Returns:
            PDF compatible string for page label definitions, ready to be
            enclosed in PDF array 'Nums[...]'.
        """
        # Sort a copy: reordering the caller's list in place would be a
        # surprising side effect of writing the labels.
        ordered = sorted(labels, key=lambda x: x["startpage"])
        return "".join([create_label_str(label) for label in ordered])

    doc._set_page_labels(create_nums(labels))
def set_toc(
    doc: 'Document',
    toc: list,
    collapse: int = 1,
) -> int:
    """Create new outline tree (table of contents, TOC).

    Any existing outline is deleted first. Each TOC row becomes one outline
    item object; the items are linked via /First, /Last, /Next, /Prev and
    /Parent references and written with doc.update_object().

    Args:
        toc: (list, tuple) each entry must contain level, title, page and
            optionally top margin on the page. None or '()' remove the TOC.
            The optional 4th item may also be a destination dictionary.
        collapse: (int) collapses entries beyond this level. Zero or None
            shows all entries unfolded.
    Returns:
        the number of inserted items, or the number of removed items respectively.
    Raises:
        ValueError: if the document is closed, encrypted or no PDF, or if
            'toc' fails the formal checks below.
    """
    if doc.is_closed or doc.is_encrypted:
        raise ValueError("document closed or encrypted")
    if not doc.is_pdf:
        raise ValueError("is no PDF")
    if not toc:  # remove all entries
        return len(doc._delToC())

    # validity checks --------------------------------------------------------
    if type(toc) not in (list, tuple):
        raise ValueError("'toc' must be list or tuple")
    toclen = len(toc)
    page_count = doc.page_count
    t0 = toc[0]
    if type(t0) not in (list, tuple):
        raise ValueError("items must be sequences of 3 or 4 items")
    if t0[0] != 1:
        raise ValueError("hierarchy level of item 0 must be 1")
    # Each iteration checks the page number of row i and the shape / level of
    # row i + 1.
    # NOTE(review): the page number of the LAST row is never range-checked
    # here (and nothing besides the level is checked for a single-row TOC).
    # The build loop below clamps page numbers, so this does not crash.
    for i in list(range(toclen - 1)):
        t1 = toc[i]
        t2 = toc[i + 1]
        if not -1 <= t1[2] <= page_count:
            raise ValueError("row %i: page number out of range" % i)
        if (type(t2) not in (list, tuple)) or len(t2) not in (3, 4):
            raise ValueError("bad row %i" % (i + 1))
        if (type(t2[0]) is not int) or t2[0] < 1:
            raise ValueError("bad hierarchy level in row %i" % (i + 1))
        if t2[0] > t1[0] + 1:  # a level may increase by at most 1 per row
            raise ValueError("bad hierarchy level in row %i" % (i + 1))
    # no formal errors in toc --------------------------------------------------

    # --------------------------------------------------------------------------
    # make a list of xref numbers, which we can use for our TOC entries
    # --------------------------------------------------------------------------
    old_xrefs = doc._delToC()  # del old outlines, get their xref numbers

    # prepare table of xrefs for new bookmarks
    # NOTE(review): the xref numbers returned by _delToC() are discarded here,
    # so every outline item below receives a freshly acquired xref.
    old_xrefs = []
    xref = [0] + old_xrefs
    xref[0] = doc._getOLRootNumber()  # entry zero is outline root xref number
    if toclen > len(old_xrefs):  # too few old xrefs?
        for i in range((toclen - len(old_xrefs))):
            xref.append(doc.get_new_xref())  # acquire new ones

    lvltab = {0: 0}  # to store last entry per hierarchy level

    # ------------------------------------------------------------------------------
    # contains new outline objects as strings - first one is the outline root
    # ------------------------------------------------------------------------------
    olitems = [{"count": 0, "first": -1, "last": -1, "xref": xref[0]}]
    # ------------------------------------------------------------------------------
    # build olitems as a list of PDF-like connected dictionaries
    # ------------------------------------------------------------------------------
    for i in range(toclen):
        o = toc[i]
        lvl = o[0]  # level
        title = get_pdf_str(o[1])  # title
        # 1-based page number of the row, clamped into the valid 0-based range
        pno = min(doc.page_count - 1, max(0, o[2] - 1))  # page number
        page_xref = doc.page_xref(pno)
        page_height = doc.page_cropbox(pno).height
        top = Point(72, page_height - 36)
        dest_dict = {"to": top, "kind": LINK_GOTO}  # fall back target
        if o[2] < 0:
            dest_dict["kind"] = LINK_NONE
        if len(o) > 3:  # some target is specified
            if type(o[3]) in (int, float):  # convert a number to a point
                dest_dict["to"] = Point(72, page_height - o[3])
            else:  # if something else, make sure we have a dict
                # We make a copy of o[3] to avoid modifying our caller's data.
                dest_dict = o[3].copy() if type(o[3]) is dict else dest_dict
                if "to" not in dest_dict:  # target point not in dict?
                    dest_dict["to"] = top  # put default in
                else:  # transform target to PDF coordinates
                    page = doc[pno]
                    point = Point(dest_dict["to"])
                    point.y = page.cropbox.height - point.y
                    point = point * page.rotation_matrix
                    dest_dict["to"] = (point.x, point.y)
        # Item dictionary; "first", "last", "prev", "next" and "parent" hold
        # indices into 'olitems' / 'xref' (-1 meaning "none"), not xrefs.
        d = {}
        d["first"] = -1
        d["count"] = 0
        d["last"] = -1
        d["prev"] = -1
        d["next"] = -1
        d["dest"] = utils.getDestStr(page_xref, dest_dict)
        d["top"] = dest_dict["to"]
        d["title"] = title
        d["parent"] = lvltab[lvl - 1]
        d["xref"] = xref[i + 1]
        d["color"] = dest_dict.get("color")
        d["flags"] = dest_dict.get("italic", 0) + 2 * dest_dict.get("bold", 0)
        lvltab[lvl] = i + 1
        parent = olitems[lvltab[lvl - 1]]  # the parent entry

        if (
            dest_dict.get("collapse") or collapse and lvl > collapse
        ):  # suppress expansion
            parent["count"] -= 1  # make /Count negative
        else:
            parent["count"] += 1  # positive /Count

        if parent["first"] == -1:  # this is the parent's first child
            parent["first"] = i + 1
            parent["last"] = i + 1
        else:  # append behind the parent's current last child
            d["prev"] = parent["last"]
            prev = olitems[parent["last"]]
            prev["next"] = i + 1
            parent["last"] = i + 1
        olitems.append(d)

    # ------------------------------------------------------------------------------
    # now create each outline item as a string and insert it in the PDF
    # ------------------------------------------------------------------------------
    # The root entry (index 0) lacks the keys "dest", "next", "parent", "prev"
    # and "title"; the resulting KeyError is swallowed by the try / except
    # blocks below, so those parts are simply omitted for the root.
    for i, ol in enumerate(olitems):
        txt = "<<"
        if ol["count"] != 0:
            txt += "/Count %i" % ol["count"]
        try:
            txt += ol["dest"]
        except Exception:
            # Verbose in PyMuPDF/tests.
            if g_exceptions_verbose >= 2: exception_info()
            pass
        try:
            if ol["first"] > -1:
                txt += "/First %i 0 R" % xref[ol["first"]]
        except Exception:
            if g_exceptions_verbose >= 2: exception_info()
            pass
        try:
            if ol["last"] > -1:
                txt += "/Last %i 0 R" % xref[ol["last"]]
        except Exception:
            if g_exceptions_verbose >= 2: exception_info()
            pass
        try:
            if ol["next"] > -1:
                txt += "/Next %i 0 R" % xref[ol["next"]]
        except Exception:
            # Verbose in PyMuPDF/tests.
            if g_exceptions_verbose >= 2: exception_info()
            pass
        try:
            if ol["parent"] > -1:
                txt += "/Parent %i 0 R" % xref[ol["parent"]]
        except Exception:
            # Verbose in PyMuPDF/tests.
            if g_exceptions_verbose >= 2: exception_info()
            pass
        try:
            if ol["prev"] > -1:
                txt += "/Prev %i 0 R" % xref[ol["prev"]]
        except Exception:
            # Verbose in PyMuPDF/tests.
            if g_exceptions_verbose >= 2: exception_info()
            pass
        try:
            txt += "/Title" + ol["title"]
        except Exception:
            # Verbose in PyMuPDF/tests.
            if g_exceptions_verbose >= 2: exception_info()
            pass

        if ol.get("color") and len(ol["color"]) == 3:
            txt += f"/C[ {_format_g(tuple(ol['color']))}]"
        if ol.get("flags", 0) > 0:
            txt += "/F %i" % ol["flags"]

        if i == 0:  # special: this is the outline root
            txt += "/Type/Outlines"  # so add the /Type entry
        txt += ">>"
        doc.update_object(xref[i], txt)  # insert the PDF object

    doc.init_doc()
    return toclen
def set_toc_item(
    doc: 'Document',
    idx: int,
    dest_dict: OptDict = None,
    kind: OptInt = None,
    pno: OptInt = None,
    uri: OptStr = None,
    title: OptStr = None,
    to: point_like = None,
    filename: OptStr = None,
    zoom: float = 0,
) -> None:
    """Update TOC item by index.

    It allows changing the item's title and link destination.

    Args:
        idx:
            (int) desired index of the TOC list, as created by get_toc.
        dest_dict:
            (dict) destination dictionary as created by get_toc(False).
            Outrules all other parameters. If None, the remaining parameters
            are used to make a dest dictionary. The dictionary passed in is
            not modified.
        kind:
            (int) kind of link (pymupdf.LINK_GOTO, etc.). If None, then only
            the title will be updated. If pymupdf.LINK_NONE, the TOC item will
            be deleted.
        pno:
            (int) page number (1-based like in get_toc). Required if
            pymupdf.LINK_GOTO.
        uri:
            (str) the URL, required if pymupdf.LINK_URI.
        title:
            (str) the new title. No change if None.
        to:
            (point-like) destination on the target page. If omitted, (72, 36)
            will be used as target coordinates.
        filename:
            (str) destination filename, required for pymupdf.LINK_GOTOR and
            pymupdf.LINK_LAUNCH.
        zoom:
            (float) a zoom factor for the target location (pymupdf.LINK_GOTO).

    Raises:
        ValueError: for a bad page number, a bad color value, or if no valid
            destination string could be built.
    """
    xref = doc.get_outline_xrefs()[idx]
    page_xref = 0
    if type(dest_dict) is dict:
        # Work on a shallow copy and a fresh Point: the y-flip below must not
        # leak into the caller's dictionary, otherwise reusing the same
        # dictionary for a second call would flip the coordinate again.
        dest_dict = dest_dict.copy()
        if dest_dict["kind"] == LINK_GOTO:
            pno = dest_dict["page"]
            page_xref = doc.page_xref(pno)
            page_height = doc.page_cropbox(pno).height
            to = Point(dest_dict.get('to', Point(72, 36)))
            to.y = page_height - to.y  # flip y against the page height
            dest_dict["to"] = to
        action = utils.getDestStr(page_xref, dest_dict)
        if not action.startswith("/A"):
            raise ValueError("bad bookmark dest")

        color = dest_dict.get("color")
        if color:
            color = list(map(float, color))
            if len(color) != 3 or min(color) < 0 or max(color) > 1:
                raise ValueError("bad color value")
        bold = dest_dict.get("bold", False)
        italic = dest_dict.get("italic", False)
        flags = italic + 2 * bold
        collapse = dest_dict.get("collapse")
        return doc._update_toc_item(
            xref,
            action=action[2:],  # drop the leading "/A"
            title=title,
            color=color,
            flags=flags,
            collapse=collapse,
        )

    if kind == LINK_NONE:  # delete bookmark item
        return doc.del_toc_item(idx)
    if kind is None and title is None:  # treat as no-op
        return None
    if kind is None:  # only update title text
        return doc._update_toc_item(xref, action=None, title=title)

    if kind == LINK_GOTO:
        if pno is None or pno not in range(1, doc.page_count + 1):
            raise ValueError("bad page number")
        page_xref = doc.page_xref(pno - 1)
        page_height = doc.page_cropbox(pno - 1).height
        if to is None:
            to = Point(72, page_height - 36)
        else:
            to = Point(to)
            to.y = page_height - to.y

    ddict = {
        "kind": kind,
        "to": to,
        "uri": uri,
        "page": pno,
        "file": filename,
        "zoom": zoom,
    }
    action = utils.getDestStr(page_xref, ddict)
    # An empty string does not start with "/A" either, so one check suffices.
    if not action.startswith("/A"):
        raise ValueError("bad bookmark dest")

    return doc._update_toc_item(xref, action=action[2:], title=title)
def set_xml_metadata(self, metadata):
    """Store XML document level metadata.

    The UTF-8 encoded text replaces the stream of an existing /Metadata
    object of the PDF catalog, or is stored in a new stream object which is
    then referenced from the catalog.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    if not catalog.m_internal:
        RAISEPY(MSG_BAD_PDFROOT, JM_Exc_FileDataError)
    payload = mupdf.fz_new_buffer_from_copied_data(metadata.encode('utf-8'))
    existing = mupdf.pdf_dict_get(catalog, PDF_NAME('Metadata'))
    if existing.m_internal:
        # reuse the present metadata object, only swap its stream
        JM_update_stream(pdf, existing, payload, 0)
        return
    # no metadata yet: make a new stream object and hook it into the catalog
    created = mupdf.pdf_add_stream(pdf, payload, mupdf.PdfObj(), 0)
    mupdf.pdf_dict_put(created, PDF_NAME('Type'), PDF_NAME('Metadata'))
    mupdf.pdf_dict_put(created, PDF_NAME('Subtype'), PDF_NAME('XML'))
    mupdf.pdf_dict_put(catalog, PDF_NAME('Metadata'), created)
def subset_fonts(doc: 'Document', verbose: bool = False, fallback: bool = False) -> OptInt:
    """Build font subsets in a PDF.

    Eligible fonts are potentially replaced by smaller versions. Page text is
    NOT rewritten and thus should retain properties like being hidden or
    controlled by optional content.

    This method by default uses MuPDF's own internal feature to create subset
    fonts. As this is a new function, errors may still occur. In this case,
    please fall back to using the previous version by using "fallback=True".
    Fallback mode requires the external package 'fontTools'.

    Args:
        fallback: use the older deprecated implementation.
        verbose: only used by fallback mode.

    Returns:
        The new MuPDF-based code returns None. The deprecated fallback
        mode returns 0 if there are no fonts to subset. Otherwise, it
        returns the decrease in fontsize (the difference in fontsize),
        measured in bytes. Fonts that could not be subset contribute
        nothing to this number.
    """
    # Font binaries: -  "buffer" -> (names, xrefs, (unicodes, glyphs))
    # An embedded font is uniquely defined by its fontbuffer only. It may have
    # multiple names and xrefs.
    # Once the sets of used unicodes and glyphs are known, we compute a
    # smaller version of the buffer using package fontTools.

    if not fallback:  # by default use MuPDF function
        pdf = mupdf.pdf_document_from_fz_document(doc)
        mupdf.pdf_subset_fonts2(pdf, list(range(doc.page_count)))
        return

    font_buffers = {}

    def get_old_widths(xref):
        """Retrieve old font '/W' and '/DW' values."""
        df = doc.xref_get_key(xref, "DescendantFonts")
        if df[0] != "array":  # only handle xref specifications
            return None, None
        df_xref = int(df[1][1:-1].replace("0 R", ""))
        widths = doc.xref_get_key(df_xref, "W")
        if widths[0] != "array":  # no widths key found
            widths = None
        else:
            widths = widths[1]
        dwidths = doc.xref_get_key(df_xref, "DW")
        if dwidths[0] != "int":
            dwidths = None
        else:
            dwidths = dwidths[1]
        return widths, dwidths

    def set_old_widths(xref, widths, dwidths):
        """Restore the old '/W' and '/DW' in subsetted font.

        If either parameter is None or evaluates to False, the corresponding
        dictionary key will be set to null (if it is present at all).
        """
        df = doc.xref_get_key(xref, "DescendantFonts")
        if df[0] != "array":  # only handle xref specs
            return None
        df_xref = int(df[1][1:-1].replace("0 R", ""))
        for key, value in (("W", widths), ("DW", dwidths)):
            if type(value) is str and value:
                doc.xref_set_key(df_xref, key, value)
            elif doc.xref_get_key(df_xref, key)[0] != "null":
                doc.xref_set_key(df_xref, key, "null")
            # else: nothing to restore and nothing to remove. Never hand a
            # non-string value to xref_set_key.
        return None

    def set_subset_fontname(new_xref):
        """Generate a name prefix to tag a font as subset.

        We use a random generator to select 6 upper case ASCII characters.
        The prefixed name must be put in the font xref as the "/BaseFont" value
        and in the FontDescriptor object as the '/FontName' value.
        """
        # The following generates a prefix like 'ABCDEF+'
        import random
        import string
        prefix = "".join(random.choices(tuple(string.ascii_uppercase), k=6)) + "+"
        font_str = doc.xref_object(new_xref, compressed=True)
        font_str = font_str.replace("/BaseFont/", "/BaseFont/" + prefix)
        df = doc.xref_get_key(new_xref, "DescendantFonts")
        if df[0] == "array":
            df_xref = int(df[1][1:-1].replace("0 R", ""))
            fd = doc.xref_get_key(df_xref, "FontDescriptor")
            if fd[0] == "xref":
                fd_xref = int(fd[1].replace("0 R", ""))
                fd_str = doc.xref_object(fd_xref, compressed=True)
                fd_str = fd_str.replace("/FontName/", "/FontName/" + prefix)
                doc.update_object(fd_xref, fd_str)
        doc.update_object(new_xref, font_str)

    def build_subset(buffer, unc_set, gid_set):
        """Build font subset using fontTools.

        Args:
            buffer: (bytes) the font given as a binary buffer.
            unc_set: (set) required unicodes.
            gid_set: (set) required glyph ids. Used instead of 'unc_set' if
                that contains the error unicode 0xFFFD.
        Returns:
            Either None if subsetting is unsuccessful or the subset font buffer.
        """
        try:
            import fontTools.subset as fts
        except ImportError:
            if g_exceptions_verbose: exception_info()
            message("This method requires fontTools to be installed.")
            raise
        import tempfile
        with tempfile.TemporaryDirectory() as tmp_dir:
            oldfont_path = f"{tmp_dir}/oldfont.ttf"
            newfont_path = f"{tmp_dir}/newfont.ttf"
            uncfile_path = f"{tmp_dir}/uncfile.txt"
            args = [
                oldfont_path,
                "--retain-gids",
                f"--output-file={newfont_path}",
                "--layout-features=*",
                "--passthrough-tables",
                "--ignore-missing-glyphs",
                "--ignore-missing-unicodes",
                "--symbol-cmap",
            ]

            # store glyph ids or unicodes as file
            with io.open(f"{tmp_dir}/uncfile.txt", "w", encoding='utf8') as unc_file:
                if 0xFFFD in unc_set:  # error unicode exists -> use glyphs
                    args.append(f"--gids-file={uncfile_path}")
                    gid_set.add(189)
                    unc_list = list(gid_set)
                    for unc in unc_list:
                        unc_file.write("%i\n" % unc)
                else:
                    args.append(f"--unicodes-file={uncfile_path}")
                    unc_set.add(255)
                    unc_list = list(unc_set)
                    for unc in unc_list:
                        unc_file.write("%04x\n" % unc)

            # store fontbuffer as a file
            with io.open(oldfont_path, "wb") as fontfile:
                fontfile.write(buffer)
            try:
                os.remove(newfont_path)  # remove old file
            except Exception:
                pass
            try:  # invoke fontTools subsetter
                fts.main(args)
                font = Font(fontfile=newfont_path)
                new_buffer = font.buffer  # subset font binary
                if font.glyph_count == 0:  # intercept empty font
                    new_buffer = None
            except Exception:
                exception_info()
                new_buffer = None
        return new_buffer

    def repl_fontnames(doc):
        """Populate 'font_buffers'.

        For each font candidate, store its xref and the list of names
        by which PDF text may refer to it (there may be multiple).
        """

        def norm_name(name):
            """Recreate font name that contains PDF hex codes.

            E.g. #20 -> space, chr(32)
            """
            while "#" in name:
                p = name.find("#")
                c = int(name[p + 1 : p + 3], 16)
                name = name.replace(name[p : p + 3], chr(c))
            return name

        def get_fontnames(doc, item):
            """Return a list of fontnames for an item of page.get_fonts().

            There may be multiple names e.g. for Type0 fonts.
            """
            fontname = item[3]
            names = [fontname]
            fontname = doc.xref_get_key(item[0], "BaseFont")[1][1:]
            fontname = norm_name(fontname)
            if fontname not in names:
                names.append(fontname)
            descendents = doc.xref_get_key(item[0], "DescendantFonts")
            if descendents[0] != "array":
                return names
            descendents = descendents[1][1:-1]
            if descendents.endswith(" 0 R"):
                xref = int(descendents[:-4])
                descendents = doc.xref_object(xref, compressed=True)
            p1 = descendents.find("/BaseFont")
            if p1 >= 0:
                p2 = descendents.find("/", p1 + 1)
                p1 = min(descendents.find("/", p2 + 1), descendents.find(">>", p2 + 1))
                fontname = descendents[p2 + 1 : p1]
                fontname = norm_name(fontname)
                if fontname not in names:
                    names.append(fontname)
            return names

        for i in range(doc.page_count):
            for f in doc.get_page_fonts(i, full=True):
                font_xref = f[0]  # font xref
                font_ext = f[1]  # font file extension
                basename = f[3]  # font basename

                if font_ext not in (  # skip if not supported by fontTools
                    "otf",
                    "ttf",
                    "woff",
                    "woff2",
                ):
                    continue
                # skip fonts which already are subsets
                if len(basename) > 6 and basename[6] == "+":
                    continue

                extr = doc.extract_font(font_xref)
                fontbuffer = extr[-1]
                names = get_fontnames(doc, f)
                name_set, xref_set, subsets = font_buffers.get(
                    fontbuffer, (set(), set(), (set(), set()))
                )
                xref_set.add(font_xref)
                for name in names:
                    name_set.add(name)
                font = Font(fontbuffer=fontbuffer)
                name_set.add(font.name)
                del font
                font_buffers[fontbuffer] = (name_set, xref_set, subsets)

    def find_buffer_by_name(name):
        """Return the font buffer registered under 'name', or None."""
        for buffer, (name_set, _, _) in font_buffers.items():
            if name in name_set:
                return buffer
        return None

    # -----------------
    # main function
    # -----------------
    repl_fontnames(doc)  # populate font information
    if not font_buffers:  # nothing found to do
        if verbose:
            message(f'No fonts to subset.')
        return 0

    old_fontsize = 0
    new_fontsize = 0
    for fontbuffer in font_buffers.keys():
        old_fontsize += len(fontbuffer)

    # Scan page text for usage of subsettable fonts
    for page in doc:
        # go through the text and extend set of used glyphs by font
        # we use a modified MuPDF trace device, which delivers us glyph ids.
        for span in page.get_texttrace():
            if type(span) is not dict:  # skip useless information
                continue
            fontname = span["font"][:33]  # fontname for the span
            buffer = find_buffer_by_name(fontname)
            if buffer is None:
                continue
            name_set, xref_set, (set_ucs, set_gid) = font_buffers[buffer]
            for c in span["chars"]:
                set_ucs.add(c[0])  # unicode
                set_gid.add(c[1])  # glyph id
            font_buffers[buffer] = (name_set, xref_set, (set_ucs, set_gid))

    # build the font subsets
    for old_buffer, (name_set, xref_set, subsets) in font_buffers.items():
        new_buffer = build_subset(old_buffer, subsets[0], subsets[1])
        fontname = list(name_set)[0]
        if new_buffer is None or len(new_buffer) >= len(old_buffer):
            # subset was not created or did not get smaller
            if verbose:
                message(f'Cannot subset {fontname!r}.')
            # The font stays in the PDF unchanged, so its size must also be
            # part of the "new" total - otherwise the reported decrease
            # would include the full size of every font we could not subset.
            new_fontsize += len(old_buffer)
            continue
        if verbose:
            message(f"Built subset of font {fontname!r}.")
        val = doc._insert_font(fontbuffer=new_buffer)  # store subset font in PDF
        new_xref = val[0]  # get its xref
        set_subset_fontname(new_xref)  # tag fontname as subset font
        font_str = doc.xref_object(  # get its object definition
            new_xref,
            compressed=True,
        )
        # walk through the original font xrefs and replace each by the subset def
        for font_xref in xref_set:
            # we need the original '/W' and '/DW' width values
            width_table, def_width = get_old_widths(font_xref)
            # ... and replace original font definition at xref with it
            doc.update_object(font_xref, font_str)
            # now copy over old '/W' and '/DW' values
            if width_table or def_width:
                set_old_widths(font_xref, width_table, def_width)
        # 'new_xref' remains unused in the PDF and must be removed
        # by garbage collection.
        new_fontsize += len(new_buffer)

    return old_fontsize - new_fontsize
def switch_layer(self, config, as_default=0):
    """Activate an OC layer.

    Args:
        config: number of the layer configuration to select. A negative
            value is ignored.
        as_default: if true, the selected configuration is also stored as
            the default.

    Raises:
        ValueError: if the PDF has no configurations but 'config' is
            larger than zero.
    """
    pdf = _as_pdf_document(self)
    cfgs = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('OCProperties'),
        PDF_NAME('Configs'),
    )
    has_configs = mupdf.pdf_is_array(cfgs) and mupdf.pdf_array_len(cfgs)
    if not has_configs:
        if config >= 1:
            raise ValueError(MSG_BAD_OC_LAYER)
        return
    if config < 0:
        return
    mupdf.pdf_select_layer_config(pdf, config)
    if as_default:
        mupdf.pdf_set_layer_config_as_default(pdf)
        mupdf.ll_pdf_read_ocg(pdf.m_internal)
def update_object(self, xref, text, page=None):
    """Replace object definition source.

    Args:
        xref: number of the object to replace; must be within
            1 .. xref length - 1.
        text: the new object definition as PDF source.
        page: optional page; if given, its links are refreshed afterwards.

    Raises:
        ValueError: if the document is closed or encrypted, or if 'xref'
            is out of range.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    if not _INRANGE(xref, 1, xreflen-1):
        # Same exception as every other xref range check of this class.
        raise ValueError(MSG_BAD_XREF)
    ENSURE_OPERATION(pdf)
    # create new object with passed-in string
    new_obj = JM_pdf_obj_from_str(pdf, text)
    mupdf.pdf_update_object(pdf, xref, new_obj)
    if page:
        JM_refresh_links(_as_pdf_page(page))
def update_stream(self, xref=0, stream=None, new=1, compress=1):
    """Replace xref stream part.

    Args:
        xref: number of the object whose stream is replaced; must be within
            1 .. xref length - 1.
        stream: the new stream content (bytes-like).
        new: not used by this implementation.
        compress: passed on to the stream update.

    Raises:
        ValueError: if the document is closed or encrypted, if 'xref' is
            out of range, or if the object is no dictionary.
        TypeError: if 'stream' cannot be converted to a buffer.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    # Valid object numbers end at xreflen - 1, as in all other xref checks
    # of this class.
    if xref < 1 or xref >= xreflen:
        raise ValueError(MSG_BAD_XREF)
    # get the object
    obj = mupdf.pdf_new_indirect(pdf, xref, 0)
    if not mupdf.pdf_is_dict(obj):
        raise ValueError(MSG_IS_NO_DICT)
    res = JM_BufferFromBytes(stream)
    if not res.m_internal:
        raise TypeError(MSG_BAD_BUFFER)
    JM_update_stream(pdf, obj, res, compress)
    pdf.dirty = 1
@property
def version_count(self):
    '''
    Number of versions of the PDF document; 0 if the document is no PDF.
    '''
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return 0
    return mupdf.pdf_count_versions(pdf)
def write(
    self,
    garbage=False,
    clean=False,
    deflate=False,
    deflate_images=False,
    deflate_fonts=False,
    incremental=False,
    ascii=False,
    expand=False,
    linear=False,
    no_new_id=False,
    appearance=False,
    pretty=False,
    encryption=1,
    permissions=4095,
    owner_pw=None,
    user_pw=None,
    preserve_metadata=1,
    use_objstms=0,
    compression_effort=0,
):
    """Save the document to memory and return the result as bytes.

    All parameters are handed unchanged to 'save()', which writes to an
    in-memory file object.
    """
    from io import BytesIO

    save_options = dict(
        garbage=garbage,
        clean=clean,
        no_new_id=no_new_id,
        appearance=appearance,
        deflate=deflate,
        deflate_images=deflate_images,
        deflate_fonts=deflate_fonts,
        incremental=incremental,
        ascii=ascii,
        expand=expand,
        linear=linear,
        pretty=pretty,
        encryption=encryption,
        permissions=permissions,
        owner_pw=owner_pw,
        user_pw=user_pw,
        preserve_metadata=preserve_metadata,
        use_objstms=use_objstms,
        compression_effort=compression_effort,
    )
    memory_file = BytesIO()
    self.save(memory_file, **save_options)
    return memory_file.getvalue()
-
def tobytes(self, *args, **kwargs):
    """Alias of 'write()': forwards all arguments and returns its result."""
    data = self.write(*args, **kwargs)
    return data
@property
def xref(self):
    """PDF xref number of page, looked up via the parent document."""
    CheckParent(self)
    owner = self.parent
    return owner.page_xref(self.number)
def xref_copy(doc: 'Document', source: int, target: int, *, keep: list = None) -> None:
    """Copy a PDF dictionary object to another one given their xref numbers.

    Args:
        doc: PDF document object
        source: source xref number
        target: target xref number, the xref must already exist
        keep: an optional list of 1st level keys in target that should not be
            removed before copying.
    Notes:
        This works similar to the copy() method of dictionaries in Python. The
        source may be a stream object.
    """
    if doc.xref_is_stream(source):
        # take over the raw (still compressed) source stream as it is
        raw_stream = doc.xref_stream_raw(source)
        doc.update_stream(
            target,
            raw_stream,
            compress=False,  # keeps source compression
            new=True,  # in case target is no stream
        )

    # empty the target completely, but spare the keys listed in 'keep'
    protected = [] if keep is None else keep
    for key in doc.xref_get_keys(target):
        if key not in protected:
            doc.xref_set_key(target, key, "null")

    # copy over all source dict items
    for key in doc.xref_get_keys(source):
        value = doc.xref_get_key(source, key)[1]
        doc.xref_set_key(target, key, value)
-
def xref_get_key(self, xref, key):
    """Get PDF dict key value of object at 'xref'.

    Use -1 as 'xref' to access the PDF trailer.

    Returns:
        A tuple (type, value) of two strings. ("null", "null") is returned
        if the object or the key does not exist.

    Raises:
        ValueError: if 'xref' is out of range.
    """
    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
        raise ValueError(MSG_BAD_XREF)
    obj = mupdf.pdf_load_object(pdf, xref) if xref > 0 else mupdf.pdf_trailer(pdf)
    if not obj.m_internal:
        return ("null", "null")
    subobj = mupdf.pdf_dict_getp(obj, key)
    if not subobj.m_internal:
        return ("null", "null")

    # 'value' stays None for kinds which are rendered from the object source.
    value = None
    if mupdf.pdf_is_indirect(subobj):
        kind = "xref"
        value = "%i 0 R" % mupdf.pdf_to_num(subobj)
    elif mupdf.pdf_is_array(subobj):
        kind = "array"
    elif mupdf.pdf_is_dict(subobj):
        kind = "dict"
    elif mupdf.pdf_is_int(subobj):
        kind = "int"
        value = "%i" % mupdf.pdf_to_int(subobj)
    elif mupdf.pdf_is_real(subobj):
        kind = "float"
    elif mupdf.pdf_is_null(subobj):
        kind = "null"
        value = "null"
    elif mupdf.pdf_is_bool(subobj):
        kind = "bool"
        value = "true" if mupdf.pdf_to_bool(subobj) else "false"
    elif mupdf.pdf_is_name(subobj):
        kind = "name"
        value = "/%s" % mupdf.pdf_to_name(subobj)
    elif mupdf.pdf_is_string(subobj):
        kind = "string"
        value = JM_UnicodeFromStr(mupdf.pdf_to_text_string(subobj))
    else:
        kind = "unknown"

    if value is None:
        res = JM_object_to_buffer(subobj, 1, 0)
        value = JM_UnicodeFromBuffer(res)
    return (kind, value)
def xref_get_keys(self, xref):
    """Get the keys of PDF dict object at 'xref'. Use -1 for the PDF trailer.

    Returns:
        A list of key names; empty if the object has no dictionary entries.

    Raises:
        ValueError: if 'xref' is out of range.
    """
    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
        raise ValueError(MSG_BAD_XREF)
    obj = mupdf.pdf_load_object(pdf, xref) if xref > 0 else mupdf.pdf_trailer(pdf)
    count = mupdf.pdf_dict_len(obj)
    return [
        mupdf.pdf_to_name(mupdf.pdf_dict_get_key(obj, i))
        for i in range(count)
    ]
def xref_is_font(self, xref):
    """Check if xref is a font object, i.e. its /Type is /Font."""
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    type_value = self.xref_get_key(xref, "Type")[1]
    return True if type_value == "/Font" else False
def xref_is_image(self, xref):
    """Tell whether the object at `xref` has `/Subtype /Image`.

    Raises:
        ValueError: the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    _, subtype = self.xref_get_key(xref, "Subtype")
    return subtype == "/Image"
def xref_is_stream(self, xref=0):
    """Tell whether the object at `xref` is a stream.

    Returns False when the document is not a PDF (no underlying PDF
    document is available).
    """
    pdf = _as_pdf_document(self, required=0)
    if pdf.m_internal:
        return bool(mupdf.pdf_obj_num_is_stream(pdf, xref))
    return False  # not a PDF
def xref_is_xobject(self, xref):
    """Tell whether the object at `xref` has `/Subtype /Form` (form XObject).

    Raises:
        ValueError: the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    _, subtype = self.xref_get_key(xref, "Subtype")
    return subtype == "/Form"
def xref_length(self):
    """Return the length of the PDF xref table, or 0 if this is not a PDF."""
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return 0
    return mupdf.pdf_xref_len(pdf)
def xref_object(self, xref, compressed=0, ascii=0):
    """Return the source of the PDF object at `xref` as a string.

    Args:
        xref: object number; -1 selects the PDF trailer.
        compressed: forwarded to the object serialiser.
        ascii: forwarded to the object serialiser.

    Raises:
        ValueError: the document is closed, or (on the pure-Python path)
            `xref` is neither -1 nor in 1 .. xref length - 1.
    """
    if self.is_closed:
        raise ValueError("document closed")
    if g_use_extra:
        # optimised implementation from the 'extra' module
        return extra.xref_object(self.this, xref, compressed, ascii)

    pdf = _as_pdf_document(self)
    last_xref = mupdf.pdf_xref_len(pdf) - 1
    if not _INRANGE(xref, 1, last_xref) and xref != -1:
        raise ValueError(MSG_BAD_XREF)
    target = mupdf.pdf_load_object(pdf, xref) if xref > 0 else mupdf.pdf_trailer(pdf)
    source = JM_object_to_buffer(mupdf.pdf_resolve_indirect(target), compressed, ascii)
    return JM_EscapeStrFromBuffer(source)
def xref_set_key(self, xref, key, value):
    """Set `key` of the PDF dictionary at `xref` to `value`.

    Args:
        xref: object number; -1 selects the PDF trailer.
        key: non-empty string; of the characters in INVALID_NAME_CHARS it
            may contain only "/".
        value: non-empty string in PDF source syntax. If it starts with
            "/", the remainder must not contain INVALID_NAME_CHARS.

    Raises:
        ValueError: document closed, bad `key`, bad `value` or bad `xref`.
    """
    if self.is_closed:
        raise ValueError("document closed")

    key_ok = (
        key
        and isinstance(key, str)
        and INVALID_NAME_CHARS.intersection(key) in (set(), {"/"})
    )
    if not key_ok:
        raise ValueError("bad 'key'")

    if (
        not isinstance(value, str)
        or not value
        or (value[0] == "/" and INVALID_NAME_CHARS.intersection(value[1:]) != set())
    ):
        raise ValueError("bad 'value'")

    pdf = _as_pdf_document(self)
    last_xref = mupdf.pdf_xref_len(pdf) - 1
    if not _INRANGE(xref, 1, last_xref) and xref != -1:
        raise ValueError(MSG_BAD_XREF)

    is_trailer = xref == -1
    target = mupdf.pdf_trailer(pdf) if is_trailer else mupdf.pdf_load_object(pdf, xref)
    updated = JM_set_object_value(target, key, value)
    if not updated.m_internal:
        return  # did not work: skip update

    if not is_trailer:
        mupdf.pdf_update_object(pdf, xref, updated)
        return

    # Trailer: copy every entry of the updated dictionary into the trailer.
    for idx in range(mupdf.pdf_dict_len(updated)):
        mupdf.pdf_dict_put(
            target,
            mupdf.pdf_dict_get_key(updated, idx),
            mupdf.pdf_dict_get_val(updated, idx),
        )
def xref_stream(self, xref):
    """Return the decompressed stream of the object at `xref`.

    Returns:
        The stream content converted by `JM_BinFromBuffer`, or None when
        the object is not a stream.

    Raises:
        ValueError: document closed or encrypted, or `xref` is neither -1
            nor in 1 .. xref length - 1.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    last_xref = mupdf.pdf_xref_len(pdf) - 1
    if not _INRANGE(xref, 1, last_xref) and xref != -1:
        raise ValueError(MSG_BAD_XREF)
    if xref >= 0:
        candidate = mupdf.pdf_new_indirect(pdf, xref, 0)
    else:
        candidate = mupdf.pdf_trailer(pdf)
    if not mupdf.pdf_is_stream(candidate):
        return None
    return JM_BinFromBuffer(mupdf.pdf_load_stream_number(pdf, xref))
def xref_stream_raw(self, xref):
    """Return the stream of the object at `xref` without decompression.

    Returns:
        The raw stream content converted by `JM_BinFromBuffer`, or None
        when the object is not a stream.

    Raises:
        ValueError: document closed or encrypted, or `xref` is neither -1
            nor in 1 .. xref length - 1.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    last_xref = mupdf.pdf_xref_len(pdf) - 1
    if not _INRANGE(xref, 1, last_xref) and xref != -1:
        raise ValueError(MSG_BAD_XREF)
    if xref >= 0:
        candidate = mupdf.pdf_new_indirect(pdf, xref, 0)
    else:
        candidate = mupdf.pdf_trailer(pdf)
    if not mupdf.pdf_is_stream(candidate):
        return None
    return JM_BinFromBuffer(mupdf.pdf_load_raw_stream_number(pdf, xref))
def xref_xml_metadata(self):
    """Return the xref of the catalog's /Metadata entry, or 0 if absent.

    Reports MSG_BAD_PDFROOT through RAISEPY (with JM_Exc_FileDataError)
    when the trailer has no /Root entry.
    """
    pdf = _as_pdf_document(self)
    catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    if not catalog.m_internal:
        RAISEPY(MSG_BAD_PDFROOT, JM_Exc_FileDataError)
    metadata = mupdf.pdf_dict_get(catalog, PDF_NAME('Metadata'))
    if not metadata.m_internal:
        return 0
    return mupdf.pdf_to_num(metadata)
-
# Declared slots; '__dict__' is listed as well, so instances still accept
# arbitrary additional attributes.
__slots__ = ('this', 'page_count2', 'this_is_pdf', '__dict__')

# Read-only view of the private '_outline' attribute.
outline = property(lambda self: self._outline)
# Alias: 'is_stream' is the same method as 'xref_is_stream'.
is_stream = xref_is_stream
# Alias so that open(...) constructs a Document.
# NOTE(review): this rebinds the name 'open' in this scope - presumably module level; confirm.
open = Document
class DocumentWriter:
    """Context-manager wrapper around `mupdf.FzDocumentWriter`.

    Leaving the `with` block calls `close()`.
    """

    def __init__(self, path, options=''):
        """Create the writer.

        Args:
            path: a string, an object with an `absolute` attribute
                (converted with `str()`), an object with a `name` attribute
                (its `name` is used), or any other object, which is handed
                to `JM_new_output_fileptr`.
            options: passed through to `mupdf.FzDocumentWriter`.
        """
        if not isinstance(path, str):
            if hasattr(path, 'absolute'):
                path = str(path)
            elif hasattr(path, 'name'):
                path = path.name

        if isinstance(path, str):
            self.this = mupdf.FzDocumentWriter(path, options, mupdf.FzDocumentWriter.PathType_PDF)
            return

        # Need to keep the Python JM_new_output_fileptr_Output instance
        # alive for the lifetime of this DocumentWriter, otherwise calls
        # to virtual methods implemented in Python fail. So we make it a
        # member of this DocumentWriter.
        #
        # Unrelated to this, mupdf.FzDocumentWriter will set
        # self._out.m_internal to null because ownership is passed in.
        #
        out = JM_new_output_fileptr(path)
        self.this = mupdf.FzDocumentWriter(out, options, mupdf.FzDocumentWriter.OutputType_PDF)
        assert out.m_internal_value() == 0
        assert hasattr(self.this, '_out')

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    def begin_page(self, mediabox):
        """Start a page with the given mediabox; return its DeviceWrapper."""
        page_rect = JM_rect_from_py(mediabox)
        return DeviceWrapper(mupdf.fz_begin_page(self.this, page_rect))

    def close(self):
        """Close the underlying document writer."""
        mupdf.fz_close_document_writer(self.this)

    def end_page(self):
        """Finish the page started by `begin_page`."""
        mupdf.fz_end_page(self.this)
class Font:
    """Python wrapper of a MuPDF font; the wrapped object is kept in `self.this`."""

    def __del__(self):
        if type(self) is not Font:
            return None

    def __init__(
            self,
            fontname=None,
            fontfile=None,
            fontbuffer=None,
            script=0,
            language=None,
            ordering=-1,
            is_bold=0,
            is_italic=0,
            is_serif=0,
            embed=1,
            ):
        """Create the font via `JM_get_font`.

        Args:
            fontname: font name. "cjk", "china-t", "china-ts", "china-s*",
                "korea*" and "japan*" select a CJK `ordering`; names found
                in `fitz_fontdescriptors` are loaded from the optional
                `pymupdf_fonts` package; otherwise, if `ordering` is
                negative, the name is looked up in `Base14_fontdict`.
            fontfile: passed through to `JM_get_font`.
            fontbuffer: font data as bytes, bytearray or an object with a
                `getvalue()` method.
            script, language, ordering, is_bold, is_italic, is_serif, embed:
                passed through to `JM_get_font` (`language` after conversion
                by `mupdf.fz_text_language_from_string`).

        Raises:
            ValueError: `fontbuffer` is given but is not bytes-like.
        """
        if fontbuffer:
            if hasattr(fontbuffer, "getvalue"):
                fontbuffer = fontbuffer.getvalue()
            elif isinstance(fontbuffer, bytearray):
                fontbuffer = bytes(fontbuffer)
            if not isinstance(fontbuffer, bytes):
                raise ValueError("bad type: 'fontbuffer'")

        if isinstance(fontname, str):
            fname_lower = fontname.lower()
            if "/" in fname_lower or "\\" in fname_lower or "." in fname_lower:
                message("Warning: did you mean a fontfile?")

            if fname_lower in ("cjk", "china-t", "china-ts"):
                ordering = 0
            elif fname_lower.startswith("china-s"):
                ordering = 1
            elif fname_lower.startswith("korea"):
                ordering = 3
            elif fname_lower.startswith("japan"):
                ordering = 2
            elif fname_lower in fitz_fontdescriptors:
                import pymupdf_fonts  # optional fonts
                fontbuffer = pymupdf_fonts.myfont(fname_lower)  # make a copy
                fontname = None  # ensure using fontbuffer only
                del pymupdf_fonts  # remove package again
            elif ordering < 0:
                fontname = Base14_fontdict.get(fontname, fontname)

        lang = mupdf.fz_text_language_from_string(language)
        self.this = JM_get_font(
                fontname, fontfile, fontbuffer, script, lang, ordering,
                is_bold, is_italic, is_serif, embed,
                )

    def __repr__(self):
        return "Font('%s')" % self.name

    def _encode(self, ucs, lang, script, small_caps):
        """Return `(glyph id, font)` to use for code point `ucs`.

        The small-caps lookup does not switch fonts, so the font is always
        this font in that case. Previously the font was only bound when the
        glyph id was non-negative, leaving it unbound (or stale from an
        earlier character) otherwise.
        """
        if small_caps:
            return mupdf.fz_encode_character_sc(self.this, ucs), self.this
        gid, font = mupdf.fz_encode_character_with_fallback(self.this, ucs, script, lang)
        return gid, font

    @property
    def ascender(self):
        """Return the glyph ascender value."""
        return mupdf.fz_font_ascender(self.this)

    @property
    def bbox(self):
        """Return the font's bounding box as given by `fz_font_bbox()`."""
        return self.this.fz_font_bbox()

    @property
    def buffer(self):
        """Return a copy of the font's data buffer."""
        buffer_ = mupdf.FzBuffer(mupdf.ll_fz_keep_buffer(self.this.m_internal.buffer))
        return mupdf.fz_buffer_extract_copy(buffer_)

    def char_lengths(self, text, fontsize=11, language=None, script=0, wmode=0, small_caps=0):
        """Return a list with the advance of each character of 'text' under a fontsize."""
        lang = mupdf.fz_text_language_from_string(language)
        rc = []
        for ch in text:
            gid, font = self._encode(ord(ch), lang, script, small_caps)
            rc.append(fontsize * mupdf.fz_advance_glyph(font, gid, wmode))
        return rc

    @property
    def descender(self):
        """Return the glyph descender value."""
        return mupdf.fz_font_descender(self.this)

    @property
    def flags(self):
        """Return the font flags as a dict, or None if no flags are available."""
        f = mupdf.ll_fz_font_flags(self.this.m_internal)
        if not f:
            return
        assert isinstance(f, mupdf.fz_font_flags_t)
        if not mupdf_cppyy:
            return {
                    "mono": f.is_mono,
                    "serif": f.is_serif,
                    "bold": f.is_bold,
                    "italic": f.is_italic,
                    "substitute": f.ft_substitute,
                    "stretch": f.ft_stretch,
                    "fake-bold": f.fake_bold,
                    "fake-italic": f.fake_italic,
                    "opentype": f.has_opentype,
                    "invalid-bbox": f.invalid_bbox,
                    'cjk': f.cjk,
                    'cjk-lang': f.cjk_lang,
                    'embed': f.embed,
                    'never-embed': f.never_embed,
                    }

        # cppyy includes remaining higher bits in the first bitfield member,
        # so the individual fields are peeled off in declaration order.
        remaining = [f.is_mono]

        def take(width):
            value = remaining[0] & ((1 << width) - 1)
            remaining[0] >>= width
            return value

        is_mono = take(1)
        is_serif = take(1)
        is_bold = take(1)
        is_italic = take(1)
        ft_substitute = take(1)
        ft_stretch = take(1)
        fake_bold = take(1)
        fake_italic = take(1)
        has_opentype = take(1)
        invalid_bbox = take(1)
        # MuPDF's fz_font_flags_t declares 'cjk : 1' followed by
        # 'cjk_lang : 2'. The earlier code skipped 'cjk' and read 'cjk_lang'
        # as one bit, which shifted 'embed' and 'never_embed'.
        # NOTE(review): verify the widths against the installed font.h.
        cjk = take(1)
        cjk_lang = take(2)
        embed = take(1)
        never_embed = take(1)
        return {
                "mono": is_mono,
                "serif": is_serif,
                "bold": is_bold,
                "italic": is_italic,
                "substitute": ft_substitute,
                "stretch": ft_stretch,
                "fake-bold": fake_bold,
                "fake-italic": fake_italic,
                "opentype": has_opentype,
                "invalid-bbox": invalid_bbox,
                'cjk': cjk,
                'cjk-lang': cjk_lang,
                'embed': embed,
                'never-embed': never_embed,
                }

    def glyph_advance(self, chr_, language=None, script=0, wmode=0, small_caps=0):
        """Return the glyph width of a unicode (font size 1)."""
        lang = mupdf.fz_text_language_from_string(language)
        gid, font = self._encode(chr_, lang, script, small_caps)
        return mupdf.fz_advance_glyph(font, gid, wmode)

    def glyph_bbox(self, char, language=None, script=0, small_caps=0):
        """Return the glyph bbox of a unicode (font size 1)."""
        lang = mupdf.fz_text_language_from_string(language)
        gid, font = self._encode(char, lang, script, small_caps)
        return Rect(mupdf.fz_bound_glyph(font, gid, mupdf.FzMatrix()))

    @property
    def glyph_count(self):
        """Number of glyphs as stored in the underlying font structure."""
        return self.this.m_internal.glyph_count

    def glyph_name_to_unicode(self, name):
        """Return the unicode for a glyph name."""
        return glyph_name_to_unicode(name)

    def has_glyph(self, chr, language=None, script=0, fallback=0, small_caps=0):
        """Check whether font has a glyph for this unicode.

        Returns the glyph id found by the lookup. With `fallback` set, the
        lookup may use a fallback font and `small_caps` is not consulted.
        """
        if fallback:
            lang = mupdf.fz_text_language_from_string(language)
            gid, font = mupdf.fz_encode_character_with_fallback(self.this, chr, script, lang)
        elif small_caps:
            gid = mupdf.fz_encode_character_sc(self.this, chr)
        else:
            gid = mupdf.fz_encode_character(self.this, chr)
        return gid

    @property
    def is_bold(self):
        return mupdf.fz_font_is_bold(self.this)

    @property
    def is_italic(self):
        return mupdf.fz_font_is_italic(self.this)

    @property
    def is_monospaced(self):
        return mupdf.fz_font_is_monospaced(self.this)

    @property
    def is_serif(self):
        return mupdf.fz_font_is_serif(self.this)

    @property
    def is_writable(self):
        """Always True; the former capability checks were unreachable."""
        return True  # see pymupdf commit ef4056ee4da2

    @property
    def name(self):
        """Font name as reported by `mupdf.fz_font_name`."""
        return mupdf.fz_font_name(self.this)

    def text_length(self, text, fontsize=11, language=None, script=0, wmode=0, small_caps=0):
        """Return length of unicode 'text' under a fontsize.

        Raises:
            TypeError: `text` is not a str.
        """
        lang = mupdf.fz_text_language_from_string(language)
        if not isinstance(text, str):
            raise TypeError(MSG_BAD_TEXT)
        rc = 0
        for ch in text:
            gid, font = self._encode(ord(ch), lang, script, small_caps)
            rc += mupdf.fz_advance_glyph(font, gid, wmode)
        rc *= fontsize
        return rc

    def unicode_to_glyph_name(self, ch):
        """Return the glyph name for a unicode."""
        return unicode_to_glyph_name(ch)

    def valid_codepoints(self):
        '''
        Returns sorted list of valid unicodes of a fz_font.
        '''
        ucs_gids = mupdf.fz_enumerate_font_cmap2(self.this)
        return sorted({entry.ucs for entry in ucs_gids})
class Graftmap:
    """Holds a PDF graft map created for a target document."""

    def __init__(self, doc):
        """Create a new graft map for the PDF document behind `doc`."""
        target_pdf = _as_pdf_document(doc)
        self.this = mupdf.pdf_new_graft_map(target_pdf)
        self.thisown = True

    def __del__(self):
        # Only exact Graftmap instances give up ownership here.
        if type(self) is Graftmap:
            self.thisown = False
class Link:
    """A link of a page; wraps a `mupdf.FzLink` in `self.this`.

    The attributes `parent` (owning page), `xref` and `id` are read by the
    methods below but are not set in `__init__`.
    """

    def __del__(self):
        self._erase()

    def __init__(self, this):
        assert isinstance(this, mupdf.FzLink)
        self.this = this

    def __repr__(self):
        CheckParent(self)
        return "link on " + str(self.parent)

    __str__ = __repr__

    def _border(self, doc, xref):
        """Return border details of the link object; None if `doc` is no PDF."""
        pdf = _as_pdf_document(doc, required=0)
        if not pdf.m_internal:
            return
        link_obj = mupdf.pdf_new_indirect(pdf, xref, 0)
        if not link_obj.m_internal:
            return
        return JM_annot_border(link_obj)

    def _colors(self, doc, xref):
        """Return color details of the link object; None if `doc` is no PDF.

        Raises:
            ValueError: no object could be created for `xref`.
        """
        pdf = _as_pdf_document(doc, required=0)
        if not pdf.m_internal:
            return
        link_obj = mupdf.pdf_new_indirect(pdf, xref, 0)
        if not link_obj.m_internal:
            raise ValueError(MSG_BAD_XREF)
        return JM_annot_colors(link_obj)

    def _erase(self):
        """Detach the link from its parent and give up ownership."""
        self.parent = None
        self.thisown = False

    def _setBorder(self, border, doc, xref):
        """Apply `border` to the link object; None if `doc` is no PDF."""
        pdf = _as_pdf_document(doc, required=0)
        if not pdf.m_internal:
            return
        link_obj = mupdf.pdf_new_indirect(pdf, xref, 0)
        if not link_obj.m_internal:
            return
        return JM_annot_set_border(border, pdf, link_obj)

    @property
    def border(self):
        return self._border(self.parent.parent.this, self.xref)

    @property
    def colors(self):
        return self._colors(self.parent.parent.this, self.xref)

    @property
    def dest(self):
        """Create link destination details."""
        if hasattr(self, "parent") and self.parent is None:
            raise ValueError("orphaned object: parent is None")
        doc = self.parent.parent
        if doc.is_closed or doc.is_encrypted:
            raise ValueError("document closed or encrypted")
        if self.is_external or self.uri.startswith("#"):
            uri = None
        else:
            uri = doc.resolve_link(self.uri)
        return linkDest(self, uri, doc)

    @property
    def flags(self)->int:
        """Value of the link's /F entry; 0 if absent or the document is no PDF."""
        CheckParent(self)
        doc = self.parent.parent
        if not doc.is_pdf:
            return 0
        f = doc.xref_get_key(self.xref, "F")
        if f[1] != "null":
            return int(f[1])
        return 0

    @property
    def is_external(self):
        """Flag the link as external."""
        CheckParent(self)
        if g_use_extra:
            return extra.Link_is_external(self.this)
        this_link = self.this
        if not this_link.m_internal or not this_link.m_internal.uri:
            return False
        return bool(mupdf.fz_is_external_link(this_link.m_internal.uri))

    @property
    def next(self):
        """Next link."""
        if not self.this.m_internal:
            return None
        CheckParent(self)
        # The 'extra' implementation was disabled in the original code
        # ('if 0 and g_use_extra'), so the wrapper's own next() is used.
        following = self.this.next()
        if not following.m_internal:
            return None

        val = Link(following)
        val.thisown = True
        val.parent = self.parent  # copy owning page from prev link
        val.parent._annot_refs[id(val)] = val
        if self.xref > 0:  # prev link has an xref
            # Query the page's annotation table once and keep only links.
            links = [x for x in self.parent.annot_xrefs() if x[1] == mupdf.PDF_ANNOT_LINK]
            idx = [x[0] for x in links].index(self.xref)
            val.xref = links[idx + 1][0]
            val.id = links[idx + 1][2]
        else:
            val.xref = 0
            val.id = ""
        return val

    @property
    def rect(self):
        """Rectangle ('hot area')."""
        CheckParent(self)
        # utils.py:getLinkDict() appears to expect exceptions from us, so we
        # ensure that we raise on error.
        if self.this is None or not self.this.m_internal:
            raise Exception('self.this.m_internal not available')
        return Rect(JM_py_from_rect(self.this.rect()))

    def set_border(self, border=None, width=0, dashes=None, style=None):
        """Set the link border.

        `border` is used if it is a dict; otherwise one is built from
        `width`, `style` and `dashes`.
        """
        if type(border) is not dict:
            border = {"width": width, "style": style, "dashes": dashes}
        return self._setBorder(border, self.parent.parent.this, self.xref)

    def set_colors(self, colors=None, stroke=None, fill=None):
        """Set border colors.

        Args:
            colors: dict with keys "stroke" and "fill"; if not a dict, one
                is built from the `stroke` and `fill` arguments.
            stroke: a number or a sequence of 1, 3 or 4 numbers; an empty
                list/tuple writes an empty /C array. None leaves the
                stored color unchanged.
            fill: ignored apart from a warning, links have no fill color.
        """
        CheckParent(self)
        doc = self.parent.parent
        if type(colors) is not dict:
            colors = {"fill": fill, "stroke": stroke}
        fill = colors.get("fill")
        stroke = colors.get("stroke")
        if fill is not None:
            message("warning: links have no fill color")
        if stroke is None:
            # Nothing to set. Previously this fell through to len(None) and
            # raised a TypeError.
            return
        if stroke in ([], ()):
            doc.xref_set_key(self.xref, "C", "[]")
            return
        if hasattr(stroke, "__float__"):
            stroke = [float(stroke)]
        CheckColor(stroke)
        assert len(stroke) in (1, 3, 4)
        s = f"[{_format_g(stroke)}]"
        doc.xref_set_key(self.xref, "C", s)

    def set_flags(self, flags):
        """Store `flags` as the link's /F entry.

        Raises:
            ValueError: the document is no PDF or `flags` is not an int.
        """
        CheckParent(self)
        doc = self.parent.parent
        if not doc.is_pdf:
            raise ValueError("is no PDF")
        if not type(flags) is int:
            raise ValueError("bad 'flags' value")
        doc.xref_set_key(self.xref, "F", str(flags))
        return None

    @property
    def uri(self):
        """Uri string."""
        #CheckParent(self)
        if g_use_extra:
            return extra.link_uri(self.this)
        this_link = self.this
        return this_link.m_internal.uri if this_link.m_internal else ''

    # Class-level default; linkDest reads obj.page.
    page = -1
class Matrix:
    """Sequence-like 2D transformation matrix with components a, b, c, d, e, f."""

    def __abs__(self):
        """Euclidean norm of the six components."""
        return math.sqrt(sum([c*c for c in self]))

    def __add__(self, m):
        """Add a number to every component, or add a 6-sequence componentwise."""
        if hasattr(m, "__float__"):
            return Matrix(self.a + m, self.b + m, self.c + m,
                          self.d + m, self.e + m, self.f + m)
        if len(m) != 6:
            raise ValueError("Matrix: bad seq len")
        return Matrix(self.a + m[0], self.b + m[1], self.c + m[2],
                      self.d + m[3], self.e + m[4], self.f + m[5])

    def __bool__(self):
        """False only if all six components are zero."""
        return not (max(self) == min(self) == 0)

    def __eq__(self, mat):
        if not hasattr(mat, "__len__"):
            return False
        return len(mat) == 6 and not (self - mat)

    def __getitem__(self, i):
        return (self.a, self.b, self.c, self.d, self.e, self.f)[i]

    def __init__(self, *args, a=None, b=None, c=None, d=None, e=None, f=None):
        """
        Matrix() - all zeros
        Matrix(a, b, c, d, e, f)
        Matrix(zoom-x, zoom-y) - zoom
        Matrix(shear-x, shear-y, 1) - shear
        Matrix(degree) - rotate
        Matrix(Matrix) - new copy
        Matrix(sequence) - from 'sequence'
        Matrix(mupdf.FzMatrix) - from MuPDF class wrapper for fz_matrix.

        Explicit keyword args a, b, c, d, e, f override any earlier settings if
        not None.
        """
        if not args:
            self.a = self.b = self.c = self.d = self.e = self.f = 0.0
        elif len(args) > 6:
            raise ValueError("Matrix: bad seq len")
        elif len(args) == 6:  # 6 numbers
            self.a, self.b, self.c, self.d, self.e, self.f = map(float, args)
        elif len(args) == 1:  # either an angle or a sequ
            arg = args[0]
            if isinstance(arg, mupdf.FzMatrix):
                self.a = arg.a
                self.b = arg.b
                self.c = arg.c
                self.d = arg.d
                self.e = arg.e
                self.f = arg.f
            elif hasattr(arg, "__float__"):
                theta = math.radians(arg)
                c_ = round(math.cos(theta), 8)
                s_ = round(math.sin(theta), 8)
                self.a = self.d = c_
                self.b = s_
                self.c = -s_
                self.e = self.f = 0.0
            else:
                self.a, self.b, self.c, self.d, self.e, self.f = map(float, arg)
        elif len(args) == 2 or len(args) == 3 and args[2] == 0:
            self.a, self.b, self.c, self.d, self.e, self.f = float(args[0]), \
                0.0, 0.0, float(args[1]), 0.0, 0.0
        elif len(args) == 3 and args[2] == 1:
            self.a, self.b, self.c, self.d, self.e, self.f = 1.0, \
                float(args[1]), float(args[0]), 1.0, 0.0, 0.0
        else:
            raise ValueError("Matrix: bad args")

        # Override with explicit args if specified.
        if a is not None: self.a = a
        if b is not None: self.b = b
        if c is not None: self.c = c
        if d is not None: self.d = d
        if e is not None: self.e = e
        if f is not None: self.f = f

    def __invert__(self):
        """Calculate inverted matrix."""
        m1 = Matrix()
        m1.invert(self)
        return m1

    def __len__(self):
        return 6

    def __mul__(self, m):
        """Scale every component by a number, or concatenate with a matrix."""
        if hasattr(m, "__float__"):
            return Matrix(self.a * m, self.b * m, self.c * m,
                          self.d * m, self.e * m, self.f * m)
        m1 = Matrix(1,1)
        return m1.concat(self, m)

    def __neg__(self):
        return Matrix(-self.a, -self.b, -self.c, -self.d, -self.e, -self.f)

    def __nonzero__(self):
        return not (max(self) == min(self) == 0)

    def __pos__(self):
        return Matrix(self)

    def __repr__(self):
        return "Matrix" + str(tuple(self))

    def __setitem__(self, i, v):
        """Set component 0..5 to float(v); other indices raise IndexError."""
        v = float(v)
        if   i == 0: self.a = v
        elif i == 1: self.b = v
        elif i == 2: self.c = v
        elif i == 3: self.d = v
        elif i == 4: self.e = v
        elif i == 5: self.f = v
        else:
            raise IndexError("index out of range")
        return

    def __sub__(self, m):
        """Subtract a number from every component, or a 6-sequence componentwise."""
        if hasattr(m, "__float__"):
            return Matrix(self.a - m, self.b - m, self.c - m,
                          self.d - m, self.e - m, self.f - m)
        if len(m) != 6:
            raise ValueError("Matrix: bad seq len")
        return Matrix(self.a - m[0], self.b - m[1], self.c - m[2],
                      self.d - m[3], self.e - m[4], self.f - m[5])

    def __truediv__(self, m):
        """Divide every component by a number, or multiply by the inverse of `m`.

        Raises:
            ZeroDivisionError: `m` is a matrix that cannot be inverted (or
                a zero number).
        """
        if hasattr(m, "__float__"):
            return Matrix(self.a * 1./m, self.b * 1./m, self.c * 1./m,
                          self.d * 1./m, self.e * 1./m, self.f * 1./m)
        m1 = util_invert_matrix(m)[1]
        if not m1:
            raise ZeroDivisionError("matrix not invertible")
        m2 = Matrix(1,1)
        return m2.concat(self, m1)

    def concat(self, one, two):
        """Multiply two matrices and replace current one."""
        if not len(one) == len(two) == 6:
            raise ValueError("Matrix: bad seq len")
        self.a, self.b, self.c, self.d, self.e, self.f = util_concat_matrix(one, two)
        return self

    def invert(self, src=None):
        """Calculate the inverted matrix. Return 0 if successful and replace
        current one. Else return 1 and do nothing.
        """
        if src is None:
            dst = util_invert_matrix(self)
        else:
            dst = util_invert_matrix(src)
        if dst[0] == 1:
            return 1
        self.a, self.b, self.c, self.d, self.e, self.f = dst[1]
        return 0

    @property
    def is_rectilinear(self):
        """True if rectangles are mapped to rectangles."""
        return (abs(self.b) < EPSILON and abs(self.c) < EPSILON) or \
            (abs(self.a) < EPSILON and abs(self.d) < EPSILON)

    def prerotate(self, theta):
        """Calculate pre rotation and replace current matrix.

        `theta` is an angle in degrees; it is reduced to [0, 360) first.
        Multiples of 90 degrees (within EPSILON) are handled without
        trigonometry.
        """
        # Reduce with modulo instead of repeated +/-360 loops: constant
        # time for huge angles and no endless loop for infinite input
        # (which now yields NaN components, like NaN input does).
        theta = float(theta) % 360
        if theta >= 360:
            # float modulo of a tiny negative number can round up to 360.0
            theta = 0.0
        if abs(0 - theta) < EPSILON:
            pass

        elif abs(90.0 - theta) < EPSILON:
            a = self.a
            b = self.b
            self.a = self.c
            self.b = self.d
            self.c = -a
            self.d = -b

        elif abs(180.0 - theta) < EPSILON:
            self.a = -self.a
            self.b = -self.b
            self.c = -self.c
            self.d = -self.d

        elif abs(270.0 - theta) < EPSILON:
            a = self.a
            b = self.b
            self.a = -self.c
            self.b = -self.d
            self.c = a
            self.d = b

        else:
            rad = math.radians(theta)
            s = math.sin(rad)
            c = math.cos(rad)
            a = self.a
            b = self.b
            self.a = c * a + s * self.c
            self.b = c * b + s * self.d
            self.c =-s * a + c * self.c
            self.d =-s * b + c * self.d

        return self

    def prescale(self, sx, sy):
        """Calculate pre scaling and replace current matrix."""
        sx = float(sx)
        sy = float(sy)
        self.a *= sx
        self.b *= sx
        self.c *= sy
        self.d *= sy
        return self

    def preshear(self, h, v):
        """Calculate pre shearing and replace current matrix."""
        h = float(h)
        v = float(v)
        a, b = self.a, self.b
        self.a += v * self.c
        self.b += v * self.d
        self.c += h * a
        self.d += h * b
        return self

    def pretranslate(self, tx, ty):
        """Calculate pre translation and replace current matrix."""
        tx = float(tx)
        ty = float(ty)
        self.e += tx * self.a + ty * self.c
        self.f += tx * self.b + ty * self.d
        return self

    __inv__ = __invert__
    __div__ = __truediv__
    norm = __abs__
class IdentityMatrix(Matrix):
    """Identity matrix [1, 0, 0, 1, 0, 0]"""

    def __hash__(self):
        return hash((1,0,0,1,0,0))

    def __init__(self):
        Matrix.__init__(self, 1.0, 1.0)

    def __repr__(self):
        return "IdentityMatrix(1.0, 0.0, 0.0, 1.0, 0.0, 0.0)"

    def __setattr__(self, name, value):
        """Pin the matrix components to the identity values.

        Assignments to a..f are silently replaced by the identity value;
        any other attribute is stored as given.
        """
        # Compare against the individual names: 'name in "bcef"' was a
        # substring test and also matched names such as "ef" or "".
        if name in ("a", "d"):
            self.__dict__[name] = 1.0
        elif name in ("b", "c", "e", "f"):
            self.__dict__[name] = 0.0
        else:
            self.__dict__[name] = value

    def checkargs(*args):
        """Helper that always raises NotImplementedError."""
        raise NotImplementedError("Identity is readonly")

# Shared instance of the identity matrix.
Identity = IdentityMatrix()
class linkDest:
    """link or outline destination details"""

    def __init__(self, obj, rlink, document=None):
        """Derive destination attributes from `obj`.

        Args:
            obj: object providing `is_external`, `page` and `uri`.
            rlink: resolved link location, indexed as (page number, x, y),
                or a false value. If given and the uri does not start with
                "#", the uri is replaced by a "#page=..." uri.
            document: needed for "#..." uris that are not of the "#page="
                form; its `resolve_names()` is used for named destinations.

        Sets: dest, file_spec, flags, is_map, is_uri, kind, lt, named,
        new_window, page, rb, uri.
        """
        isExt = obj.is_external
        isInt = not isExt
        self.dest = ""
        self.file_spec = ""
        self.flags = 0
        self.is_map = False
        self.is_uri = False
        self.kind = LINK_NONE
        self.lt = Point(0, 0)
        self.named = dict()
        self.new_window = ""
        self.page = obj.page
        self.rb = Point(0, 0)
        self.uri = obj.uri

        def uri_to_dict(uri):
            """Split 'k1=v1&k2' into {'k1': 'v1', 'k2': None}.

            Works on the string passed in. The previous version ignored
            its argument and always sliced self.uri[1:], which dropped the
            first character of uris that do not start with '#'.
            """
            ret = dict()
            for item in uri.split('&'):
                name, sep, val = item.partition('=')
                ret[name] = val if sep else None
            return ret

        def unescape(name):
            """Unescape '%AB' substrings to chr(0xAB).

            A '%' that is not followed by a hex number is kept literally
            instead of raising ValueError.
            """
            split = name.replace("%%", "%25")  # take care of escaped '%'
            split = split.split("%")
            newname = split[0]
            for item in split[1:]:
                try:
                    newname += chr(int(item[:2], base=16))
                except ValueError:
                    newname += "%" + item
                    continue
                newname += item[2:]
            return newname

        if rlink and not self.uri.startswith("#"):
            self.uri = f"#page={rlink[0] + 1}&zoom=0,{_format_g(rlink[1])},{_format_g(rlink[2])}"
        if isExt:
            self.page = -1
            self.kind = LINK_URI
        if not self.uri:
            self.page = -1
            self.kind = LINK_NONE

        if isInt and self.uri:
            self.uri = self.uri.replace("&zoom=nan", "&zoom=0")
            if self.uri.startswith("#"):
                self.kind = LINK_GOTO
                m = re.match('^#page=([0-9]+)&zoom=([0-9.]+),(-?[0-9.]+),(-?[0-9.]+)$', self.uri)
                if m:
                    self.page = int(m.group(1)) - 1
                    self.lt = Point(float((m.group(3))), float(m.group(4)))
                    self.flags = self.flags | LINK_FLAG_L_VALID | LINK_FLAG_T_VALID
                else:
                    m = re.match('^#page=([0-9]+)$', self.uri)
                    if m:
                        self.page = int(m.group(1)) - 1
                    else:
                        self.kind = LINK_NAMED
                        m = re.match('^#nameddest=(.*)', self.uri)
                        assert document
                        if document and m:
                            named = unescape(m.group(1))
                            self.named = document.resolve_names().get(named)
                            if self.named is None:
                                # document.resolve_names() does not contain an
                                # entry for `named` so use an empty dict.
                                self.named = dict()
                            self.named['nameddest'] = named
                        else:
                            self.named = uri_to_dict(self.uri[1:])
            else:
                self.kind = LINK_NAMED
                self.named = uri_to_dict(self.uri)

        if isExt:
            if not self.uri:
                pass
            elif self.uri.startswith("file:"):
                self.file_spec = self.uri[5:]
                if self.file_spec.startswith("//"):
                    self.file_spec = self.file_spec[2:]
                self.is_uri = False
                self.uri = ""
                self.kind = LINK_LAUNCH
                ftab = self.file_spec.split("#")
                if len(ftab) == 2:
                    if ftab[1].startswith("page="):
                        self.kind = LINK_GOTOR
                        self.file_spec = ftab[0]
                        self.page = int(ftab[1].split("&")[0][5:]) - 1
            elif ":" in self.uri:
                self.is_uri = True
                self.kind = LINK_URI
            else:
                self.is_uri = True
                self.kind = LINK_LAUNCH
        assert isinstance(self.named, dict)
- class Widget:
- '''
- Class describing a PDF form field ("widget")
- '''
def __init__(self):
    """Initialise all widget attributes with their default values."""
    # border properties
    self.border_color = None
    self.border_style = "S"
    self.border_width = 0
    self.border_dashes = None
    self.choice_values = None  # choice fields only
    self.rb_parent = None # radio buttons only: xref of owning parent

    # field properties
    self.field_name = None  # field name
    self.field_label = None  # field label
    self.field_value = None
    self.field_flags = 0
    self.field_display = 0
    self.field_type = 0  # valid range 1 through 7
    self.field_type_string = None  # field type as string

    self.fill_color = None
    self.button_caption = None  # button caption
    self.is_signed = None  # True / False if signature

    # text properties
    self.text_color = (0, 0, 0)
    self.text_font = "Helv"
    self.text_fontsize = 0
    self.text_maxlen = 0  # text fields only
    self.text_format = 0  # text fields only
    self._text_da = ""  # /DA = default appearance

    # JavaScript actions
    self.script = None  # JavaScript (/A)
    self.script_stroke = None  # JavaScript (/AA/K)
    self.script_format = None  # JavaScript (/AA/F)
    self.script_change = None  # JavaScript (/AA/V)
    self.script_calc = None  # JavaScript (/AA/C)
    self.script_blur = None  # JavaScript (/AA/Bl)
    self.script_focus = None  # JavaScript (/AA/Fo) codespell:ignore

    self.rect = None  # annot value
    self.xref = 0  # annot value
- def __repr__(self):
- #return "'%s' widget on %s" % (self.field_type_string, str(self.parent))
- # No self.parent.
- return f'Widget:(field_type={self.field_type_string} script={self.script})'
- return "'%s' widget" % (self.field_type_string)
- def _adjust_font(self):
- """Ensure text_font is from our list and correctly spelled.
- """
- if not self.text_font:
- self.text_font = "Helv"
- return
- valid_fonts = ("Cour", "TiRo", "Helv", "ZaDb")
- for f in valid_fonts:
- if self.text_font.lower() == f.lower():
- self.text_font = f
- return
- self.text_font = "Helv"
- return
- def _checker(self):
- """Any widget type checks.
- """
- if self.field_type not in range(1, 8):
- raise ValueError("bad field type")
- # if setting a radio button to ON, first set Off all buttons
- # in the group - this is not done by MuPDF:
- if self.field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON and self.field_value not in (False, "Off") and hasattr(self, "parent"):
- # so we are about setting this button to ON/True
- # check other buttons in same group and set them to 'Off'
- doc = self.parent.parent
- kids_type, kids_value = doc.xref_get_key(self.xref, "Parent/Kids")
- if kids_type == "array":
- xrefs = tuple(map(int, kids_value[1:-1].replace("0 R","").split()))
- for xref in xrefs:
- if xref != self.xref:
- doc.xref_set_key(xref, "AS", "/Off")
- # the calling method will now set the intended button to on and
- # will find everything prepared for correct functioning.
- def _parse_da(self):
- """Extract font name, size and color from default appearance string (/DA object).
- Equivalent to 'pdf_parse_default_appearance' function in MuPDF's 'pdf-annot.c'.
- """
- if not self._text_da:
- return
- font = "Helv"
- fsize = 0
- col = (0, 0, 0)
- dat = self._text_da.split() # split on any whitespace
- for i, item in enumerate(dat):
- if item == "Tf":
- font = dat[i - 2][1:]
- fsize = float(dat[i - 1])
- dat[i] = dat[i-1] = dat[i-2] = ""
- continue
- if item == "g": # unicolor text
- col = [(float(dat[i - 1]))]
- dat[i] = dat[i-1] = ""
- continue
- if item == "rg": # RGB colored text
- col = [float(f) for f in dat[i - 3:i]]
- dat[i] = dat[i-1] = dat[i-2] = dat[i-3] = ""
- continue
- self.text_font = font
- self.text_fontsize = fsize
- self.text_color = col
- self._text_da = ""
- return
def _validate(self):
    """Check and normalize the attributes of this widget.

    Raises ValueError for an infinite or empty rectangle, a missing field
    name, or a script attribute that is set but is not a ``str``. Colors
    are passed through ``CheckColor``. Empty values of text color, border
    width, font size and the script attributes are replaced by defaults.
    Finishes by calling ``self._checker()`` for type specific checks.
    """
    rect = self.rect
    if rect.is_infinite or rect.is_empty:
        raise ValueError("bad rect")

    if not self.field_name:
        raise ValueError("field name missing")

    if self.field_label == "Unnamed":
        self.field_label = None

    CheckColor(self.border_color)
    CheckColor(self.fill_color)
    if not self.text_color:
        self.text_color = (0, 0, 0)
    CheckColor(self.text_color)

    if not self.border_width:
        self.border_width = 0

    if not self.text_fontsize:
        self.text_fontsize = 0

    # only the first letter of the border style is kept, in upper case
    self.border_style = self.border_style.upper()[0:1]

    # standardize content of JavaScript entries
    is_button = self.field_type in (
        mupdf.PDF_WIDGET_TYPE_BUTTON,
        mupdf.PDF_WIDGET_TYPE_CHECKBOX,
        mupdf.PDF_WIDGET_TYPE_RADIOBUTTON,
    )

    if not self.script:
        self.script = None
    elif type(self.script) is not str:
        raise ValueError("script content must be a string")

    # buttons cannot have the following script actions
    restricted_scripts = (
        "script_calc",
        "script_change",
        "script_format",
        "script_stroke",
        "script_blur",
        "script_focus",
    )
    for attr in restricted_scripts:
        if is_button or not getattr(self, attr):
            setattr(self, attr, None)
        elif type(getattr(self, attr)) is not str:
            raise ValueError(f"{attr} content must be a string")

    self._checker()  # any field_type specific checks
def _sync_flags(self):
    """Copy this widget's field flags to its /Parent and sibling widgets.

    Only works for widgets that exist in the PDF (non-zero ``self.xref``).
    The value of ``self.field_flags`` is written to the /Ff key of the
    /Parent dictionary and of every other /Kids entry that is a dictionary
    with /Subtype "Widget".

    Returns:
        True if the flags were propagated, False if the widget has no
        xref, no /Parent dictionary, or the parent has no /Kids array.
    """
    if not self.xref:
        return False  # widget is not stored in the PDF

    doc = self.parent.parent  # the owning document
    assert doc
    pdf = _as_pdf_document(doc)

    widget_obj = mupdf.pdf_load_object(pdf, self.xref)
    parent_obj = mupdf.pdf_dict_get(widget_obj, PDF_NAME("Parent"))
    if not parent_obj.pdf_is_dict():
        return False  # nothing to synchronize with

    parent_obj.pdf_dict_put_int(PDF_NAME("Ff"), self.field_flags)

    kids = parent_obj.pdf_dict_get(PDF_NAME("Kids"))
    if not kids.pdf_is_array():
        message("warning: malformed PDF, Parent has no Kids array")
        return False

    for idx in range(kids.pdf_array_len()):
        kid = kids.pdf_array_get(idx)
        if not kid.pdf_is_dict():
            continue
        if kid.pdf_to_num() == self.xref:  # this widget itself
            continue
        if kid.pdf_dict_get(PDF_NAME("Subtype")).pdf_to_name() != "Widget":
            continue
        kid.pdf_dict_put_int(PDF_NAME("Ff"), self.field_flags)

    return True
def button_states(self):
    """Return the on/off state names for button widgets.

    A button may have 'normal' or 'pressed down' appearances. While the 'Off'
    state is usually called like this, the 'On' state is often given a name
    relating to the functional context.

    Returns:
        None if the field type is not 2 or 5 (checkbox / radio button) or
        if the widget has no ``parent`` attribute yet. Otherwise a dict
        ``{"normal": names, "down": names}`` where each value is the list
        of state names found under /AP/N resp. /AP/D, or None if that
        entry is neither an inline dictionary nor an indirect reference.
    """
    if self.field_type not in (2, 5):
        return None  # no button type
    if not hasattr(self, "parent"):  # field does not yet exist on a page
        return None
    doc = self.parent.parent
    xref = self.xref

    def state_names(key):
        """Return the state names stored under ``key`` or None."""
        entry = doc.xref_get_key(xref, key)
        kind, value = entry[0], entry[1]
        if kind == "dict":
            source = value[2:-2]  # drop the enclosing "<<" and ">>"
        elif kind == "xref":
            # value looks like "<num> 0 R": read the referenced object
            source = doc.xref_object(int(value.split(" ")[0]))
        else:
            return None
        names = []
        # every "/"-separated chunk starts with a name
        for chunk in source.split("/")[1:]:
            words = chunk.split()
            if words:  # skip empty chunks instead of failing on them
                names.append(words[0])
        return names

    return {"normal": state_names("AP/N"), "down": state_names("AP/D")}
@property
def next(self):
    """Return the ``next`` attribute of the wrapped annotation ``self._annot``."""
    successor = self._annot.next
    return successor
def on_state(self):
    """Return the "On" value for button widgets.

    The value is the first state name reported by ``button_states()``
    that differs from "Off" (the "normal" states are searched first).

    Returns:
        None if the field type is not 2 or 5 (no checkbox or radio
        button). The "On" state name if one is found. Otherwise - e.g.
        for a button that is being created and has no appearance states
        yet - a warning is shown and True is returned.
    """
    if self.field_type not in (2, 5):
        return None  # no checkbox or radio button
    bstate = self.button_states()
    if bstate is None:
        bstate = dict()
    for names in bstate.values():
        # an appearance entry may be missing (None): treat it as empty
        for name in names or ():
            if name != "Off":
                return name
    message("warning: radio button has no 'On' value.")
    return True
def reset(self):
    """Reset the field value to its default (via ``TOOLS._reset_widget``)."""
    annot = self._annot
    TOOLS._reset_widget(annot)
def update(self, sync_flags=False):
    """Reflect Python object in the PDF.

    Validates the widget, adjusts the font name, builds the default
    appearance (/DA) string from text color, font and font size, and
    saves the widget via ``TOOLS._save_widget``. With ``sync_flags`` set,
    the field flags are afterwards propagated by ``_sync_flags()``.
    """
    self._validate()
    self._adjust_font()  # ensure valid text_font name

    # build the /DA string; the color operator depends on the number of
    # color components
    self._text_da = ""
    ncomp = len(self.text_color)
    if ncomp == 3:
        fmt = "{:g} {:g} {:g} rg /{f:s} {s:g} Tf"
    elif ncomp == 1:
        fmt = "{:g} g /{f:s} {s:g} Tf"
    elif ncomp == 4:
        fmt = "{:g} {:g} {:g} {:g} k /{f:s} {s:g} Tf"
    self._text_da = fmt.format(
        *self.text_color,
        f=self.text_font,
        s=self.text_fontsize,
    )

    # a widget with a calculation script ('/AA/C') must be listed in the
    # '/CO' array of the '/AcroForm' dictionary
    if self.script_calc:
        util_ensure_widget_calc(self._annot)

    # finally update the widget
    TOOLS._save_widget(self._annot, self)
    self._text_da = ""

    if sync_flags:
        self._sync_flags()  # propagate field flags to parent and kids
- from . import _extra
class Outline:
    """Python wrapper of an outline node; the wrapped object is ``self.this``.

    Most properties read directly from ``self.this.m_internal``.
    """

    __slots__ = ['this']

    def __init__(self, ol):
        self.this = ol

    @property
    def dest(self):
        '''outline destination details'''
        return linkDest(self, None, None)

    def destination(self, document):
        '''
        Like `dest` property but uses `document` to resolve destinations for
        kind=LINK_NAMED.
        '''
        return linkDest(self, None, document)

    @property
    def down(self):
        """Wrapped result of ``this.down()``, or None if it has no internal object."""
        child = self.this.down()
        return Outline(child) if child.m_internal else None

    @property
    def is_external(self):
        """Whether the node's URI is an external link; False without node or URI."""
        if g_use_extra:
            # calling _extra.* here appears to save significant time in
            # test_toc.py:test_full_toc, 1.2s=>0.94s.
            return _extra.Outline_is_external(self.this)
        internal = self.this.m_internal
        if not internal:
            return False
        target = internal.uri
        if target is None:
            return False
        return mupdf.fz_is_external_link(target)

    @property
    def is_open(self):
        """The ``is_open`` value of the internal node."""
        return self.this.m_internal.is_open

    @property
    def next(self):
        """Wrapped result of ``this.next()``, or None if it has no internal object."""
        sibling = self.this.next()
        return Outline(sibling) if sibling.m_internal else None

    @property
    def page(self):
        """The ``page.page`` value of the internal node."""
        return self.this.m_internal.page.page

    @property
    def title(self):
        """The ``title`` value of the internal node."""
        return self.this.m_internal.title

    @property
    def uri(self):
        """The ``uri`` value of the internal node, or None if there is no node."""
        internal = self.this.m_internal
        if not internal:
            return None
        return internal.uri

    @property
    def x(self):
        """The ``x`` value of the internal node."""
        return self.this.m_internal.x

    @property
    def y(self):
        """The ``y`` value of the internal node."""
        return self.this.m_internal.y
def _make_PdfFilterOptions(
        recurse=0,
        instance_forms=0,
        ascii=0,
        no_update=0,
        sanitize=0,
        sopts=None,
        ):
    '''
    Returns a mupdf.PdfFilterOptions instance.

    `recurse`, `instance_forms`, `ascii` and `no_update` are copied to the
    attributes of the same name. If `sanitize` is true, a sanitize filter
    factory is added; it uses `sopts` (which must then be a
    mupdf.PdfSanitizeFilterOptions) or default sanitize options.
    '''
    options_obj = mupdf.PdfFilterOptions()
    options_obj.recurse = recurse
    options_obj.instance_forms = instance_forms
    options_obj.ascii = ascii
    options_obj.no_update = no_update

    if not sanitize:
        return options_obj

    # We want to use a PdfFilterFactory whose `.filter` fn pointer is
    # set to MuPDF's `pdf_new_sanitize_filter()`. But not sure how to
    # get access to this raw fn in Python; and on Windows raw MuPDF
    # functions are not even available to C++.
    #
    # So we use SWIG Director to implement our own
    # PdfFilterFactory whose `filter()` method calls
    # `mupdf.ll_pdf_new_sanitize_filter()`.
    if sopts:
        assert isinstance(sopts, mupdf.PdfSanitizeFilterOptions)
    else:
        sopts = mupdf.PdfSanitizeFilterOptions()

    class Factory(mupdf.PdfFilterFactory2):
        def __init__(self):
            super().__init__()
            self.use_virtual_filter()
            self.sopts = sopts

        def filter(self, ctx, doc, chain, struct_parents, transform, options):
            if 0:
                log(f'sanitize filter.filter():')
                log(f'    {self=}')
                log(f'    {ctx=}')
                log(f'    {doc=}')
                log(f'    {chain=}')
                log(f'    {struct_parents=}')
                log(f'    {transform=}')
                log(f'    {options=}')
                log(f'    {self.sopts.internal()=}')
            return mupdf.ll_pdf_new_sanitize_filter(
                    doc,
                    chain,
                    struct_parents,
                    transform,
                    options,
                    self.sopts.internal(),
                    )

    sanitize_factory = Factory()
    options_obj.add_factory(sanitize_factory.internal())
    # NOTE(review): presumably stored so the Python factory object stays
    # alive as long as the options object - confirm.
    options_obj._factory = sanitize_factory
    return options_obj
- class Page:
def __init__(self, page, document):
    """Wrap a MuPDF page.

    Args:
        page: a mupdf.FzPage or mupdf.PdfPage.
        document: the owning document, stored as ``self.parent``.

    ``self.number`` is read from the page's internal object, or is None
    if the page has no internal object.
    """
    assert isinstance(page, (mupdf.FzPage, mupdf.PdfPage)), f'page is: {page}'
    self.this = page
    self.thisown = True
    self.last_point = None
    self.draw_cont = ''
    self._annot_refs = dict()
    self.parent = document
    if not page.m_internal:
        self.number = None
    elif isinstance(page, mupdf.PdfPage):
        self.number = page.m_internal.super.number
    else:
        self.number = page.m_internal.number
- def __repr__(self):
- return self.__str__()
- CheckParent(self)
- x = self.parent.name
- if self.parent.stream is not None:
- x = "<memory, doc# %i>" % (self.parent._graft_id,)
- if x == "":
- x = "<new PDF, doc# %i>" % self.parent._graft_id
- return "page %s of %s" % (self.number, x)
def __str__(self):
    """Return 'page <number>' followed by ' of <document>' if a parent is set.

    The document part is the parent's name, or a '<memory, ...>' /
    '<new PDF, ...>' placeholder containing the parent's ``_graft_id``.
    """
    #CheckParent(self)
    owner = getattr(self, 'parent', None)
    internal = self.this.m_internal
    if isinstance(internal, mupdf.pdf_page):
        number = internal.super.number
    else:
        number = internal.number
    text = f'page {number}'
    if owner:
        label = self.parent.name
        if self.parent.stream is not None:
            label = "<memory, doc# %i>" % (self.parent._graft_id,)
        if label == "":
            label = "<new PDF, doc# %i>" % self.parent._graft_id
        text += f' of {label}'
    return text
def _add_caret_annot(self, point):
    """Create a caret annotation and return the MuPDF annotation object.

    If ``point`` is given, the annotation rectangle is moved so that its
    top-left corner is at ``point`` while keeping its size. Note that the
    raw annotation is returned, not an ``Annot`` wrapper.
    """
    if g_use_extra:
        return extra._add_caret_annot(self.this, JM_point_from_py(point))

    pdf_page = self._pdf_page()
    annot = mupdf.pdf_create_annot(pdf_page, mupdf.PDF_ANNOT_CARET)
    if point:
        p = JM_point_from_py(point)
        box = mupdf.pdf_annot_rect(annot)
        moved = mupdf.FzRect(
            p.x,
            p.y,
            p.x + box.x1 - box.x0,
            p.y + box.y1 - box.y0,
        )
        mupdf.pdf_set_annot_rect(annot, moved)
    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    return annot
def _add_file_annot(self, point, buffer_, filename, ufilename=None, desc=None, icon=None):
    """Create a file attachment annotation at ``point``.

    Args:
        point: top-left corner of the annotation rectangle.
        buffer_: content of the file to embed.
        filename: file name; also stored as the annotation's /Contents.
        ufilename: unicode file name, defaults to ``filename``.
        desc: description, defaults to ``filename``.
        icon: optional icon name.

    Raises:
        TypeError: if ``buffer_`` cannot be turned into a MuPDF buffer.
    """
    pdf_page = self._pdf_page()
    unicode_name = ufilename if ufilename else filename
    description = desc if desc else filename
    p = JM_point_from_py(point)

    filebuf = JM_BufferFromBytes(buffer_)
    if not filebuf.m_internal:
        raise TypeError( MSG_BAD_BUFFER)

    annot = mupdf.pdf_create_annot(pdf_page, mupdf.PDF_ANNOT_FILE_ATTACHMENT)
    annot_obj = mupdf.pdf_annot_obj(annot)

    # keep the default size, move the rectangle to the requested point
    box = mupdf.pdf_annot_rect(annot)
    box = mupdf.fz_make_rect(p.x, p.y, p.x + box.x1 - box.x0, p.y + box.y1 - box.y0)
    mupdf.pdf_set_annot_rect(annot, box)

    flags = mupdf.PDF_ANNOT_IS_PRINT
    mupdf.pdf_set_annot_flags(annot, flags)

    if icon:
        mupdf.pdf_set_annot_icon_name(annot, icon)

    filespec = JM_embed_file(pdf_page.doc(), filebuf, filename, unicode_name, description, 1)
    mupdf.pdf_dict_put(annot_obj, PDF_NAME('FS'), filespec)
    mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('Contents'), filename)

    mupdf.pdf_update_annot(annot)
    # rectangle and flags are applied once more after the update
    mupdf.pdf_set_annot_rect(annot, box)
    mupdf.pdf_set_annot_flags(annot, flags)
    JM_add_annot_id(annot, "A")
    return Annot(annot)
def _add_freetext_annot(
        self, rect,
        text,
        fontsize=11,
        fontname=None,
        text_color=None,
        fill_color=None,
        border_color=None,
        border_width=0,
        dashes=None,
        callout=None,
        line_end=mupdf.PDF_ANNOT_LE_OPEN_ARROW,
        opacity=1,
        align=0,
        rotate=0,
        richtext=False,
        style=None,
        ):
    """Create a FreeText annotation in ``rect``.

    With ``richtext`` false, ``text`` is stored as the annotation contents
    and a default appearance string is generated from ``text_color``,
    ``fontname`` and ``fontsize``. With ``richtext`` true, ``text`` is
    embedded in an XHTML body stored under /RC, and ``style`` (if given)
    is stored under /DS.

    ``rotate`` is normalized to the range 0 <= rotate < 360. ``callout``,
    if given, is a sequence of points for a callout line ending with the
    ``line_end`` style.

    Raises:
        ValueError: if ``border_color`` is given without ``richtext``, or
            if ``rect`` is infinite or empty.
    """
    page = self._pdf_page()
    if border_color and not richtext:
        raise ValueError("cannot set border_color if rich_text is False")
    if border_color and not text_color:
        text_color = border_color

    nfcol, fcol = JM_color_FromSequence(fill_color)
    ntcol, tcol = JM_color_FromSequence(text_color)
    r = JM_rect_from_py(rect)
    if mupdf.fz_is_infinite_rect(r) or mupdf.fz_is_empty_rect(r):
        raise ValueError( MSG_BAD_RECT)

    annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_FREE_TEXT)
    annot_obj = mupdf.pdf_annot_obj(annot)

    # insert text as 'contents' or 'RC' depending on 'richtext'
    if not richtext:
        mupdf.pdf_set_annot_contents(annot, text)
    else:
        # The body element must use the XHTML namespace
        # ("http://www.w3.org/1999/xhtml").
        rc = (
            '<?xml version="1.0"?>\n'
            '<body xmlns="http://www.w3.org/1999/xhtml"\n'
            'xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/"\n'
            'xfa:contentType="text/html" xfa:APIVersion="Acrobat:8.0.0" xfa:spec="2.4">\n'
            f'{text}'
        )
        mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME("RC"), rc)
        # NOTE(review): the style string is assumed to apply to rich text
        # only - confirm against the original nesting.
        if style:
            mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME("DS"), style)

    mupdf.pdf_set_annot_rect(annot, r)

    # bring the rotation into the range 0 <= rotate < 360
    while rotate < 0:
        rotate += 360
    while rotate >= 360:
        rotate -= 360
    if rotate != 0:
        mupdf.pdf_dict_put_int(annot_obj, PDF_NAME('Rotate'), rotate)

    mupdf.pdf_set_annot_quadding(annot, align)

    if nfcol > 0:
        mupdf.pdf_set_annot_color(annot, fcol[:nfcol])

    mupdf.pdf_set_annot_border_width(annot, border_width)
    mupdf.pdf_set_annot_opacity(annot, opacity)
    if dashes:
        for d in dashes:
            mupdf.pdf_add_annot_border_dash_item(annot, float(d))

    # Insert callout information
    if callout:
        mupdf.pdf_dict_put(annot_obj, PDF_NAME("IT"), PDF_NAME("FreeTextCallout"))
        mupdf.pdf_set_annot_callout_style(annot, line_end)
        point_count = len(callout)
        extra.JM_set_annot_callout_line(annot, tuple(callout), point_count)

    # insert the default appearance string
    if not richtext:
        JM_make_annot_DA(annot, ntcol, tcol, fontname, fontsize)

    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    return Annot(annot)
def _add_ink_annot(self, list):
    """Create an Ink annotation from a sequence of strokes.

    Args:
        list: sequence of strokes; each stroke is a sequence of points
            and each point a sequence of two numbers. (The parameter name
            shadows the builtin and is kept for compatibility.)

    All points are validated and transformed with the inverted page
    transformation matrix *before* the annotation is created, so invalid
    input does not leave a partially built annotation on the page.

    Raises:
        ValueError: if ``list`` is no sequence or a point is not a
            sequence of length 2.
    """
    page = _as_pdf_page(self.this)
    if not PySequence_Check(list):
        raise ValueError( MSG_BAD_ARG_INK_ANNOT)
    ctm = mupdf.FzMatrix()
    mupdf.pdf_page_transform(page, mupdf.FzRect(0), ctm)
    inv_ctm = mupdf.fz_invert_matrix(ctm)

    # first pass: validate and transform, collecting plain coordinates
    strokes = []
    for j in range(len(list)):
        sublist = list[j]
        coords = []
        for i in range(len(sublist)):
            p = sublist[i]
            if not PySequence_Check(p) or PySequence_Size(p) != 2:
                raise ValueError( MSG_BAD_ARG_INK_ANNOT)
            point = mupdf.fz_transform_point(JM_point_from_py(p), inv_ctm)
            coords.append(point.x)
            coords.append(point.y)
        strokes.append(coords)

    # second pass: input is known to be good, build the annotation
    annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_INK)
    annot_obj = mupdf.pdf_annot_obj(annot)
    inklist = mupdf.pdf_new_array(page.doc(), len(strokes))
    for coords in strokes:
        stroke = mupdf.pdf_new_array(page.doc(), len(coords))
        for value in coords:
            mupdf.pdf_array_push_real(stroke, value)
        mupdf.pdf_array_push(inklist, stroke)
    mupdf.pdf_dict_put(annot_obj, PDF_NAME('InkList'), inklist)
    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    return Annot(annot)
def _add_line_annot(self, p1, p2):
    """Create a Line annotation from point ``p1`` to point ``p2``."""
    pdf_page = self._pdf_page()
    annot = mupdf.pdf_create_annot(pdf_page, mupdf.PDF_ANNOT_LINE)
    start = JM_point_from_py(p1)
    end = JM_point_from_py(p2)
    mupdf.pdf_set_annot_line(annot, start, end)
    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    assert annot.m_internal
    return Annot(annot)
def _add_multiline(self, points, annot_type):
    """Create an annotation of ``annot_type`` with the vertices ``points``.

    Args:
        points: sequence of at least two points, each a sequence of
            length 2.
        annot_type: MuPDF annotation type passed to ``pdf_create_annot``.

    All points are validated and converted *before* the annotation is
    created, so invalid input does not leave a partially built annotation
    on the page.

    Raises:
        ValueError: if fewer than two points are given or a point does
            not have length 2.
    """
    page = self._pdf_page()
    if len(points) < 2:
        raise ValueError( MSG_BAD_ARG_POINTS)
    vertices = []
    for p in points:
        if PySequence_Size(p) != 2:
            raise ValueError( MSG_BAD_ARG_POINTS)
        vertices.append(JM_point_from_py(p))

    annot = mupdf.pdf_create_annot(page, annot_type)
    for vertex in vertices:
        mupdf.pdf_add_annot_vertex(annot, vertex)
    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    return Annot(annot)
def _add_redact_annot(self, quad, text=None, da_str=None, align=0, fill=None, text_color=None):
    """Create a Redact annotation covering the rectangle of ``quad``.

    Args:
        quad: area to redact; its enclosing rectangle is used.
        text: optional overlay text; requires ``da_str``.
        da_str: default appearance string stored under /DA with ``text``.
        align: value stored under /Q with ``text``.
        fill: optional color, stored under /IC.
        text_color: not used by this method.
    """
    pdf_page = self._pdf_page()
    annot = mupdf.pdf_create_annot(pdf_page, mupdf.PDF_ANNOT_REDACT)
    annot_rect = mupdf.fz_rect_from_quad(JM_quad_from_py(quad))
    # TODO calculate de-rotated rect
    mupdf.pdf_set_annot_rect(annot, annot_rect)

    if fill:
        ncomp, components = JM_color_FromSequence(fill)
        color_array = mupdf.pdf_new_array(pdf_page.doc(), ncomp)
        for idx in range(ncomp):
            mupdf.pdf_array_push_real(color_array, components[idx])
        mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME('IC'), color_array)

    if text:
        assert da_str
        mupdf.pdf_dict_puts(
                mupdf.pdf_annot_obj(annot),
                "OverlayText",
                mupdf.pdf_new_text_string(text),
                )
        mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), PDF_NAME('DA'), da_str)
        mupdf.pdf_dict_put_int(mupdf.pdf_annot_obj(annot), PDF_NAME('Q'), align)

    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    # take an extra reference and re-wrap the low-level annotation
    kept = mupdf.ll_pdf_keep_annot(annot.m_internal)
    return Annot(mupdf.PdfAnnot(kept))
def _add_square_or_circle(self, rect, annot_type):
    """Create an annotation of ``annot_type`` with rectangle ``rect``.

    Raises:
        ValueError: if ``rect`` is infinite or empty.
    """
    pdf_page = self._pdf_page()
    box = JM_rect_from_py(rect)
    if mupdf.fz_is_infinite_rect(box) or mupdf.fz_is_empty_rect(box):
        raise ValueError( MSG_BAD_RECT)
    annot = mupdf.pdf_create_annot(pdf_page, annot_type)
    mupdf.pdf_set_annot_rect(annot, box)
    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    assert annot.m_internal
    return Annot(annot)
def _add_stamp_annot(self, rect, stamp=0):
    """Create a Stamp annotation in ``rect``.

    Args:
        rect: target rectangle.
        stamp: either an index into the list of standard stamp names, or
            an image given as Pixmap, file name (str), bytes / bytearray
            or io.BytesIO. Any other value selects the first standard
            name.

    An image stamp is scaled to fit into ``rect`` keeping its aspect
    ratio and is centered there.

    Raises:
        ValueError: if ``rect`` is infinite or empty.
    """
    rect = Rect(rect)
    r = JM_rect_from_py(rect)
    if mupdf.fz_is_infinite_rect(r) or mupdf.fz_is_empty_rect(r):
        raise ValueError(MSG_BAD_RECT)
    page = self._pdf_page()
    stamp_names = [
        "Approved",
        "AsIs",
        "Confidential",
        "Departmental",
        "Experimental",
        "Expired",
        "Final",
        "ForComment",
        "ForPublicRelease",
        "NotApproved",
        "NotForPublicRelease",
        "Sold",
        "TopSecret",
        "Draft",
    ]

    # resolve 'stamp' to either a standard name or image data
    image_data = None
    name = None
    if stamp in range(len(stamp_names)):
        name = stamp_names[stamp]
    elif isinstance(stamp, Pixmap):
        image_data = stamp.tobytes()
    elif isinstance(stamp, str):
        image_data = pathlib.Path(stamp).read_bytes()
    elif isinstance(stamp, (bytes, bytearray)):
        image_data = stamp
    elif isinstance(stamp, io.BytesIO):
        image_data = stamp.getvalue()
    else:
        name = stamp_names[0]

    annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_STAMP)
    if image_data:  # image stamp
        fzbuff = mupdf.fz_new_buffer_from_copied_data(image_data)
        img = mupdf.fz_new_image_from_buffer(fzbuff)

        # compute image boundary box on page
        w, h = img.w(), img.h()
        scale = min(rect.width / w, rect.height / h)
        width = w * scale  # bbox width
        height = h * scale  # bbox height

        # center the bbox on the center of "rect"
        center = (rect.tl + rect.br) / 2
        x0 = center.x - width / 2
        y0 = center.y - height / 2
        x1 = x0 + width
        y1 = y0 + height
        r = mupdf.fz_make_rect(x0, y0, x1, y1)
        mupdf.pdf_set_annot_rect(annot, r)
        mupdf.pdf_set_annot_stamp_image(annot, img)
        mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME("Name"), mupdf.pdf_new_name("ImageStamp"))
        mupdf.pdf_set_annot_contents(annot, "Image Stamp")
    else:  # text stamp
        mupdf.pdf_set_annot_rect(annot, r)
        mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME("Name"), PDF_NAME(name))
        mupdf.pdf_set_annot_contents(annot, name)

    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    return Annot(annot)
def _add_text_annot(self, point, text, icon=None):
    """Create a Text annotation at ``point`` with contents ``text``.

    The default annotation size is kept; the rectangle is moved so that
    its top-left corner is at ``point``. ``icon`` optionally names the
    icon to use.
    """
    pdf_page = self._pdf_page()
    p = JM_point_from_py( point)
    annot = mupdf.pdf_create_annot(pdf_page, mupdf.PDF_ANNOT_TEXT)
    box = mupdf.pdf_annot_rect(annot)
    box = mupdf.fz_make_rect(p.x, p.y, p.x + box.x1 - box.x0, p.y + box.y1 - box.y0)
    mupdf.pdf_set_annot_rect(annot, box)
    mupdf.pdf_set_annot_contents(annot, text)
    if icon:
        mupdf.pdf_set_annot_icon_name(annot, icon)
    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    return Annot(annot)
def _add_text_marker(self, quads, annot_type):
    """Create a text marker annotation and register it with this page.

    Returns the new annotation, or None if none was created.

    Raises:
        ValueError: if the document is not a PDF.
    """
    CheckParent(self)
    if not self.parent.is_pdf:
        raise ValueError("is no PDF")
    marker = Page__add_text_marker(self, quads, annot_type)
    if not marker:
        return None
    # the annotation refers back to this page via a weak proxy
    marker.parent = weakref.proxy(self)
    self._annot_refs[id(marker)] = marker
    return marker
def _addAnnot_FromString(self, linklist):
    """Add links from list of object sources.

    Args:
        linklist: tuple of strings, each the PDF source of an annotation
            object. Each one is added to the document and a reference to
            it is appended to the page's /Annots array. Items that are
            empty or cannot be processed are skipped with a message.

    Raises:
        ValueError: if ``linklist`` is non-empty but not a tuple.
    """
    CheckParent(self)
    if g_use_extra:
        self.__class__._addAnnot_FromString = extra.Page_addAnnot_FromString
        #log('Page._addAnnot_FromString() deferring to extra.Page_addAnnot_FromString().')
        return extra.Page_addAnnot_FromString( self.this, linklist)
    page = _as_pdf_page(self.this)
    lcount = len(linklist)  # link count
    if lcount < 1:
        return
    # insert links from the provided sources
    if not isinstance(linklist, tuple):
        raise ValueError( "bad 'linklist' argument")
    # make sure the page has an /Annots array
    if not mupdf.pdf_dict_get( page.obj(), PDF_NAME('Annots')).m_internal:
        mupdf.pdf_dict_put_array( page.obj(), PDF_NAME('Annots'), lcount)
    annots = mupdf.pdf_dict_get( page.obj(), PDF_NAME('Annots'))
    assert annots.m_internal, f'{lcount=} {annots.m_internal=}'
    for i in range(lcount):
        text = JM_StrAsChar(linklist[i])
        if not text:
            # the index must be formatted into the text; passing it as a
            # second argument does not do that
            message("skipping bad link / annot item %i." % i)
            continue
        try:
            annot = mupdf.pdf_add_object( page.doc(), JM_pdf_obj_from_str( page.doc(), text))
            ind_obj = mupdf.pdf_new_indirect( page.doc(), mupdf.pdf_to_num( annot), 0)
            mupdf.pdf_array_push( annots, ind_obj)
        except Exception:
            if g_exceptions_verbose:
                exception_info()
            message("skipping bad link / annot item %i.\n" % i)
def _addWidget(self, field_type, field_name):
    """Create a widget of ``field_type`` named ``field_name`` on this page.

    Raises:
        RuntimeError: if the widget could not be created.
    """
    pdf_page = self._pdf_page()
    annot = JM_create_widget(pdf_page.doc(), pdf_page, field_type, field_name)
    if not annot.m_internal:
        raise RuntimeError( "cannot create widget")
    JM_add_annot_id(annot, "W")
    return Annot(annot)
def _apply_redactions(self, text, images, graphics):
    """Apply the redactions of this page and return MuPDF's result.

    Args:
        text: how to treat text.
        images: how to treat images.
        graphics: how to treat vector graphics.
    """
    pdf_page = self._pdf_page()
    redact_opts = mupdf.PdfRedactOptions()
    redact_opts.black_boxes = 0  # no black boxes
    redact_opts.text = text
    redact_opts.image_method = images
    redact_opts.line_art = graphics
    return mupdf.pdf_redact_page(pdf_page.doc(), pdf_page, redact_opts)
- def _erase(self):
- self._reset_annot_refs()
- try:
- self.parent._forget_page(self)
- except Exception:
- exception_info()
- pass
- self.parent = None
- self.thisown = False
- self.number = None
- self.this = None
def _count_q_balance(self):
    """Count missing graphic state pushs and pops.

    Returns:
        A pair of integers (push, pop). Push is the number of missing
        PDF "q" commands, pop is the number of "Q" commands.
        A balanced graphics state for the page will be reached if its
        /Contents is prepended with 'push' copies of "q" (plus line
        break) and appended with 'pop' copies of "Q" (after a line break).
    """
    pdf_page = _as_pdf_page(self)  # the underlying PDF page
    page_obj = pdf_page.obj()
    resources = mupdf.pdf_dict_get(page_obj, mupdf.PDF_ENUM_NAME_Resources)
    contents = mupdf.pdf_dict_get(page_obj, mupdf.PDF_ENUM_NAME_Contents)
    pdf = _as_pdf_document(self.parent)  # the underlying PDF document
    # the counting itself is done by MuPDF
    return mupdf.pdf_count_q_balance_outparams_fn(pdf, resources, contents)
def _get_optional_content(self, oc: OptInt) -> OptStr:
    """Return the resource property name that refers to xref ``oc``.

    Returns None if ``oc`` is None or 0. If the page's resource
    properties already contain ``oc``, its name is returned. Otherwise
    the first unused name "MC0", "MC1", ... is registered for ``oc`` and
    returned.

    Raises:
        ValueError: if object ``oc`` is neither an OCG nor an OCMD.
    """
    if oc is None or oc == 0:
        return None
    source = self.parent.xref_object(oc, compressed=True)
    if "/Type/OCG" not in source and "/Type/OCMD" not in source:
        #log( 'raising "bad optional content"')
        raise ValueError("bad optional content: 'oc'")
    #log( 'Looking at self._get_resource_properties()')
    # map xref -> property name
    name_by_xref = {x: p for p, x in self._get_resource_properties()}
    if oc in name_by_xref:
        return name_by_xref[oc]
    # find the first "MC<i>" that is not yet in use
    counter = 0
    while "MC%i" % counter in name_by_xref.values():
        counter += 1
    mc = "MC%i" % counter
    self._set_resource_property(mc, oc)
    #log( 'returning {mc=}')
    return mc
def _get_resource_properties(self):
    '''
    page list Resource/Properties

    Returns whatever `JM_get_resource_properties` delivers for the PDF
    page object.
    '''
    pdf_page = self._pdf_page()
    return JM_get_resource_properties(pdf_page.obj())
def _get_textpage(self, clip=None, flags=0, matrix=None):
    """Create a mupdf.FzStextPage for this page.

    The ``extra`` implementation is always used (the condition below is
    constant); the pure Python code after it is an unreachable fallback.
    """
    if 1 or g_use_extra:
        low_level = extra.page_get_textpage(self.this, clip, flags, matrix)
        return mupdf.FzStextPage(low_level)

    page = self.this
    options = mupdf.FzStextOptions(flags)
    # Default to page's rect if `clip` not specified, for #2048.
    rect = mupdf.fz_bound_page(page) if clip is None else JM_rect_from_py(clip)
    ctm = JM_matrix_from_py(matrix)
    tpage = mupdf.FzStextPage(rect)
    dev = mupdf.fz_new_stext_device(tpage, options)
    if _globals.no_device_caching:
        mupdf.fz_enable_device_hints( dev, mupdf.FZ_NO_CACHE)
    if isinstance(page, mupdf.PdfPage):
        page = page.super()
    elif not isinstance(page, mupdf.FzPage):
        assert 0, f'Unrecognised {type(page)=}'
    mupdf.fz_run_page(page, dev, ctm, mupdf.FzCookie())
    mupdf.fz_close_device(dev)
    return tpage
def _insert_image(self,
        filename=None, pixmap=None, stream=None, imask=None, clip=None,
        overlay=1, rotate=0, keep_proportion=1, oc=0, width=0, height=0,
        xref=0, alpha=-1, _imgname=None, digests=None
        ):
    """Insert an image into the page and return its xref.

    The image source is, in this order of precedence: an existing image
    ``xref``, a ``stream`` buffer, a ``filename``, or a ``pixmap``. An
    MD5 digest of new image data is looked up in ``digests`` so that
    identical data is stored only once. ``imask`` is an optional mask
    image for stream / file images.

    The code emulates a sequence of labelled sections (pixmap, stream,
    imask, image, xref) connected by gotos: each ``do_*`` flag tells
    whether the section of that name still has to run.

    Returns:
        ``(img_xref, digests)`` if a new image object was added to the
        PDF (``digests`` then contains the new entry), otherwise
        ``(img_xref, None)``.

    Raises:
        ValueError: if ``xref`` does not have a width / height, or if a
            mask is given for an image without a compressed buffer.
    """
    maskbuf = mupdf.FzBuffer()
    page = self._pdf_page()
    # This will create an empty PdfDocument with a call to
    # pdf_new_document() then assign page.doc()'s return value to it (which
    # drop the original empty pdf_document).
    pdf = page.doc()
    w = width
    h = height
    img_xref = xref
    rc_digest = 0  # set to 1 when a new entry was put into 'digests'

    # section flags: 1 = section still needs to be executed
    do_process_pixmap = 1
    do_process_stream = 1
    do_have_imask = 1
    do_have_image = 1
    do_have_xref = 1

    if xref > 0:
        # re-use an existing image object: only its size is needed
        ref = mupdf.pdf_new_indirect(pdf, xref, 0)
        w = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Width'), PDF_NAME('W')))
        h = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Height'), PDF_NAME('H')))
        if w + h == 0:
            raise ValueError( MSG_IS_NO_IMAGE)
        #goto have_xref()
        do_process_pixmap = 0
        do_process_stream = 0
        do_have_imask = 0
        do_have_image = 0

    else:
        if stream:
            imgbuf = JM_BufferFromBytes(stream)
            do_process_pixmap = 0
        else:
            if filename:
                imgbuf = mupdf.fz_read_file(filename)
                #goto have_stream()
                do_process_pixmap = 0

    if do_process_pixmap:
        #log( 'do_process_pixmap')
        # process pixmap ---------------------------------
        arg_pix = pixmap.this
        w = arg_pix.w()
        h = arg_pix.h()
        digest = mupdf.fz_md5_pixmap2(arg_pix)
        md5_py = digest
        temp = digests.get(md5_py, None)
        if temp is not None:
            # same pixmap was inserted before: re-use its xref
            img_xref = temp
            ref = mupdf.pdf_new_indirect(page.doc(), img_xref, 0)
            #goto have_xref()
            do_process_stream = 0
            do_have_imask = 0
            do_have_image = 0
        else:
            if arg_pix.alpha() == 0:
                image = mupdf.fz_new_image_from_pixmap(arg_pix, mupdf.FzImage())
            else:
                # pixmap has an alpha channel: a second image is made
                # from a converted pixmap and passed as the mask
                pm = mupdf.fz_convert_pixmap(
                        arg_pix,
                        mupdf.FzColorspace(),
                        mupdf.FzColorspace(),
                        mupdf.FzDefaultColorspaces(None),
                        mupdf.FzColorParams(),
                        1,
                        )
                pm.alpha = 0
                pm.colorspace = None
                mask = mupdf.fz_new_image_from_pixmap(pm, mupdf.FzImage())
                image = mupdf.fz_new_image_from_pixmap(arg_pix, mask)
            #goto have_image()
            do_process_stream = 0
            do_have_imask = 0

    if do_process_stream:
        #log( 'do_process_stream')
        # process stream ---------------------------------
        # the digest covers the image data and, if present, the mask data
        state = mupdf.FzMd5()
        if mupdf_cppyy:
            mupdf.fz_md5_update_buffer( state, imgbuf)
        else:
            mupdf.fz_md5_update(state, imgbuf.m_internal.data, imgbuf.m_internal.len)
        if imask:
            maskbuf = JM_BufferFromBytes(imask)
            if mupdf_cppyy:
                mupdf.fz_md5_update_buffer( state, maskbuf)
            else:
                mupdf.fz_md5_update(state, maskbuf.m_internal.data, maskbuf.m_internal.len)
        digest = mupdf.fz_md5_final2(state)
        md5_py = bytes(digest)
        temp = digests.get(md5_py, None)
        if temp is not None:
            # same data was inserted before: re-use its xref and size
            img_xref = temp
            ref = mupdf.pdf_new_indirect(page.doc(), img_xref, 0)
            w = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Width'), PDF_NAME('W')))
            h = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Height'), PDF_NAME('H')))
            #goto have_xref()
            do_have_imask = 0
            do_have_image = 0
        else:
            image = mupdf.fz_new_image_from_buffer(imgbuf)
            w = image.w()
            h = image.h()
            if not imask:
                #goto have_image()
                do_have_imask = 0

    if do_have_imask:
        # `fz_compressed_buffer` is reference counted and
        # `mupdf.fz_new_image_from_compressed_buffer2()`
        # is provided as a Swig-friendly wrapper for
        # `fz_new_image_from_compressed_buffer()`, so we can do things
        # straightforwardly.
        #
        cbuf1 = mupdf.fz_compressed_image_buffer( image)
        if not cbuf1.m_internal:
            raise ValueError( "uncompressed image cannot have mask")
        bpc = image.bpc()
        colorspace = image.colorspace()
        xres, yres = mupdf.fz_image_resolution(image)
        mask = mupdf.fz_new_image_from_buffer(maskbuf)
        # rebuild the image from its compressed buffer, now with the mask
        image = mupdf.fz_new_image_from_compressed_buffer2(
                w,
                h,
                bpc,
                colorspace,
                xres,
                yres,
                1,  # interpolate
                0,  # imagemask,
                list(), # decode
                list(), # colorkey
                cbuf1,
                mask,
                )

    if do_have_image:
        #log( 'do_have_image')
        # store the new image in the PDF and remember its digest
        ref = mupdf.pdf_add_image(pdf, image)
        if oc:
            JM_add_oc_object(pdf, ref, oc)
        img_xref = mupdf.pdf_to_num(ref)
        digests[md5_py] = img_xref
        rc_digest = 1

    if do_have_xref:
        #log( 'do_have_xref')
        # make sure /Resources and /Resources/XObject exist
        resources = mupdf.pdf_dict_get_inheritable(page.obj(), PDF_NAME('Resources'))
        if not resources.m_internal:
            resources = mupdf.pdf_dict_put_dict(page.obj(), PDF_NAME('Resources'), 2)
        xobject = mupdf.pdf_dict_get(resources, PDF_NAME('XObject'))
        if not xobject.m_internal:
            xobject = mupdf.pdf_dict_put_dict(resources, PDF_NAME('XObject'), 2)
        mat = calc_image_matrix(w, h, clip, rotate, keep_proportion)
        # register the image under its name and emit the commands that
        # draw it with the computed matrix
        mupdf.pdf_dict_puts(xobject, _imgname, ref)
        nres = mupdf.fz_new_buffer(50)
        s = f"\nq\n{_format_g((mat.a, mat.b, mat.c, mat.d, mat.e, mat.f))} cm\n/{_imgname} Do\nQ\n"
        #s = s.replace('\n', '\r\n')
        mupdf.fz_append_string(nres, s)
        JM_insert_contents(pdf, page.obj(), nres, overlay)

    if rc_digest:
        return img_xref, digests
    else:
        return img_xref, None
def _insertFont(self, fontname, bfname, fontfile, fontbuffer, set_simple, idx, wmode, serif, encoding, ordering):
    """Insert a font into the PDF and reference it from this page.

    The font is created by ``JM_insert_font`` and stored under the name
    ``fontname`` in the page's /Resources/Font dictionary, which is
    created if missing.

    Returns:
        The value returned by ``JM_insert_font``.

    Raises:
        RuntimeError: if no xref could be determined for the font.
    """
    pdf_page = self._pdf_page()
    pdf = pdf_page.doc()
    page_obj = pdf_page.obj()

    value = JM_insert_font(pdf, bfname, fontfile,fontbuffer, set_simple, idx, wmode, serif, encoding, ordering)

    # get the objects /Resources, /Resources/Font
    resources = mupdf.pdf_dict_get_inheritable(page_obj, PDF_NAME('Resources'))
    if not resources.pdf_is_dict():
        resources = mupdf.pdf_dict_put_dict(page_obj, PDF_NAME("Resources"), 5)
    font_dict = mupdf.pdf_dict_get(resources, PDF_NAME('Font'))
    if not font_dict.m_internal:  # page has no fonts yet
        font_dict = mupdf.pdf_new_dict(pdf, 5)
        mupdf.pdf_dict_putl(page_obj, font_dict, PDF_NAME('Resources'), PDF_NAME('Font'))

    # the first item of 'value' is taken as the xref of the font
    _, font_xref = JM_INT_ITEM(value, 0)
    if not font_xref:
        raise RuntimeError( "cannot insert font")

    # store a named reference to the font in the fonts dictionary
    mupdf.pdf_dict_puts(font_dict, fontname, mupdf.pdf_new_indirect(pdf, font_xref, 0))
    return value
def _load_annot(self, name, xref):
    """Return the annotation with the given ``xref`` or, if ``xref`` is 0,
    with the given ``name``. Returns None if it is not found."""
    pdf_page = self._pdf_page()
    if xref == 0:
        found = JM_get_annot_by_name(pdf_page, name)
    else:
        found = JM_get_annot_by_xref(pdf_page, xref)
    if not found.m_internal:
        return None
    return Annot(found)
def _makePixmap(self, doc, ctm, cs, alpha=0, annots=1, clip=None):
    """Return a Pixmap made by ``JM_pixmap_from_page`` from this page."""
    return Pixmap(JM_pixmap_from_page(doc, self.this, ctm, cs, alpha, annots, clip))
def _other_box(self, boxtype):
    """Return the page box named ``boxtype`` or None.

    The box is read from the PDF page object if that key holds an array.
    None is returned if the resulting rectangle is infinite, which is the
    initial value used when nothing was read.
    """
    box = mupdf.FzRect( mupdf.FzRect.Fixed_INFINITE)
    pdf_page = _as_pdf_page(self.this, required=False)
    if pdf_page.m_internal:
        entry = mupdf.pdf_dict_gets( pdf_page.obj(), boxtype)
        if mupdf.pdf_is_array(entry):
            box = mupdf.pdf_to_rect(entry)
    return None if mupdf.fz_is_infinite_rect( box) else JM_py_from_rect(box)
def _pdf_page(self, required=True):
    """Return ``_as_pdf_page(self.this, required=required)``."""
    wrapped = self.this
    return _as_pdf_page(wrapped, required=required)
- def _reset_annot_refs(self):
- """Invalidate / delete all annots of this page."""
- self._annot_refs.clear()
def _set_opacity(self, gstate=None, CA=1, ca=1, blendmode=None):
    """Ensure an /ExtGState entry for the given opacities exists.

    Returns None if both ``CA`` and ``ca`` are >= 1 and ``blendmode`` is
    None. Otherwise returns the entry name "fitzcaXXYY", where XX and YY
    are ``CA`` and ``ca`` as percentages limited to 0..99. The entry is
    created with the /CA and /ca values if it does not exist yet. The
    passed ``gstate`` value is not used.
    """
    if CA >= 1 and ca >= 1 and blendmode is None:
        return
    pct_CA = min(int(round(max(CA , 0) * 100)), 99)
    pct_ca = min(int(round(max(ca, 0) * 100)), 99)
    gstate = "fitzca%02i%02i" % (pct_CA, pct_ca)

    pdf_page = _as_pdf_page(self.this)
    resources = mupdf.pdf_dict_get(pdf_page.obj(), PDF_NAME('Resources'))
    if not resources.m_internal:
        resources = mupdf.pdf_dict_put_dict(pdf_page.obj(), PDF_NAME('Resources'), 2)
    extg = mupdf.pdf_dict_get(resources, PDF_NAME('ExtGState'))
    if not extg.m_internal:
        extg = mupdf.pdf_dict_put_dict(resources, PDF_NAME('ExtGState'), 2)

    # nothing to do if an entry of that name exists already
    existing_names = (
        mupdf.pdf_to_name(mupdf.pdf_dict_get_key(extg, i))
        for i in range(mupdf.pdf_dict_len(extg))
    )
    if gstate in existing_names:
        return gstate

    opa = mupdf.pdf_new_dict(pdf_page.doc(), 3)
    mupdf.pdf_dict_put_real(opa, PDF_NAME('CA'), CA)
    mupdf.pdf_dict_put_real(opa, PDF_NAME('ca'), ca)
    mupdf.pdf_dict_puts(extg, gstate, opa)
    return gstate
- def _set_pagebox(self, boxtype, rect):
- doc = self.parent
- if doc is None:
- raise ValueError("orphaned object: parent is None")
- if not doc.is_pdf:
- raise ValueError("is no PDF")
- valid_boxes = ("CropBox", "BleedBox", "TrimBox", "ArtBox")
- if boxtype not in valid_boxes:
- raise ValueError("bad boxtype")
- rect = Rect(rect)
- mb = self.mediabox
- rect = Rect(rect[0], mb.y1 - rect[3], rect[2], mb.y1 - rect[1])
- if not (mb.x0 <= rect.x0 < rect.x1 <= mb.x1 and mb.y0 <= rect.y0 < rect.y1 <= mb.y1):
- raise ValueError(f"{boxtype} not in MediaBox")
- doc.xref_set_key(self.xref, boxtype, f"[{_format_g(tuple(rect))}]")
def _set_resource_property(self, name, xref):
    """Forward `name` and `xref` to JM_set_resource_property for this page's PDF object."""
    page_obj = self._pdf_page().obj()
    JM_set_resource_property(page_obj, name, xref)
def _show_pdf_page(self, fz_srcpage, overlay=1, matrix=None, xref=0, oc=0, clip=None, graftmap=None, _imgname=None):
    """Show a source page on this page by means of two Form XObjects.

    The source page is converted to an XObject by JM_xobject_from_page. A
    second XObject, created with the `clip` rectangle and `matrix`, invokes
    the first one under the name "fullpage". That second XObject is stored
    under `_imgname` in the page's /Resources/XObject, and a "Do" command for
    it is inserted into the page contents.

    Returns:
        `xref` if it is non-zero, else the object number of the source XObject.
    """
    # NOTE(review): graftmap defaults to None but is dereferenced below -
    # callers apparently always pass one; confirm.
    clip_rect = JM_rect_from_py(clip)
    transform = JM_matrix_from_py(matrix)
    target_page = _as_pdf_page(self.this)
    target_obj = target_page.obj()
    target_pdf = target_page.doc()  # target PDF
    ENSURE_OPERATION(target_pdf)

    # ---- source page -> Form XObject --------------------------------
    source_xobj = JM_xobject_from_page(target_pdf, fz_srcpage, xref, graftmap.this)
    result_xref = xref if xref else mupdf.pdf_to_num(source_xobj)

    # ---- referencing XObject (controls display on the target page) ---
    # its /Resources point to the source XObject under the name "fullpage"
    inner_xobjects = mupdf.pdf_new_dict(target_pdf, 5)
    mupdf.pdf_dict_puts(inner_xobjects, "fullpage", source_xobj)
    inner_resources = mupdf.pdf_new_dict(target_pdf, 5)
    mupdf.pdf_dict_put(inner_resources, PDF_NAME('XObject'), inner_xobjects)
    inner_stream = mupdf.fz_new_buffer(20)
    mupdf.fz_append_string(inner_stream, "/fullpage Do")
    display_xobj = mupdf.pdf_new_xobject(target_pdf, clip_rect, transform, inner_resources, inner_stream)
    if oc > 0:
        JM_add_oc_object(target_pdf, mupdf.pdf_resolve_indirect(display_xobj), oc)

    # ---- 1. register the XObject in the target page's /Resources -----
    page_resources = mupdf.pdf_dict_get_inheritable(target_obj, PDF_NAME('Resources'))
    if not page_resources.m_internal:
        page_resources = mupdf.pdf_dict_put_dict(target_obj,PDF_NAME('Resources'), 5)
    page_xobjects = mupdf.pdf_dict_get(page_resources, PDF_NAME('XObject'))
    if not page_xobjects.m_internal:
        page_xobjects = mupdf.pdf_dict_put_dict(page_resources, PDF_NAME('XObject'), 5)
    mupdf.pdf_dict_puts(page_xobjects, _imgname, display_xobj)

    # ---- 2. make and insert a new /Contents object -------------------
    do_command = mupdf.fz_new_buffer(50)
    for piece in (" q /", _imgname, " Do Q "):
        mupdf.fz_append_string(do_command, piece)
    JM_insert_contents(target_pdf, target_obj, do_command, overlay)
    return result_xref
def add_caret_annot(self, point: point_like) -> Annot:
    """Create a 'Caret' annotation at `point`.

    The page rotation reported by annot_preprocess() is re-applied afterwards
    (also on error) when it is non-zero.
    """
    saved_rotation = annot_preprocess(self)
    try:
        raw_annot = self._add_caret_annot(point)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    caret = Annot( raw_annot)
    annot_postprocess(self, caret)
    assert hasattr( caret, 'parent')
    return caret
def add_circle_annot(self, rect: rect_like) -> Annot:
    """Create a 'Circle' (ellipse, oval) annotation for `rect`.

    The page rotation reported by annot_preprocess() is re-applied afterwards
    (also on error) when it is non-zero.
    """
    saved_rotation = annot_preprocess(self)
    try:
        circle = self._add_square_or_circle(rect, mupdf.PDF_ANNOT_CIRCLE)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, circle)
    return circle
def add_file_annot(
        self,
        point: point_like,
        buffer_: ByteString,
        filename: str,
        ufilename: OptStr =None,
        desc: OptStr =None,
        icon: OptStr =None
        ) -> Annot:
    """Create a 'FileAttachment' annotation at `point`.

    All arguments are forwarded unchanged to `_add_file_annot`. The page
    rotation reported by annot_preprocess() is re-applied afterwards (also on
    error) when it is non-zero.
    """
    saved_rotation = annot_preprocess(self)
    try:
        attachment = self._add_file_annot(
            point,
            buffer_,
            filename,
            ufilename=ufilename,
            desc=desc,
            icon=icon,
        )
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, attachment)
    return attachment
def add_freetext_annot(
        self,
        rect: rect_like,
        text: str,
        *,
        fontsize: float =11,
        fontname: OptStr =None,
        text_color: OptSeq =None,
        fill_color: OptSeq =None,
        border_color: OptSeq =None,
        border_width: float =0,
        dashes: OptSeq =None,
        callout: OptSeq =None,
        line_end: int=mupdf.PDF_ANNOT_LE_OPEN_ARROW,
        opacity: float =1,
        align: int =0,
        rotate: int =0,
        richtext=False,
        style=None,
        ) -> Annot:
    """Create a 'FreeText' annotation in `rect` showing `text`.

    Every keyword-only option is handed through unchanged to
    `_add_freetext_annot`. The page rotation reported by annot_preprocess()
    is re-applied afterwards (also on error) when it is non-zero.
    """
    options = dict(
        fontsize=fontsize,
        fontname=fontname,
        text_color=text_color,
        fill_color=fill_color,
        border_color=border_color,
        border_width=border_width,
        dashes=dashes,
        callout=callout,
        line_end=line_end,
        opacity=opacity,
        align=align,
        rotate=rotate,
        richtext=richtext,
        style=style,
    )
    saved_rotation = annot_preprocess(self)
    try:
        freetext = self._add_freetext_annot(rect, text, **options)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, freetext)
    return freetext
def add_highlight_annot(self, quads=None, start=None,
        stop=None, clip=None) -> Annot:
    """Create a 'Highlight' annotation.

    When `quads` is None, the area comes from get_highlight_selection() using
    `start`, `stop` and `clip`; otherwise `quads` is checked by
    CheckMarkerArg().
    """
    if quads is not None:
        marked = CheckMarkerArg(quads)
    else:
        marked = get_highlight_selection(self, start=start, stop=stop, clip=clip)
    return self._add_text_marker(marked, mupdf.PDF_ANNOT_HIGHLIGHT)
def add_ink_annot(self, handwriting: list) -> Annot:
    """Create an 'Ink' ('handwriting') annotation.

    Args:
        handwriting: a list of lists of point_likes.

    The page rotation reported by annot_preprocess() is re-applied afterwards
    (also on error) when it is non-zero.
    """
    saved_rotation = annot_preprocess(self)
    try:
        ink = self._add_ink_annot(handwriting)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, ink)
    return ink
def add_line_annot(self, p1: point_like, p2: point_like) -> Annot:
    """Create a 'Line' annotation from `p1` to `p2`.

    The page rotation reported by annot_preprocess() is re-applied afterwards
    (also on error) when it is non-zero.
    """
    saved_rotation = annot_preprocess(self)
    try:
        line = self._add_line_annot(p1, p2)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, line)
    return line
def add_polygon_annot(self, points: list) -> Annot:
    """Create a 'Polygon' annotation through `points`.

    The page rotation reported by annot_preprocess() is re-applied afterwards
    (also on error) when it is non-zero.
    """
    saved_rotation = annot_preprocess(self)
    try:
        polygon = self._add_multiline(points, mupdf.PDF_ANNOT_POLYGON)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, polygon)
    return polygon
def add_polyline_annot(self, points: list) -> Annot:
    """Create a 'PolyLine' annotation through `points`.

    The page rotation reported by annot_preprocess() is re-applied afterwards
    (also on error) when it is non-zero.
    """
    saved_rotation = annot_preprocess(self)
    try:
        polyline = self._add_multiline(points, mupdf.PDF_ANNOT_POLY_LINE)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, polyline)
    return polyline
def add_rect_annot(self, rect: rect_like) -> Annot:
    """Create a 'Square' (rectangle) annotation for `rect`.

    The page rotation reported by annot_preprocess() is re-applied afterwards
    (also on error) when it is non-zero.
    """
    saved_rotation = annot_preprocess(self)
    try:
        square = self._add_square_or_circle(rect, mupdf.PDF_ANNOT_SQUARE)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, square)
    return square
def add_redact_annot(
        self,
        quad,
        text: OptStr =None,
        fontname: OptStr =None,
        fontsize: float =11,
        align: int =0,
        fill: OptSeq =None,
        text_color: OptSeq =None,
        cross_out: bool =True,
        ) -> Annot:
    """Create a 'Redact' annotation for `quad`.

    Args:
        quad: area to redact (passed to `_add_redact_annot`).
        text: optional replacement text; empty or whitespace-only text is
            treated as no text.
        fontname: font for the text, "Helv" when falsy.
        fontsize: font size for the text, 11 when falsy.
        align: text alignment value passed through.
        fill: fill color. Only normalized when text is present: None becomes
            white, a single float becomes a gray triple, longer sequences are
            cut to 3 components.
        text_color: text color, normalized like `fill`; black when falsy.
        cross_out: if true, the appearance stream is extended by extra path
            segments and a stroke so the rectangle shows as crossed out.
    """
    da_str = None
    if text and not set(string.whitespace).issuperset(text):
        CheckColor(fill)
        CheckColor(text_color)
        fontname = fontname if fontname else "Helv"
        fontsize = fontsize if fontsize else 11
        if not text_color:
            text_color = (0, 0, 0)
        if hasattr(text_color, "__float__"):
            text_color = (text_color,) * 3
        if len(text_color) > 3:
            text_color = text_color[:3]
        # default appearance string: color, font and size of the text
        da_str = "{:g} {:g} {:g} rg /{f:s} {s:g} Tf".format(*text_color, f=fontname, s=fontsize)
        if fill is None:
            fill = (1, 1, 1)
        if fill:
            if hasattr(fill, "__float__"):
                fill = (fill,) * 3
            if len(fill) > 3:
                fill = fill[:3]
    else:
        text = None

    saved_rotation = annot_preprocess(self)
    try:
        redact = self._add_redact_annot(quad, text=text, da_str=da_str,
                align=align, fill=fill)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, redact)

    # optionally change the appearance to a crossed-out rectangle
    if cross_out:
        commands = redact._getAP().splitlines()[:-1]  # drop the last line
        # exactly five lines are expected here: one leading command plus one
        # per rectangle corner
        _, LL, LR, UR, UL = commands
        commands.extend([LR, LL, UR, LL, UL, b"S"])
        redact._setAP(b"\n".join(commands), 0)
    return redact
def add_squiggly_annot(
        self,
        quads=None,
        start=None,
        stop=None,
        clip=None,
        ) -> Annot:
    """Create a 'Squiggly' annotation.

    When `quads` is None, the area comes from get_highlight_selection() using
    `start`, `stop` and `clip`; otherwise `quads` is checked by
    CheckMarkerArg().
    """
    if quads is not None:
        marked = CheckMarkerArg(quads)
    else:
        marked = get_highlight_selection(self, start=start, stop=stop, clip=clip)
    return self._add_text_marker(marked, mupdf.PDF_ANNOT_SQUIGGLY)
def add_stamp_annot(self, rect: rect_like, stamp=0) -> Annot:
    """Create a ('rubber') 'Stamp' annotation in `rect`.

    `stamp` is passed unchanged to `_add_stamp_annot`. The page rotation
    reported by annot_preprocess() is re-applied afterwards (also on error)
    when it is non-zero.
    """
    saved_rotation = annot_preprocess(self)
    try:
        stamp_annot = self._add_stamp_annot(rect, stamp)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, stamp_annot)
    return stamp_annot
def add_strikeout_annot(self, quads=None, start=None, stop=None, clip=None) -> Annot:
    """Create a 'StrikeOut' annotation.

    When `quads` is None, the area comes from get_highlight_selection() using
    `start`, `stop` and `clip`; otherwise `quads` is checked by
    CheckMarkerArg().
    """
    if quads is not None:
        marked = CheckMarkerArg(quads)
    else:
        marked = get_highlight_selection(self, start=start, stop=stop, clip=clip)
    return self._add_text_marker(marked, mupdf.PDF_ANNOT_STRIKE_OUT)
def add_text_annot(self, point: point_like, text: str, icon: str ="Note") -> Annot:
    """Create a 'Text' (sticky note) annotation at `point`.

    The page rotation reported by annot_preprocess() is re-applied afterwards
    (also on error) when it is non-zero.
    """
    saved_rotation = annot_preprocess(self)
    try:
        note = self._add_text_annot(point, text, icon=icon)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, note)
    return note
def add_underline_annot(self, quads=None, start=None, stop=None, clip=None) -> Annot:
    """Create an 'Underline' annotation.

    When `quads` is None, the area comes from get_highlight_selection() using
    `start`, `stop` and `clip`; otherwise `quads` is checked by
    CheckMarkerArg().
    """
    if quads is not None:
        marked = CheckMarkerArg(quads)
    else:
        marked = get_highlight_selection(self, start=start, stop=stop, clip=clip)
    return self._add_text_marker(marked, mupdf.PDF_ANNOT_UNDERLINE)
def add_widget(self, widget: Widget) -> Annot:
    """Create a 'Widget' (form field) on this page from `widget`.

    The widget is validated, an annotation is created via `_addWidget`, both
    objects are linked to this page (through a weak proxy) and the widget is
    updated.

    Returns:
        The new annotation, or None if `_addWidget` returned a falsy value.
    Raises:
        ValueError: if the document is no PDF.
    """
    CheckParent(self)
    if not self.parent.is_pdf:
        raise ValueError("is no PDF")
    widget._validate()
    new_annot = self._addWidget(widget.field_type, widget.field_name)
    if not new_annot:
        return None
    new_annot.thisown = True
    new_annot.parent = weakref.proxy(self)  # owning page object
    self._annot_refs[id(new_annot)] = new_annot
    widget.parent = new_annot.parent
    widget._annot = new_annot
    widget.update()
    return new_annot
def annot_names(self):
    """List of names of annotations, fields and links.

    Returns:
        The result of JM_get_annot_id_list() for this page, or an empty list
        if the page is not a PDF page.
    """
    CheckParent(self)
    page = self._pdf_page(required=False)
    if not page.m_internal:
        return []
    return JM_get_annot_id_list(page)
def annot_xrefs(self):
    """List of xref numbers of annotations, fields and links.

    Returns whatever JM_get_annot_xref_list2() produces for this page.
    """
    xref_list = JM_get_annot_xref_list2(self)
    return xref_list
-
def annots(self, types=None):
    """Generator over the annotations of a page.

    Link, Popup and Widget entries are never yielded.

    Args:
        types: (list) annotation types to subselect from. If it has no
            `__getitem__` (e.g. None), all annotations are returned.
            E.g. types=[PDF_ANNOT_LINE] will only yield line annotations.
    """
    excluded = (mupdf.PDF_ANNOT_LINK, mupdf.PDF_ANNOT_POPUP, mupdf.PDF_ANNOT_WIDGET)
    if hasattr(types, "__getitem__"):
        wanted = [
            item[0]
            for item in self.annot_xrefs()
            if item[1] in types and item[1] not in excluded
        ]
    else:
        wanted = [item[0] for item in self.annot_xrefs() if item[1] not in excluded]
    for annot_xref in wanted:
        loaded = self.load_annot(annot_xref)
        loaded._yielded=True  # flag annotations handed out by this generator
        yield loaded
def apply_redactions(
        page: 'Page',
        images: int = 2,
        graphics: int = 1,
        text: int = 0,
        ) -> bool:
    """Apply the redaction annotations of the page.

    The values of all redact annotations are saved first, then
    `page._apply_redactions` is called, and finally fill color and
    replacement text are drawn into the former annotation rectangles.

    Args:
        page: the PDF page.
        images:
              0 - ignore images
              1 - remove all overlapping images
              2 - blank out overlapping image parts
              3 - remove image unless invisible
        graphics:
              0 - ignore graphics
              1 - remove graphics if contained in rectangle
              2 - remove all overlapping graphics
        text:
              0 - remove text
              1 - ignore text
    Returns:
        False if the page has no redact annotations, else True.
    Raises:
        ValueError: document closed or encrypted, no PDF, or the redaction
            call reported failure.
    """

    def center_rect(annot_rect, new_text, font, fsize):
        """Calculate minimal sub-rectangle for the overlay text.

        Notes:
            Because 'insert_textbox' supports no vertical text centering,
            we calculate an approximate number of lines here and return a
            sub-rect with smaller height, which should still be sufficient.
            The passed-in rectangle object itself is modified and returned.
        Args:
            annot_rect: the annotation rectangle
            new_text: the text to insert.
            font: the fontname. Must be one of the CJK or Base-14 set, else
                the rectangle is returned unchanged.
            fsize: the fontsize
        Returns:
            A rectangle to use instead of the annot rectangle.
        """
        if not new_text or annot_rect.width <= EPSILON:
            return annot_rect
        try:
            text_width = get_text_length(new_text, font, fsize)
        except (ValueError, mupdf.FzErrorBase):  # unsupported font
            if g_exceptions_verbose:
                exception_info()
            return annot_rect
        # estimated height: number of lines times a line height of 1.2 * fsize
        needed = math.ceil(text_width / annot_rect.width) * (fsize * 1.2)
        if needed >= annot_rect.height:
            return annot_rect
        annot_rect.y0 = (annot_rect.tl.y + annot_rect.bl.y - needed) * 0.5
        return annot_rect

    CheckParent(page)
    doc = page.parent
    if doc.is_encrypted or doc.is_closed:
        raise ValueError("document closed or encrypted")
    if not doc.is_pdf:
        raise ValueError("is no PDF")

    # save the values of all redactions before they are applied
    saved_values = [
        annot._get_redact_values()
        for annot in page.annots(
            types=(mupdf.PDF_ANNOT_REDACT,)  # pylint: disable=no-member
        )
    ]
    if saved_values == []:
        return False  # no redactions on this page

    if not page._apply_redactions(text, images, graphics):  # call MuPDF
        raise ValueError("Error applying redactions.")  # should not happen really

    # now write fill color and replacement text into the old rectangles
    shape = page.new_shape()
    for values in saved_values:
        annot_rect = values["rect"]
        fill = values["fill"]
        if fill:
            shape.draw_rect(annot_rect)  # colorize the rect background
            shape.finish(fill=fill, color=fill)
        if "text" not in values.keys():
            continue
        new_text = values["text"]
        align = values.get("align", 0)
        fname = values["fontname"]
        fsize = values["fontsize"]
        color = values["text_color"]
        # try finding a vertically centered sub-rect
        trect = center_rect(annot_rect, new_text, fname, fsize)
        rc = -1
        # retry with a font size reduced by 0.5 until the text fits
        while rc < 0 and fsize >= 4:
            rc = shape.insert_textbox(
                trect,
                new_text,
                fontname=fname,
                fontsize=fsize,
                color=color,
                align=align,
            )
            fsize -= 0.5
    shape.commit()  # append new contents object
    return True
def recolor(self, components=1):
    """Convert colorspaces of objects on the page.

    Args:
        components: number of color components; valid values are 1, 3 and 4.
    Raises:
        ValueError: for any other value of `components`.
    """
    if components not in (1, 3, 4):
        raise ValueError("components must be one of 1, 3, 4")
    pdf_document = _as_pdf_document(self.parent)
    raw_options = mupdf.pdf_recolor_options()
    raw_options.num_comp = components
    mupdf.pdf_recolor_page(pdf_document, self.number, mupdf.PdfRecolorOptions(raw_options))
def clip_to_rect(self, rect):
    """Clip away page content outside the rectangle.

    Args:
        rect: rect-like; it is multiplied by the page's transformation matrix
            before being passed to MuPDF.
    Raises:
        ValueError: if `rect` is infinite or its intersection with the page
            rectangle is empty.
    """
    clip = Rect(rect)
    if clip.is_infinite or (clip & self.rect).is_empty:
        raise ValueError("rect must not be infinite or empty")
    clip *= self.transformation_matrix
    mupdf.pdf_clip_page(_as_pdf_page(self), JM_rect_from_py(clip))
def get_layout(self):
    """Try to access layout information.

    Does nothing when `layout_information` is already set or when no
    `_get_layout` callable is available; otherwise stores its result in
    `self.layout_information`.
    """
    already_present = self.layout_information is not None
    if already_present or not _get_layout:
        return
    self.layout_information = _get_layout(self)
@property
def artbox(self):
    """The ArtBox.

    Falls back to the CropBox when the page has no /ArtBox. Otherwise the
    stored y-coordinates are flipped against `mediabox.y1`.
    """
    stored = self._other_box("ArtBox")
    if stored is None:
        return self.cropbox
    bottom = self.mediabox.y1
    return Rect(stored[0], bottom - stored[3], stored[2], bottom - stored[1])
@property
def bleedbox(self):
    """The BleedBox.

    Falls back to the CropBox when the page has no /BleedBox. Otherwise the
    stored y-coordinates are flipped against `mediabox.y1`.
    """
    stored = self._other_box("BleedBox")
    if stored is None:
        return self.cropbox
    bottom = self.mediabox.y1
    return Rect(stored[0], bottom - stored[3], stored[2], bottom - stored[1])
def bound(self):
    """Get page rectangle.

    Returns:
        The Rect reported by MuPDF. If that rectangle is infinite and the
        document is a PDF, a rectangle (0, 0, w, h) derived from the CropBox
        is returned instead, with width and height swapped unless the page
        rotation is 0 or 180; the most recent MuPDF warning, if any, is
        passed to message().
    """
    CheckParent(self)
    page = _as_fz_page(self.this)
    val = Rect(mupdf.fz_bound_page(page))

    if val.is_infinite and self.parent.is_pdf:
        cb = self.cropbox
        w, h = cb.width, cb.height
        if self.rotation not in (0, 180):
            w, h = h, w
        val = Rect(0, 0, w, h)
        # The warnings store may be empty: only report when there is a line,
        # instead of failing with IndexError.
        warning_lines = TOOLS.mupdf_warnings(reset=False).splitlines()
        if warning_lines:
            message(warning_lines[-1])

    return val
def clean_contents(self, sanitize=1):
    """Filter the page contents through MuPDF.

    When `sanitize` is falsy and the page is not yet wrapped, the contents
    are wrapped first. Nothing further happens for non-PDF pages.
    """
    if not (sanitize or self.is_wrapped):
        self.wrap_contents()
    pdf_page = _as_pdf_page( self.this, required=False)
    if not pdf_page.m_internal:
        return
    options = _make_PdfFilterOptions(recurse=1, sanitize=sanitize)
    mupdf.pdf_filter_page_contents( pdf_page.doc(), pdf_page, options)
-
@property
def cropbox(self):
    """The CropBox.

    For PDF pages this is JM_cropbox() of the page object, for other pages
    the page bound reported by MuPDF; in both cases converted to a Rect.
    """
    CheckParent(self)
    pdf_page = self._pdf_page(required=False)
    if pdf_page.m_internal:
        box = JM_cropbox(pdf_page.obj())
    else:
        box = mupdf.fz_bound_page(self.this)
    return Rect(box)
@property
def cropbox_position(self):
    """Top-left point (`tl`) of the CropBox."""
    crop = self.cropbox
    return crop.tl
def delete_annot(self, annot):
    """Delete annot and return next one.

    Annotations found by JM_find_annot_irt() for `annot` are deleted first.
    The successor is wrapped in an Annot; if that wrapper is truthy, it is
    registered with this page. `annot` itself is erased at the end.
    """
    CheckParent(self)
    CheckParent(annot)
    pdf_page = self._pdf_page()

    # first remove all /IRT annotations pointing to this one
    while True:
        dependent = JM_find_annot_irt(annot.this)
        if not dependent:  # no more there
            break
        mupdf.pdf_delete_annot(pdf_page, dependent.this)

    successor = mupdf.pdf_next_annot(annot.this)  # remember the next one
    mupdf.pdf_delete_annot(pdf_page, annot.this)
    result = Annot(successor)
    if result:
        result.thisown = True
        result.parent = weakref.proxy(self)  # owning page object
        result.parent._annot_refs[id(result)] = result
    annot._erase()
    return result
def delete_image(page: 'Page', xref: int):
    """Delete the image referred to by xref.

    Actually replaces it by a small transparent Pixmap using method
    Page.replace_image.

    Args:
        xref: xref of the image to delete.
    """
    # a 1x1 gray pixmap with alpha; the dimension is arbitrary
    blank = Pixmap(csGRAY, (0, 0, 1, 1), 1)
    blank.clear_with()  # clear all sample bytes to 0x00 -> 100% transparent
    page.replace_image(xref, pixmap=blank)
def delete_link(self, linkdict):
    """Delete a Link.

    Args:
        linkdict: a link dictionary; anything that is not a dict is ignored.

    The link object is removed from the page's /Annots array and deleted from
    the PDF. On every exit path after the type check, the Python object
    registered under linkdict["id"] (if any) is erased, unless
    linkdict["xref"] is 0.
    """
    CheckParent(self)
    if not isinstance( linkdict, dict):
        return  # have no dictionary

    def _erase_link_object():
        # Invalidate the Python-side link object, best effort.
        if linkdict["xref"] == 0: return
        try:
            self._annot_refs[linkdict["id"]]._erase()
        except Exception:
            # Don't print this exception, to match classic. Issue #2841.
            if g_exceptions_verbose > 1: exception_info()

    pdf_page = _as_pdf_page(self.this, required=False)
    if not pdf_page.m_internal:
        return _erase_link_object()  # have no PDF
    xref = linkdict[dictkey_xref]
    if xref < 1:
        return _erase_link_object()  # invalid xref
    annots = mupdf.pdf_dict_get( pdf_page.obj(), PDF_NAME('Annots'))
    if not annots.m_internal:
        return _erase_link_object()  # have no annotations
    count = mupdf.pdf_array_len( annots)
    if count == 0:
        return _erase_link_object()

    # find the position of xref in the /Annots array
    position = None
    for idx in range( count):
        if mupdf.pdf_to_num( mupdf.pdf_array_get( annots, idx)) == xref:
            position = idx
            break
    if position is None:
        return _erase_link_object()  # xref not in annotations

    mupdf.pdf_array_delete( annots, position)  # delete entry in annotations
    mupdf.pdf_delete_object( pdf_page.doc(), xref)  # delete link object
    mupdf.pdf_dict_put( pdf_page.obj(), PDF_NAME('Annots'), annots)
    JM_refresh_links( pdf_page)
    return _erase_link_object()
def delete_widget(page: 'Page', widget: Widget) -> Widget:
    """Delete widget from page and return the next one.

    After deletion of its annotation, all instance attributes of `widget`
    are removed.

    Raises:
        ValueError: if `widget` has no `_annot` attribute (or it is None).
    """
    CheckParent(page)
    widget_annot = getattr(widget, "_annot", None)
    if widget_annot is None:
        raise ValueError("bad type: widget")
    successor = widget.next  # must be read before the widget is emptied
    page.delete_annot(widget_annot)
    widget._annot.parent = None
    for attr_name in list(widget.__dict__.keys()):
        del widget.__dict__[attr_name]
    return successor
@property
def derotation_matrix(self) -> Matrix:
    """Reflects page de-rotation."""
    if g_use_extra:
        return Matrix(extra.Page_derotate_matrix( self.this))
    pdf_page = self._pdf_page(required=False)
    if pdf_page.m_internal:
        return Matrix(JM_derotate_page_matrix(pdf_page))
    # NOTE(review): the non-PDF fallback builds a Matrix from FzRect.UNIT -
    # an identity matrix looks intended; confirm against Matrix.__init__.
    return Matrix(mupdf.FzRect(mupdf.FzRect.UNIT))
def draw_bezier(
        page: 'Page',
        p1: point_like,
        p2: point_like,
        p3: point_like,
        p4: point_like,
        color: OptSeq = (0,),
        fill: OptSeq = None,
        dashes: OptStr = None,
        width: float = 1,
        morph: OptStr = None,
        closePath: bool = False,
        lineCap: int = 0,
        lineJoin: int = 0,
        overlay: bool = True,
        stroke_opacity: float = 1,
        fill_opacity: float = 1,
        oc: int = 0,
        ) -> Point:
    """Draw a general cubic Bezier curve from p1 to p4 using control points p2 and p3.

    A new Shape of the page is used: the curve is drawn, the drawing
    properties are applied via `finish` and the shape is committed with
    `overlay`. Returns the value of `Shape.draw_bezier`.
    """
    shape = page.new_shape()
    end_point = shape.draw_bezier(Point(p1), Point(p2), Point(p3), Point(p4))
    shape.finish(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        closePath=closePath,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.commit(overlay)
    return end_point
def draw_circle(
        page: 'Page',
        center: point_like,
        radius: float,
        color: OptSeq = (0,),
        fill: OptSeq = None,
        morph: OptSeq = None,
        dashes: OptStr = None,
        width: float = 1,
        lineCap: int = 0,
        lineJoin: int = 0,
        overlay: bool = True,
        stroke_opacity: float = 1,
        fill_opacity: float = 1,
        oc: int = 0,
        ) -> Point:
    """Draw a circle given its center and radius.

    A new Shape of the page is used: the circle is drawn, the drawing
    properties are applied via `finish` and the shape is committed with
    `overlay`. Returns the value of `Shape.draw_circle`.
    """
    shape = page.new_shape()
    end_point = shape.draw_circle(Point(center), radius)
    shape.finish(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.commit(overlay)
    return end_point
def draw_curve(
        page: 'Page',
        p1: point_like,
        p2: point_like,
        p3: point_like,
        color: OptSeq = (0,),
        fill: OptSeq = None,
        dashes: OptStr = None,
        width: float = 1,
        morph: OptSeq = None,
        closePath: bool = False,
        lineCap: int = 0,
        lineJoin: int = 0,
        overlay: bool = True,
        stroke_opacity: float = 1,
        fill_opacity: float = 1,
        oc: int = 0,
        ) -> Point:
    """Draw a special Bezier curve from p1 to p3, generating control points on lines p1 to p2 and p2 to p3.

    A new Shape of the page is used: the curve is drawn, the drawing
    properties are applied via `finish` and the shape is committed with
    `overlay`. Returns the value of `Shape.draw_curve`.
    """
    shape = page.new_shape()
    end_point = shape.draw_curve(Point(p1), Point(p2), Point(p3))
    shape.finish(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        closePath=closePath,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.commit(overlay)
    return end_point
def draw_line(
        page: 'Page',
        p1: point_like,
        p2: point_like,
        color: OptSeq = (0,),
        dashes: OptStr = None,
        width: float = 1,
        lineCap: int = 0,
        lineJoin: int = 0,
        overlay: bool = True,
        morph: OptSeq = None,
        stroke_opacity: float = 1,
        fill_opacity: float = 1,
        oc=0,
        ) -> Point:
    """Draw a line from point p1 to point p2.

    A new Shape of the page is used: the line is drawn, the drawing
    properties are applied via `finish` (never closing the path) and the
    shape is committed with `overlay`. Returns the value of `Shape.draw_line`.
    """
    shape = page.new_shape()
    end_point = shape.draw_line(Point(p1), Point(p2))
    shape.finish(
        color=color,
        dashes=dashes,
        width=width,
        closePath=False,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.commit(overlay)
    return end_point
def draw_oval(
        page: 'Page',
        rect: typing.Union[rect_like, quad_like],
        color: OptSeq = (0,),
        fill: OptSeq = None,
        dashes: OptStr = None,
        morph: OptSeq = None,
        width: float = 1,
        lineCap: int = 0,
        lineJoin: int = 0,
        overlay: bool = True,
        stroke_opacity: float = 1,
        fill_opacity: float = 1,
        oc: int = 0,
        ) -> Point:
    """Draw an oval given its containing rectangle or quad.

    A new Shape of the page is used: the oval is drawn (`rect` is passed on
    unconverted), the drawing properties are applied via `finish` and the
    shape is committed with `overlay`. Returns the value of `Shape.draw_oval`.
    """
    shape = page.new_shape()
    end_point = shape.draw_oval(rect)
    shape.finish(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.commit(overlay)
    return end_point
def draw_polyline(
        page: 'Page',
        points: list,
        color: OptSeq = (0,),
        fill: OptSeq = None,
        dashes: OptStr = None,
        width: float = 1,
        morph: OptSeq = None,
        lineCap: int = 0,
        lineJoin: int = 0,
        overlay: bool = True,
        closePath: bool = False,
        stroke_opacity: float = 1,
        fill_opacity: float = 1,
        oc: int = 0,
        ) -> Point:
    """Draw multiple connected line segments.

    A new Shape of the page is used: the polyline is drawn (`points` is
    passed on unconverted), the drawing properties are applied via `finish`
    and the shape is committed with `overlay`. Returns the value of
    `Shape.draw_polyline`.
    """
    shape = page.new_shape()
    end_point = shape.draw_polyline(points)
    shape.finish(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        closePath=closePath,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.commit(overlay)
    return end_point
def draw_quad(
        page: 'Page',
        quad: quad_like,
        color: OptSeq = (0,),
        fill: OptSeq = None,
        dashes: OptStr = None,
        width: float = 1,
        lineCap: int = 0,
        lineJoin: int = 0,
        morph: OptSeq = None,
        overlay: bool = True,
        stroke_opacity: float = 1,
        fill_opacity: float = 1,
        oc: int = 0,
        ) -> Point:
    """Draw a quadrilateral.

    A new Shape of the page is used: the quad is drawn, the drawing
    properties are applied via `finish` and the shape is committed with
    `overlay`. Returns the value of `Shape.draw_quad`.
    """
    shape = page.new_shape()
    end_point = shape.draw_quad(Quad(quad))
    shape.finish(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.commit(overlay)
    return end_point
def draw_rect(
        page: 'Page',
        rect: rect_like,
        color: OptSeq = (0,),
        fill: OptSeq = None,
        dashes: OptStr = None,
        width: float = 1,
        lineCap: int = 0,
        lineJoin: int = 0,
        morph: OptSeq = None,
        overlay: bool = True,
        stroke_opacity: float = 1,
        fill_opacity: float = 1,
        oc: int = 0,
        radius=None,
        ) -> Point:
    '''
    Draw a rectangle. See Shape class method for details.

    A new Shape of the page is used: the rectangle is drawn (`radius` is
    passed on to `Shape.draw_rect`), the drawing properties are applied via
    `finish` and the shape is committed with `overlay`. Returns the value of
    `Shape.draw_rect`.
    '''
    shape = page.new_shape()
    end_point = shape.draw_rect(Rect(rect), radius=radius)
    shape.finish(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.commit(overlay)
    return end_point
def draw_sector(
        page: 'Page',
        center: point_like,
        point: point_like,
        beta: float,
        color: OptSeq = (0,),
        fill: OptSeq = None,
        dashes: OptStr = None,
        fullSector: bool = True,
        morph: OptSeq = None,
        width: float = 1,
        closePath: bool = False,
        lineCap: int = 0,
        lineJoin: int = 0,
        overlay: bool = True,
        stroke_opacity: float = 1,
        fill_opacity: float = 1,
        oc: int = 0,
        ) -> Point:
    """Draw a circle sector given circle center, one arc end point and the angle of the arc.

    Parameters:
        center -- center of circle
        point -- arc end point
        beta -- angle of arc (degrees)
        fullSector -- connect arc ends with center

    A new Shape of the page is used: the sector is drawn, the drawing
    properties are applied via `finish` and the shape is committed with
    `overlay`. Returns the value of `Shape.draw_sector`.
    """
    shape = page.new_shape()
    end_point = shape.draw_sector(Point(center), Point(point), beta, fullSector=fullSector)
    shape.finish(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        closePath=closePath,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.commit(overlay)
    return end_point
def draw_squiggle(
        page: 'Page',
        p1: point_like,
        p2: point_like,
        breadth: float = 2,
        color: OptSeq = (0,),
        dashes: OptStr = None,
        width: float = 1,
        lineCap: int = 0,
        lineJoin: int = 0,
        overlay: bool = True,
        morph: OptSeq = None,
        stroke_opacity: float = 1,
        fill_opacity: float = 1,
        oc: int = 0,
        ) -> Point:
    """Draw a squiggly line from point p1 to point p2.

    A new Shape of the page is used: the line is drawn with the given
    `breadth`, the drawing properties are applied via `finish` (never closing
    the path) and the shape is committed with `overlay`. Returns the value of
    `Shape.draw_squiggle`.
    """
    shape = page.new_shape()
    end_point = shape.draw_squiggle(Point(p1), Point(p2), breadth=breadth)
    shape.finish(
        color=color,
        dashes=dashes,
        width=width,
        closePath=False,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.commit(overlay)
    return end_point
def draw_zigzag(
        page: 'Page',
        p1: point_like,
        p2: point_like,
        breadth: float = 2,
        color: OptSeq = (0,),
        dashes: OptStr = None,
        width: float = 1,
        lineCap: int = 0,
        lineJoin: int = 0,
        overlay: bool = True,
        morph: OptSeq = None,
        stroke_opacity: float = 1,
        fill_opacity: float = 1,
        oc: int = 0,
        ) -> Point:
    """Draw a zigzag line from point p1 to point p2.

    A new Shape of the page is used: the line is drawn with the given
    `breadth`, the drawing properties are applied via `finish` (never closing
    the path) and the shape is committed with `overlay`. Returns the value of
    `Shape.draw_zigzag`.
    """
    shape = page.new_shape()
    end_point = shape.draw_zigzag(Point(p1), Point(p2), breadth=breadth)
    shape.finish(
        color=color,
        dashes=dashes,
        width=width,
        closePath=False,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.commit(overlay)
    return end_point
def extend_textpage(self, tpage, flags=0, matrix=None):
    """Run this page into the existing text page `tpage`.

    Args:
        tpage: object whose `.this` is a mupdf.FzStextPage.
        flags: value stored in the FzStextOptions used for the device.
        matrix: converted by JM_matrix_from_py and used as the matrix for
            running the page.
    """
    stext_page = tpage.this
    assert isinstance( stext_page, mupdf.FzStextPage)
    stext_options = mupdf.FzStextOptions()
    stext_options.flags = flags
    ctm = JM_matrix_from_py(matrix)
    device = mupdf.FzDevice(stext_page, stext_options)
    mupdf.fz_run_page( self.this, device, ctm, mupdf.FzCookie())
    mupdf.fz_close_device( device)
@property
def first_annot(self):
    """First annotation.

    None for non-PDF pages or when the page has no annotation. Otherwise the
    Annot is registered with this page and linked to it via a weak proxy.
    """
    CheckParent(self)
    pdf_page = self._pdf_page(required=False)
    if not pdf_page.m_internal:
        return None
    raw_annot = mupdf.pdf_first_annot(pdf_page)
    if not raw_annot.m_internal:
        return None
    first = Annot(raw_annot)
    first.thisown = True
    first.parent = weakref.proxy(self)  # owning page object
    self._annot_refs[id(first)] = first
    return first
@property
def first_link(self):
    """First link on page: the result of `load_links()`."""
    link = self.load_links()
    return link
@property
def first_widget(self):
    """First widget/field.

    None for non-PDF pages or when the page has no widget. Otherwise the
    underlying Annot is registered with this page and a Widget filled from it
    by TOOLS._fill_widget() is returned.
    """
    CheckParent(self)
    pdf_page = self._pdf_page(required=False)
    if not pdf_page.m_internal:
        return None
    raw_widget = mupdf.pdf_first_widget(pdf_page)
    if not raw_widget.m_internal:
        return None
    widget_annot = Annot(raw_widget)
    widget_annot.thisown = True
    widget_annot.parent = weakref.proxy(self)  # owning page object
    self._annot_refs[id(widget_annot)] = widget_annot
    result = Widget()
    TOOLS._fill_widget(widget_annot, result)
    return result
def get_bboxlog(self, layers=None):
    """Run the page through a bbox device and return the collected list.

    Args:
        layers: if truthy, the device is created with layer inclusion
            switched on.
    Returns:
        The list filled by the device created with JM_new_bbox_device().

    The page is processed with rotation 0; a non-zero rotation is restored
    afterwards, also when running the page raises.
    """
    CheckParent(self)
    old_rotation = self.rotation
    if old_rotation != 0:
        self.set_rotation(0)
    try:
        page = self.this
        rc = []
        inc_layers = True if layers else False
        dev = JM_new_bbox_device( rc, inc_layers)
        mupdf.fz_run_page( page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
        mupdf.fz_close_device( dev)
    finally:
        # never leave the page de-rotated, even if running it failed
        if old_rotation != 0:
            self.set_rotation(old_rotation)
    return rc
def get_cdrawings(self, extended=None, callback=None, method=None):
    """Extract vector graphics ("line art") from the page.

    Args:
        extended: passed on to the extraction; a truthy value also switches
            on clips in the Python fallback device.
        callback: if callable, nothing is returned.
        method: if not None, nothing is returned.
    Returns:
        The extraction result, or None when `callback` is callable or
        `method` is not None.

    The page is processed with rotation 0; a non-zero rotation is restored
    afterwards, also when the extraction raises.
    """
    CheckParent(self)
    old_rotation = self.rotation
    if old_rotation != 0:
        self.set_rotation(0)
    try:
        page = self.this
        if isinstance(page, mupdf.PdfPage):
            # Downcast pdf_page to fz_page.
            page = mupdf.FzPage(page)
        assert isinstance(page, mupdf.FzPage), f'{self.this=}'
        clips = True if extended else False
        prect = mupdf.fz_bound_page(page)
        # The "1 or" forces the implementation from the extra module; the
        # pure Python device below is currently never used.
        if 1 or g_use_extra:
            rc = extra.get_cdrawings(page, extended, callback, method)
        else:
            rc = list()
            if callable(callback) or method is not None:
                dev = JM_new_lineart_device_Device(callback, clips, method)
            else:
                dev = JM_new_lineart_device_Device(rc, clips, method)
            dev.ptm = mupdf.FzMatrix(1, 0, 0, -1, 0, prect.y1)
            mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
            mupdf.fz_close_device(dev)
    finally:
        # never leave the page de-rotated, even if the extraction failed
        if old_rotation != 0:
            self.set_rotation(old_rotation)
    if callable(callback) or method is not None:
        return
    return rc
def get_contents(self):
    """Get xrefs of the page's /Contents objects.

    Returns:
        A list of xref numbers: one per array entry if /Contents is an
        array, a single one if it is any other existing object, and an
        empty list otherwise.
    """
    CheckParent(self)
    pdf_page = _as_pdf_page(self.this)
    page_obj = pdf_page.obj()
    contents = mupdf.pdf_dict_get(page_obj, mupdf.PDF_ENUM_NAME_Contents)
    if mupdf.pdf_is_array(contents):
        count = mupdf.pdf_array_len(contents)
        return [
            mupdf.pdf_to_num(mupdf.pdf_array_get(contents, idx))
            for idx in range(count)
        ]
    if contents.m_internal:
        return [mupdf.pdf_to_num(contents)]
    return []
def get_displaylist(self, annots=1):
    """Make a DisplayList from the page for Pixmap generation.

    Args:
        annots: if truthy (default) the list is built from the whole page,
            otherwise from the page contents only.
    """
    CheckParent(self)
    builder = (
        mupdf.fz_new_display_list_from_page
        if annots
        else mupdf.fz_new_display_list_from_page_contents
    )
    return DisplayList(builder(self.this))
def get_drawings(self, extended: bool=False) -> list:
    """Retrieve vector graphics as a list of path dictionaries.

    Args:
        extended: passed to `get_cdrawings()`.

    Returns:
        The list returned by `get_cdrawings()`, modified in place:
        - "scissor" (types starting with "clip") or "rect" (all other
          types) is converted to a Rect,
        - for every type except "group", each item becomes a tuple using
          Rect / Quad / Point objects,
        - for types 'f' and 's', keys missing from the dictionary are
          added with value None.
    """
    default_keys = (
        'closePath',
        'fill',
        'color',
        'width',
        'lineCap',
        'lineJoin',
        'dashes',
        'stroke_opacity',
        'fill_opacity',
        'even_odd',
    )

    def convert_item(item):
        """Turn one raw draw command into a tuple of geometry objects."""
        op = item[0]
        operands = item[1:]
        if op == "re":
            return ("re", Rect(operands[0]).normalize(), operands[1])
        if op == "qu":
            return ("qu", Quad(operands[0]))
        return tuple([op] + [Point(p) for p in operands])

    paths = self.get_cdrawings(extended=extended)
    for idx, path in enumerate(paths):
        kind = path["type"]
        if kind.startswith("clip"):
            path["scissor"] = Rect(path["scissor"])
        else:
            path["rect"] = Rect(path["rect"])
        if kind != "group":
            path["items"] = [convert_item(item) for item in path["items"]]
        if kind in ('f', 's'):
            for key in default_keys:
                path[key] = path.get(key)
        paths[idx] = path
    return paths
class Drawpath(object):
    """Reflects a path dictionary from get_cdrawings().

    Every keyword argument becomes an instance attribute of the same name.
    """

    def __init__(self, **args):
        vars(self).update(args)
-
class Drawpathlist(object):
    """List of Path objects representing get_cdrawings() output.

    Besides the paths themselves, the object counts how many paths of each
    type ("clip", "group", "f", "s", "fs") have been appended.
    """

    def __getitem__(self, item):
        return self.paths.__getitem__(item)

    def __init__(self):
        self.paths = []
        self.path_count = 0
        self.group_count = 0
        self.clip_count = 0
        self.fill_count = 0
        self.stroke_count = 0
        self.fillstroke_count = 0

    def __len__(self):
        return self.paths.__len__()

    def append(self, path):
        """Append a path and update the per-type counters."""
        self.paths.append(path)
        self.path_count += 1
        if path.type == "clip":
            self.clip_count += 1
        elif path.type == "group":
            self.group_count += 1
        elif path.type == "f":
            self.fill_count += 1
        elif path.type == "s":
            self.stroke_count += 1
        elif path.type == "fs":
            self.fillstroke_count += 1

    def _parents(self, i, kind):
        """Return the enclosing paths of type `kind` for path number `i`.

        Walks backwards from path `i` and keeps each path of the requested
        type whose level is below the level of path `i` and below the level
        of the previously accepted parent.
        """
        # An empty list has no valid index. Without this check a negative
        # index would never become non-negative below.
        if self.path_count == 0 or i >= self.path_count:
            raise IndexError("bad path index")
        i %= self.path_count  # wrap negative indices into the valid range
        lvl = self.paths[i].level
        parents = []
        for p in reversed(self.paths[:i]):
            if p.type != kind or p.level >= lvl:
                continue
            if parents and p.level >= parents[-1].level:
                continue  # only accept strictly smaller levels
            parents.append(p)
        return parents

    def clip_parents(self, i):
        """Return list of parent clip paths.

        Args:
            i: (int) return parents of this path.
        Returns:
            List of the clip parents.
        Raises:
            IndexError: if the list is empty or `i` is not below the number
                of paths.
        """
        return self._parents(i, "clip")

    def group_parents(self, i):
        """Return list of parent group paths.

        Args:
            i: (int) return parents of this path.
        Returns:
            List of the group parents.
        Raises:
            IndexError: if the list is empty or `i` is not below the number
                of paths.
        """
        return self._parents(i, "group")
def get_lineart(self) -> object:
    """Get the page's drawing paths as a Drawpathlist object.

    Note:
        Works on the output of `get_cdrawings(extended=True)`. Each path
        dictionary becomes a Drawpath whose "scissor" (type "clip") or
        "rect" (all other types) is a Rect, and whose items (all types
        except "group") are tuples using Rect / Quad / Point objects.
        Fill paths (type "f") get their stroke-related attributes set to
        None. In contrast to get_drawings(), the result is an object.
    """
    stroke_only_attrs = (
        "stroke_opacity",
        "dashes",
        "line_join",
        "line_cap",
        "color",
        "width",
    )
    raw_paths = self.get_cdrawings(extended=True)
    result = self.Drawpathlist()
    for raw in raw_paths:
        entry = self.Drawpath(**raw)
        if entry.type == "clip":
            entry.scissor = Rect(raw["scissor"])
        else:
            entry.rect = Rect(raw["rect"])
        if entry.type != "group":
            converted = []
            for item in raw["items"]:
                op = item[0]
                operands = item[1:]
                if op == "re":
                    new_item = ("re", Rect(operands[0]).normalize(), operands[1])
                elif op == "qu":
                    new_item = ("qu", Quad(operands[0]))
                else:
                    new_item = tuple([op] + [Point(p) for p in operands])
                converted.append(new_item)
            entry.items = converted

        if entry.type == "f":
            # a pure fill path carries no stroke information
            for attr in stroke_only_attrs:
                setattr(entry, attr, None)
        result.append(entry)
    return result
def get_image_info(
        page: 'Page',
        hashes: bool = False,
        xrefs: bool = False
        ) -> list:
    """Extract image information only from a pymupdf.TextPage.

    Args:
        hashes: (bool) include MD5 hash for each image.
        xrefs: (bool) try to find the xref for each image. Sets hashes to true.
            Ignored if the document is not a PDF.

    Returns:
        The list of image information items. When hashes were computed the
        list is stored on the page as `_image_info` and re-used by later
        calls.
    """
    doc = page.parent
    if not doc.is_pdf:
        xrefs = False
    elif xrefs:
        hashes = True

    info = getattr(page, "_image_info", None)
    if info and not xrefs:
        return info  # a stored result is good enough
    if not info:
        textpage = page.get_textpage(flags=TEXT_PRESERVE_IMAGES)
        info = textpage.extractIMGINFO(hashes=hashes)
        del textpage
        if hashes:
            page._image_info = info
    if not (xrefs and doc.is_pdf):
        return info

    # map the digest of every image in the page's image list to its xref
    xref_by_digest = {}
    for image in page.get_images():
        image_xref = image[0]
        pixmap = Pixmap(doc, image_xref)
        xref_by_digest[pixmap.digest] = image_xref
        del pixmap
    for entry in info:
        entry["xref"] = xref_by_digest.get(entry["digest"], 0)
    return info
def get_image_rects(page: 'Page', name, transform=False) -> list:
    """Return list of image positions on a page.

    Args:
        name: (str, list, int) image identification. May be reference name, an
            item of the page's image list or an xref.
        transform: (bool) whether to also return the transformation matrix.
    Returns:
        A list of pymupdf.Rect objects or tuples of (pymupdf.Rect, pymupdf.Matrix)
        for all image locations on the page.
    Raises:
        ValueError: if a reference name matches no image or more than one
            image of the page.
    """
    name_type = type(name)
    if name_type in (list, tuple):
        xref = name[0]
    elif name_type is int:
        xref = name
    else:
        matches = [img for img in page.get_images() if img[7] == name]
        if not matches:
            raise ValueError("bad image name")
        if len(matches) != 1:
            raise ValueError("multiple image names found")
        xref = matches[0][0]

    # the image's MD5 digest identifies it among the page's image infos
    pixmap = Pixmap(page.parent, xref)
    wanted = pixmap.digest
    del pixmap

    result = []
    for info in page.get_image_info(hashes=True):
        if info["digest"] != wanted:
            continue
        if not transform:
            result.append(Rect(info["bbox"]))
        else:
            result.append((Rect(info["bbox"]), Matrix(info["transform"])))
    return result
def get_label(page):
    """Return the label for this PDF page.

    Args:
        page: page object.
    Returns:
        The label (str) of the page, or an empty string if the document
        reports no page labels.
    """
    label_rules = page.parent._get_page_labels()
    if label_rules:
        label_rules.sort()
        return utils.get_label_pno(page.number, label_rules)
    return ""
def get_links(page: 'Page') -> list:
    """Create a list of all links contained in a page.

    Notes:
        Each link is converted to a dictionary by `utils.getLinkDict()`.
        For PDF documents, keys "xref" and "id" are added to every
        dictionary, provided the number of link annotations reported for
        the page equals the number of links found.
        See the PyMuPDF documentation for details.
    """
    CheckParent(page)
    links = []
    current = page.first_link
    while current:
        links.append(utils.getLinkDict(current, page.parent))
        current = current.next
    if links and page.parent.is_pdf:
        link_annots = [
            entry
            for entry in JM_get_annot_xref_list2(page)
            if entry[1] == mupdf.PDF_ANNOT_LINK  # pylint: disable=no-member
        ]
        if len(link_annots) == len(links):
            for link_dict, entry in zip(links, link_annots):
                link_dict["xref"] = entry[0]
                link_dict["id"] = entry[2]
    return links
def get_pixmap(
        page: 'Page',
        *,
        matrix: matrix_like=Identity,
        dpi=None,
        colorspace: Colorspace=None,
        clip: rect_like=None,
        alpha: bool=False,
        annots: bool=True,
        ) -> 'Pixmap':
    """Create pixmap of page.

    Keyword args:
        matrix: Matrix for transformation (default: Identity).
        dpi: desired dots per inch. If given, matrix is ignored.
        colorspace: (str/Colorspace) cmyk, rgb, gray - case ignored, default csRGB.
            Any other string also selects csRGB.
        clip: (irect-like) restrict rendering to this area.
        alpha: (bool) whether to include alpha channel
        annots: (bool) whether to also render annotations
    Raises:
        ValueError: if the colorspace has a component count other than
            1, 3 or 4.
    """
    if colorspace is None:
        colorspace = csRGB
    if dpi:
        # a dpi value overrides the matrix: zoom relative to 72 dpi
        factor = dpi / 72
        matrix = Matrix(factor, factor)
    if type(colorspace) is str:
        named = {"GRAY": csGRAY, "CMYK": csCMYK}
        colorspace = named.get(colorspace.upper(), csRGB)
    if colorspace.n not in (1, 3, 4):
        raise ValueError("unsupported colorspace")

    display_list = page.get_displaylist(annots=annots)
    pixmap = display_list.get_pixmap(
        matrix=matrix, colorspace=colorspace, alpha=alpha, clip=clip
    )
    display_list = None
    if dpi:
        pixmap.set_dpi(dpi, dpi)
    return pixmap
def remove_rotation(self):
    """Set page rotation to 0 while maintaining visual appearance.

    A 'cm' command with the de-rotating matrix is inserted at the start of
    the page contents. For rotations 90 and 270 the mediabox coordinates
    are swapped. The rectangles of annotations, links and widgets are then
    transformed with the inverse matrix.

    Returns:
        Identity if the page is not rotated, otherwise the inverse of the
        generated derotation matrix.
    """
    rotation = self.rotation  # normalized rotation value
    if rotation == 0:
        return Identity  # nothing to do

    mbox = self.mediabox  # current mediabox
    # shift the content before applying the derotation matrix
    if rotation == 90:
        shift = Matrix(1, 0, 0, 1, mbox.y1 - mbox.x1 - mbox.x0 - mbox.y0, 0)
    elif rotation == 270:
        shift = Matrix(1, 0, 0, 1, 0, mbox.x1 - mbox.y1 - mbox.y0 - mbox.x0)
    else:  # rotation is 180
        shift = Matrix(1, 0, 0, 1, -2 * mbox.x0, -2 * mbox.y0)
    derotate = shift * self.derotation_matrix

    command = (_format_g(tuple(derotate)) + ' cm ').encode('utf8')
    TOOLS._insert_contents(self, command, False)  # prepend to page contents

    if rotation in (90, 270):
        # swap x- and y-coordinates of the mediabox
        x0, y0, x1, y1 = mbox
        mbox.x0 = y0
        mbox.y0 = x0
        mbox.x1 = y1
        mbox.y1 = x1
        self.set_mediabox(mbox)

    self.set_rotation(0)
    inverse = ~derotate  # inverse of the derotation matrix

    for annot in self.annots():  # modify rectangles of annotations
        # TODO: only try to set rectangle for applicable annot types
        annot.set_rect(annot.rect * inverse)

    for link in self.get_links():  # modify 'from' rectangles of links
        moved = link["from"] * inverse
        self.delete_link(link)
        link["from"] = moved
        try:  # invalid links remain deleted
            self.insert_link(link)
        except Exception:
            pass

    for widget in self.widgets():  # modify field rectangles
        widget.rect = widget.rect * inverse
        widget.update()

    return inverse
def cluster_drawings(
    self, clip=None, drawings=None, x_tolerance: float = 3, y_tolerance: float = 3,
    final_filter: bool = True,
) -> list:
    """Join rectangles of neighboring vector graphic items.

    Args:
        clip: optional rect-like to restrict the page area to consider.
        drawings: (optional) output of a previous "get_drawings()".
        x_tolerance: horizontal neighborhood threshold.
        y_tolerance: vertical neighborhood threshold.
        final_filter: if False, all joined rectangles are returned, also
            those not exceeding the tolerance values.

    Notes:
        Vector graphics (also called line-art or drawings) usually consist
        of independent items like rectangles, lines or curves to jointly
        form table grid lines or bar, line, pie charts and similar.
        This method identifies rectangles wrapping these disparate items.

    Returns:
        A list of Rect items, each wrapping line-art items that are close
        enough to be considered forming a common vector graphic.
        Unless final_filter is False, only "significant" rectangles are
        returned, i.e. having both, width and height larger than the
        tolerance values.
    """
    CheckParent(self)
    area = self.rect if clip is None else Rect(clip)
    if drawings is None:  # no previous output to re-use
        drawings = self.get_drawings()

    def ordered(low, high):
        """Return the two values in ascending order."""
        return (low, high) if high > low else (high, low)

    def are_neighbors(r1, r2):
        """Detect whether r1, r2 are "neighbors".

        Two (potentially invalid) rectangles are neighbors if, after
        normalizing them, they are not separated by more than the
        tolerance in x- or y-direction.
        """
        a_x0, a_x1 = ordered(r1.x0, r1.x1)
        a_y0, a_y1 = ordered(r1.y0, r1.y1)
        b_x0, b_x1 = ordered(r2.x0, r2.x1)
        b_y0, b_y1 = ordered(r2.y0, r2.y1)
        separated = (
            a_x1 < b_x0 - x_tolerance
            or a_x0 > b_x1 + x_tolerance
            or a_y1 < b_y0 - y_tolerance
            or a_y0 > b_y1 + y_tolerance
        )
        return not separated

    def order_key(rect):
        """Sort by bottom edge, then by left edge."""
        return (rect.y1, rect.x0)

    # only consider graphics completely contained in the area
    candidates = []
    for path in drawings:
        if (
            path["rect"].x0 >= area.x0
            and path["rect"].x1 <= area.x1
            and path["rect"].y0 >= area.y0
            and path["rect"].y1 <= area.y1
        ):
            candidates.append(path["rect"])
    pending = sorted(candidates, key=order_key)

    joined = []  # the final list of the joined rectangles
    # ---------------------------------------------------------------------
    # Repeatedly take the first pending rectangle and merge every neighbor
    # into it until a full pass finds no more neighbors.
    # ---------------------------------------------------------------------
    while pending:
        current = +pending[0]  # copy of first rectangle
        merged_any = True
        while merged_any:
            merged_any = False
            for idx in range(len(pending) - 1, 0, -1):  # from back to front
                if are_neighbors(pending[idx], current):
                    current |= pending[idx].tl  # extend by top-left point
                    current |= pending[idx].br  # extend by bottom-right point
                    del pending[idx]
                    merged_any = True
        joined.append(current)
        del pending[0]
        pending = sorted(set(pending), key=order_key)

    joined = sorted(set(joined), key=order_key)
    if not final_filter:
        return joined
    return [r for r in joined if r.width > x_tolerance and r.height > y_tolerance]
def get_fonts(self, full=False):
    """List of fonts defined in the page object.

    Delegates to the document's `get_page_fonts()` for this page number.
    """
    CheckParent(self)
    document = self.parent
    return document.get_page_fonts(self.number, full=full)
def get_image_bbox(self, name, transform=0):
    """Get rectangle occupied by image 'name'.

    Args:
        name: either an item of the full page image list (list / tuple
            whose last entry is an int), or the referencing name string -
            elem[7] of the respective item.
        transform: if truthy, also return the image transformation matrix.

    Returns:
        A Rect, or a tuple (Rect, Matrix) if 'transform' is truthy. If the
        image cannot be located, the fallback value is returned in the same
        shape: Rect(1, 1, -1, -1), paired with Matrix() when 'transform'
        is truthy.

    Raises:
        ValueError: document closed or encrypted, 'name' is a sequence not
            taken from the full image list, or the name matches no image or
            several images.
    """
    CheckParent(self)
    doc = self.parent
    if doc.is_closed or doc.is_encrypted:
        raise ValueError('document closed or encrypted')

    inf_rect = Rect(1, 1, -1, -1)
    null_mat = Matrix()
    # fallback result, shaped like the regular result
    rc = (inf_rect, null_mat) if transform else inf_rect

    if type(name) in (list, tuple):
        if not type(name[-1]) is int:
            raise ValueError('need item of full page image list')
        item = name
    else:
        imglist = [i for i in doc.get_page_images(self.number, True) if name == i[7]]
        if len(imglist) == 1:
            item = imglist[0]
        elif imglist == []:
            raise ValueError('bad image name')
        else:
            raise ValueError("found multiple images named '%s'." % name)

    xref = item[-1]
    if xref != 0 or transform:
        try:
            return self.get_image_rects(item, transform=transform)[0]
        except Exception:
            exception_info()
            # return the fallback in the shape the caller asked for
            return rc

    pdf_page = self._pdf_page()
    val = JM_image_reporter(pdf_page)
    if not bool(val):
        return rc

    for v in val:
        if v[0] != item[-3]:
            continue
        q = Quad(v[1])
        bbox = q.rect
        if transform == 0:
            rc = bbox
            break

        hm = Matrix(util_hor_matrix(q.ll, q.lr))
        h = abs(q.ll - q.ul)
        w = abs(q.ur - q.ul)
        m0 = Matrix(1 / w, 0, 0, 1 / h, 0, 0)
        m = ~(hm * m0)
        rc = (bbox, m)
        break
    return rc
def get_images(self, full=False):
    """List of images defined in the page object.

    Delegates to the document's `get_page_images()` for this page number.
    """
    CheckParent(self)
    document = self.parent
    return document.get_page_images(self.number, full=full)
def get_oc_items(self) -> list:
    """Get OCGs and OCMDs used in the page's contents.

    Returns:
        List of items (name, xref, type), where type is one of "ocg" / "ocmd",
        and name is the property name. Resource properties whose object
        source contains neither "/Type/OCG" nor "/Type/OCMD" are skipped.
    """
    markers = (("/Type/OCG", "ocg"), ("/Type/OCMD", "ocmd"))
    found = []
    for prop_name, xref in self._get_resource_properties():
        source = self.parent.xref_object(xref, compressed=True)
        for marker, oc_type in markers:
            if marker in source:
                found.append((prop_name, xref, oc_type))
                break
    return found
def get_svg_image(self, matrix=None, text_as_path=1):
    """Make SVG image from page.

    Args:
        matrix: converted with `JM_matrix_from_py()`; used to transform the
            page bounds and to run the page.
        text_as_path: if equal to 1, the SVG device is created with
            FZ_SVG_TEXT_AS_PATH, otherwise with FZ_SVG_TEXT_AS_TEXT.

    Returns:
        The result of `JM_EscapeStrFromBuffer()` on the output buffer.
    """
    CheckParent(self)
    page_bounds = mupdf.fz_bound_page(self.this)
    transform = JM_matrix_from_py(matrix)
    if text_as_path == 1:
        text_mode = mupdf.FZ_SVG_TEXT_AS_PATH
    else:
        text_mode = mupdf.FZ_SVG_TEXT_AS_TEXT
    svg_bounds = mupdf.fz_transform_rect(page_bounds, transform)
    buffer = mupdf.fz_new_buffer(1024)
    output = mupdf.FzOutput(buffer)
    svg_width = svg_bounds.x1 - svg_bounds.x0
    svg_height = svg_bounds.y1 - svg_bounds.y0
    device = mupdf.fz_new_svg_device(output, svg_width, svg_height, text_mode, 1)
    mupdf.fz_run_page(self.this, device, transform, mupdf.FzCookie())
    mupdf.fz_close_device(device)
    output.fz_close_output()
    return JM_EscapeStrFromBuffer(buffer)
def get_textbox(
        page: Page,
        rect: rect_like,
        textpage=None,  #: TextPage = None,
        ) -> str:
    """Return what the text page's `extractTextbox()` yields for a rectangle.

    Args:
        rect: passed to `extractTextbox()`.
        textpage: text page to use. If None, a temporary one is created
            with `page.get_textpage()`.
    Raises:
        ValueError: if the given text page's parent is not this page.
    """
    own_textpage = textpage is None
    if own_textpage:
        tp = page.get_textpage()
    else:
        tp = textpage
        if tp.parent != page:
            raise ValueError("not a textpage of this page")
    text = tp.extractTextbox(rect)
    if own_textpage:
        del tp
    return text
def get_text(self, *args, **kwargs):
    """Forward to `utils.get_text()` with this page as first argument."""
    result = utils.get_text(self, *args, **kwargs)
    return result
def get_text_blocks(self, *args, **kwargs):
    """Forward to `utils.get_text_blocks()` with this page as first argument."""
    result = utils.get_text_blocks(self, *args, **kwargs)
    return result
-
def get_text_selection(self, *args, **kwargs):
    """Forward to `utils.get_text_selection()` with this page as first argument."""
    result = utils.get_text_selection(self, *args, **kwargs)
    return result
-
def get_text_words(self, *args, **kwargs):
    """Forward to `utils.get_text_words()` with this page as first argument."""
    result = utils.get_text_words(self, *args, **kwargs)
    return result
-
def get_textpage_ocr(self, *args, **kwargs):
    """Forward to `utils.get_textpage_ocr()` with this page as first argument."""
    result = utils.get_textpage_ocr(self, *args, **kwargs)
    return result
-
def get_textpage(self, clip: rect_like = None, flags: int = 0, matrix=None) -> "TextPage":
    """Create a TextPage for this page.

    The page rotation is temporarily set to 0 while the text page is built
    and is restored afterwards, also on failure.

    Args:
        clip: passed to `_get_textpage()`.
        flags: passed to `_get_textpage()`.
        matrix: passed to `_get_textpage()`; defaults to Matrix(1, 1).

    Returns:
        A TextPage whose parent is a weak proxy of this page.
    """
    CheckParent(self)
    if matrix is None:
        matrix = Matrix(1, 1)
    saved_rotation = self.rotation
    was_rotated = saved_rotation != 0
    if was_rotated:
        self.set_rotation(0)
    try:
        raw_textpage = self._get_textpage(clip, flags=flags, matrix=matrix)
    finally:
        if was_rotated:
            self.set_rotation(saved_rotation)
    result = TextPage(raw_textpage)
    result.parent = weakref.proxy(self)
    return result
def get_texttrace(self):
    """Run the page through a text-trace device and return what it collected.

    The page rotation is temporarily set to 0 while the page is run and is
    restored afterwards, also when running the page raises.

    Returns:
        The list filled by the text-trace device.
    """
    CheckParent(self)
    old_rotation = self.rotation
    if old_rotation != 0:
        self.set_rotation(0)
    try:
        page = self.this
        rc = []
        if 1 or g_use_extra:
            dev = extra.JM_new_texttrace_device(rc)
        else:
            dev = JM_new_texttrace_device(rc)
        prect = mupdf.fz_bound_page(page)
        # matrix built from the page height; it flips the y-axis
        dev.ptm = mupdf.FzMatrix(1, 0, 0, -1, 0, prect.y1)
        mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
        mupdf.fz_close_device(dev)
    finally:
        # never leave the page de-rotated, even if the device run failed
        if old_rotation != 0:
            self.set_rotation(old_rotation)
    return rc
def get_xobjects(self):
    """List of xobjects defined in the page object.

    Delegates to the document's `get_page_xobjects()` for this page number.
    """
    CheckParent(self)
    document = self.parent
    return document.get_page_xobjects(self.number)
def insert_font(self, fontname="helv", fontfile=None, fontbuffer=None,
                set_simple=False, wmode=0, encoding=0):
    """Make a font available to the page and return its xref.

    Args:
        fontname: reference name of the font; a leading "/" is dropped.
        fontfile: a str, or an object with an `absolute` or a `name`
            attribute, identifying a font file.
        fontbuffer: passed on to the font installation. Replaced by
            `pymupdf_fonts.myfont(fontname)` if the lower-case name is a key
            of `fitz_fontdescriptors`.
        set_simple, wmode, encoding: passed on to the font installation.

    Returns:
        The xref of the font. If the font installation returns a falsy
        value, that value is returned instead.

    Raises:
        ValueError: page without parent, invalid characters in the font
            name, or unsupported 'fontfile' object.
    """
    doc = self.parent
    if doc is None:
        raise ValueError("orphaned object: parent is None")
    idx = 0

    if fontname.startswith("/"):
        fontname = fontname[1:]
    inv_chars = INVALID_NAME_CHARS.intersection(fontname)
    if inv_chars:
        raise ValueError(f"bad fontname chars {inv_chars}")

    known_font = CheckFont(self, fontname)
    if known_font is not None:  # font already in font list of page
        xref = known_font[0]
        if not CheckFontInfo(doc, xref):
            # document FontInfo entry is missing - built via get_char_widths
            doc.get_char_widths(xref)
        return xref

    # ----------------------------------------------------------------------
    # the font is not present for this page
    # ----------------------------------------------------------------------
    bfname = Base14_fontdict.get(fontname.lower(), None)  # BaseFont if Base-14 font
    serif = 0
    CJK_number = -1
    cjk_candidates = (
        (0, ["china-t", "china-s", "japan", "korea"]),
        (1, ["china-ts", "china-ss", "japan-s", "korea-s"]),
    )
    for serif_flag, cjk_names in cjk_candidates:
        try:
            CJK_number = cjk_names.index(fontname)
            serif = serif_flag
            break
        except Exception:
            # Verbose in PyMuPDF/tests.
            if g_exceptions_verbose > 1: exception_info()

    if fontname.lower() in fitz_fontdescriptors.keys():
        import pymupdf_fonts
        fontbuffer = pymupdf_fonts.myfont(fontname)  # make a copy
        del pymupdf_fonts

    # determine the font file name as a string
    if fontfile is None:
        fontfile_str = None
    elif type(fontfile) is str:
        fontfile_str = fontfile
    elif hasattr(fontfile, "absolute"):
        fontfile_str = str(fontfile)
    elif hasattr(fontfile, "name"):
        fontfile_str = fontfile.name
    else:
        raise ValueError("bad fontfile")

    # install the font for the page
    installed = self._insertFont(
        fontname, bfname, fontfile_str, fontbuffer, set_simple, idx,
        wmode, serif, encoding, CJK_number,
    )
    if not installed:  # did not work, error return
        return installed

    xref = installed[0]  # xref of installed font
    fontdict = installed[1]
    if not CheckFontInfo(doc, xref):
        # document does not know this font yet: create its font info
        doc.get_char_widths(xref, fontdict=fontdict)
    return xref
def insert_htmlbox(
    page,
    rect,
    text,
    *,
    css=None,
    scale_low=0,
    archive=None,
    rotate=0,
    oc=0,
    opacity=1,
    overlay=True,
    _scale_word_width=True,
    _verbose=False,
) -> tuple:
    """Insert text with optional HTML tags and stylings into a rectangle.

    Args:
        rect: (rect-like) rectangle into which the text should be placed.
        text: (str or Story) text with optional HTML tags and stylings, or
            an existing Story.
        css: (str) CSS styling commands.
        scale_low: (float) force-fit content by scaling it down. Must be in
            range [0, 1]. If 1, no scaling will take place. If 0, arbitrary
            down-scaling is acceptable. A value of 0.1 would mean that content
            may be scaled down by at most 90%.
        archive: Archive object pointing to locations of used fonts or images
        rotate: (int) rotate the text in the box by a multiple of 90 degrees.
        oc: (int) the xref of an OCG / OCMD (Optional Content).
        opacity: (float) set opacity of inserted content.
        overlay: (bool) put text on top of page content.
        _scale_word_width: internal, for testing only.
        _verbose: internal, for testing only.
    Returns:
        A tuple of floats (spare_height, scale).
        spare_height:
            The height of the remaining space in <rect> below the
            text, or -1 if we failed to fit.
        scale:
            The scaling required; `0 < scale <= 1`.
            Will be less than `scale_low` if we failed to fit.
    Raises:
        ValueError: 'rotate' is no multiple of 90, 'scale_low' is outside
            [0, 1], or 'text' is neither a string nor a Story.
    """
    # bring the rotation angle into the range 0 <= rotate < 360
    if rotate % 90 != 0:
        raise ValueError("bad rotation angle")
    while rotate < 0:
        rotate += 360
    while rotate >= 360:
        rotate -= 360

    if not 0 <= scale_low <= 1:
        raise ValueError("'scale_low' must be in [0, 1]")
    if css is None:
        css = ""

    rect = Rect(rect)
    # for quarter turns, width and height of the layout box are swapped
    if rotate in (90, 270):
        layout_box = Rect(0, 0, rect.height, rect.width)
    else:
        layout_box = Rect(0, 0, rect.width, rect.height)

    # a small default border, followed by the user's CSS
    full_css = "body {margin:1px;}" + css

    # build a Story from a string, or use a given Story as is
    if isinstance(text, str):
        story = Story(html=text, user_css=full_css, archive=archive)
    elif isinstance(text, Story):
        story = text
    else:
        raise ValueError("'text' must be a string or a Story")

    # ----------------------------------------------------------------
    # Determine the scaling factor. The text is not scaled down directly:
    # we look for the factor by which the rectangle must grow to hold the
    # text, and use its reciprocal as the down-scaling factor of the text.
    # ----------------------------------------------------------------
    if scale_low == 0:
        max_rect_scale = None
    else:
        max_rect_scale = 1 / scale_low
    fit = story.fit_scale(
        layout_box,
        scale_min=1,
        scale_max=max_rect_scale,
        flags=mupdf.FZ_PLACE_STORY_FLAG_NO_OVERFLOW if _scale_word_width else 0,
        verbose=_verbose,
    )

    if not fit.big_enough:  # there was no fit
        return (-1, 1 / fit.parameter)

    # fit.filled is a tuple: replace it by a Rect for convenience
    # (fit.rect is already a Rect)
    fit.filled = Rect(fit.filled)
    assert (fit.rect.x0, fit.rect.y0) == (0, 0)
    assert (fit.filled.x0, fit.filled.y0) == (0, 0)

    scale = 1 / fit.parameter
    assert scale >= scale_low, f'{scale_low=} {scale=}'

    spare_height = max((fit.rect.y1 - fit.filled.y1) * scale, 0)

    def rect_function(*args):
        return fit.rect, fit.rect, None

    # draw the story on a page of a temporary PDF
    doc = story.write_with_links(rect_function)

    # If opacity was requested, prepend a graphics state command to the
    # /Contents of the temporary page.
    if 0 <= opacity < 1:
        temp_page = doc[0]
        gstate_name = temp_page._set_opacity(CA=opacity, ca=opacity)
        gstate_cmd = f"/{gstate_name} gs\n"
        TOOLS._insert_contents(temp_page, gstate_cmd.encode(), 0)

    # show the temporary page in the target rectangle
    page.show_pdf_page(rect, doc, 0, rotate=rotate, oc=oc, overlay=overlay)

    # ---------------------------------------------------------------------
    # re-insert links in target rect (show_pdf_page cannot copy annotations)
    # ---------------------------------------------------------------------
    fit_center = (fit.rect.tl + fit.rect.br) / 2 * scale  # scaled center of fit.rect
    target_center = (rect.tl + rect.br) / 2  # center of target rect
    # link positioning matrix:
    # - move center of scaled-down fit.rect to (0,0)
    # - rotate
    # - move (0,0) to center of target rect
    to_origin = Matrix(scale, 0, 0, scale, -fit_center.x, -fit_center.y)
    to_target = Matrix(1, 0, 0, 1, target_center.x, target_center.y)
    mat = to_origin * Matrix(-rotate) * to_target

    for link in doc[0].get_links():
        link["from"] *= mat
        page.insert_link(link)

    return spare_height, scale
def insert_image(
    page,
    rect,
    *,
    alpha=-1,
    filename=None,
    height=0,
    keep_proportion=True,
    mask=None,
    oc=0,
    overlay=True,
    pixmap=None,
    rotate=0,
    stream=None,
    width=0,
    xref=0,
):
    """Insert an image for display in a rectangle.

    Args:
        rect: (rect_like) position of image on the page.
        alpha: (int, optional) set to 0 if image has no transparency.
        filename: (str, Path, file object) image filename.
        height: (int)
        keep_proportion: (bool) keep width / height ratio (default).
        mask: (bytes, optional) image consisting of alpha values to use.
        oc: (int) xref of OCG or OCMD to declare as Optional Content.
        overlay: (bool) put in foreground (default) or background.
        pixmap: (pymupdf.Pixmap) use this as image.
        rotate: (int) rotate by 0, 90, 180 or 270 degrees.
        stream: (bytes) use this as image.
        width: (int)
        xref: (int) use this as image.

    'page' and 'rect' are positional, all other parameters are keywords.

    If 'xref' is given, that image is used. Other input options are ignored.
    Else, exactly one of pixmap, stream or filename must be given.

    'alpha=0' for non-transparent images improves performance significantly.
    Affects stream and filename only.

    Optimum transparent insertions are possible by using filename / stream in
    conjunction with a 'mask' image of alpha values.

    Returns:
        xref (int) of inserted image. Re-use as argument for multiple insertions.

    Raises:
        ValueError: document is no PDF, or an argument is invalid.
        FileNotFoundError: 'filename' does not exist.
    """
    CheckParent(page)
    doc = page.parent
    if not doc.is_pdf:
        raise ValueError("is no PDF")

    source_count = bool(filename) + bool(stream) + bool(pixmap)
    if xref == 0 and source_count != 1:
        raise ValueError("xref=0 needs exactly one of filename, pixmap, stream")

    # reduce 'filename' to a plain string
    if filename:
        if type(filename) is str:
            pass
        elif hasattr(filename, "absolute"):
            filename = str(filename)
        elif hasattr(filename, "name"):
            filename = filename.name
        else:
            raise ValueError("bad filename")

    bytes_like = (bytes, bytearray, io.BytesIO)
    if filename and not os.path.exists(filename):
        raise FileNotFoundError("No such file: '%s'" % filename)
    elif stream and type(stream) not in bytes_like:
        raise ValueError("stream must be bytes-like / BytesIO")
    elif pixmap and type(pixmap) is not Pixmap:
        raise ValueError("pixmap must be a Pixmap")

    if mask:
        if not (stream or filename):
            raise ValueError("mask requires stream or filename")
        if type(mask) not in bytes_like:
            raise ValueError("mask must be bytes-like / BytesIO")

    # bring the rotation into the range 0 <= rotate < 360
    while rotate < 0:
        rotate += 360
    while rotate >= 360:
        rotate -= 360
    if rotate not in (0, 90, 180, 270):
        raise ValueError("bad rotate value")

    target = Rect(rect)
    if target.is_empty or target.is_infinite:
        raise ValueError("rect must be finite and not empty")
    clip = target * ~page.transformation_matrix

    # Create a unique image reference name: the first "fzImg<n>" that is
    # used by no image, xobject or font of the page.
    used_names = [img[7] for img in doc.get_page_images(page.number)]
    used_names += [xobj[1] for xobj in doc.get_page_xobjects(page.number)]
    used_names += [font[4] for font in doc.get_page_fonts(page.number)]
    prefix = "fzImg"  # 'pymupdf image'
    counter = 0
    _imgname = prefix + "0"  # first name candidate
    while _imgname in used_names:
        counter += 1
        _imgname = prefix + str(counter)  # try new name

    if overlay:
        page.wrap_contents()  # ensure a balanced graphics state

    insert_args = dict(
        filename=filename,
        pixmap=pixmap,
        stream=stream,
        imask=mask,
        clip=clip,
        overlay=overlay,
        oc=oc,
        xref=xref,
        rotate=rotate,
        keep_proportion=keep_proportion,
        width=width,
        height=height,
        alpha=alpha,
        _imgname=_imgname,
        digests=doc.InsertedImages,
    )
    xref, digests = page._insert_image(**insert_args)
    if digests is not None:
        doc.InsertedImages = digests
    return xref
def insert_link(page: 'Page', lnk: dict, mark: bool = True) -> None:
    """Insert a new link for the current page.

    Args:
        lnk: link dictionary; converted to annotation source text by
            `utils.getLinkText()`.
        mark: not used by this method.
    Raises:
        ValueError: if the conversion yields an empty string.
    """
    CheckParent(page)
    annot_source = utils.getLinkText(page, lnk)
    if annot_source == "":
        raise ValueError("link kind not supported")
    page._addAnnot_FromString((annot_source,))
def insert_text(
    page: 'Page',
    point: point_like,
    text: typing.Union[str, list],
    *,
    fontsize: float = 11,
    lineheight: OptFloat = None,
    fontname: str = "helv",
    fontfile: OptStr = None,
    set_simple: int = 0,
    encoding: int = 0,
    color: OptSeq = None,
    fill: OptSeq = None,
    border_width: float = 0.05,
    miter_limit: float = 1,
    render_mode: int = 0,
    rotate: int = 0,
    morph: OptSeq = None,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
):
    """Insert text starting at a point, using a temporary Shape.

    Notes:
        Creates a Shape object with `page.new_shape()`, forwards all text
        arguments to its same-named method and commits the shape, honoring
        `overlay`, only if that method returned a non-negative value.
    Returns:
        The value returned by the Shape's `insert_text()`.
    """
    shape = page.new_shape()
    text_options = dict(
        fontsize=fontsize,
        lineheight=lineheight,
        fontname=fontname,
        fontfile=fontfile,
        set_simple=set_simple,
        encoding=encoding,
        color=color,
        fill=fill,
        border_width=border_width,
        render_mode=render_mode,
        miter_limit=miter_limit,
        rotate=rotate,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    rc = shape.insert_text(point, text, **text_options)
    if rc >= 0:
        shape.commit(overlay)
    return rc
def insert_textbox(
    page: 'Page',
    rect: rect_like,
    buffer: typing.Union[str, list],
    *,
    fontname: str = "helv",
    fontfile: OptStr = None,
    set_simple: int = 0,
    encoding: int = 0,
    fontsize: float = 11,
    lineheight: OptFloat = None,
    color: OptSeq = None,
    fill: OptSeq = None,
    expandtabs: int = 1,
    align: int = 0,
    rotate: int = 0,
    render_mode: int = 0,
    miter_limit: float = 1,
    border_width: float = 0.05,
    morph: OptSeq = None,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> float:
    """Insert text into a given rectangle.

    Notes:
        Obtains a shape from `page.new_shape()`, calls the shape's same-named
        method with all options and commits the shape only if that call
        returned a non-negative value.

    Parameters:
        rect: (rect-like) area to use for text.
        buffer: text to be inserted
        fontname: a Base-14 font, font name or '/name'
        fontfile: name of a font file
        fontsize: font size
        lineheight: overwrite the font property
        color: RGB color triple
        expandtabs: handles tabulators with string function
        align: left, center, right, justified
        rotate: 0, 90, 180, or 270 degrees
        morph: morph box with a matrix and a fixpoint
        overlay: put text in foreground or background

    Returns:
        unused or deficit rectangle area (float)
    """
    box_options = dict(
        fontsize=fontsize,
        lineheight=lineheight,
        fontname=fontname,
        fontfile=fontfile,
        set_simple=set_simple,
        encoding=encoding,
        color=color,
        fill=fill,
        expandtabs=expandtabs,
        render_mode=render_mode,
        miter_limit=miter_limit,
        border_width=border_width,
        align=align,
        rotate=rotate,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape = page.new_shape()
    result = shape.insert_textbox(rect, buffer, **box_options)
    if result >= 0:
        shape.commit(overlay)
    return result
@property
def is_wrapped(self):
    """True if `_count_q_balance()` reports `(0, 0)`, i.e. /Contents is balanced."""
    balance = self._count_q_balance()
    return balance == (0, 0)
@property
def language(self):
    """Page language.

    Returns:
        The string of the inheritable 'Lang' entry, or None when the page is
        not a PDF page or no such entry is found.
    """
    pdf_page = _as_pdf_page(self.this, required=False)
    if pdf_page.m_internal:
        entry = mupdf.pdf_dict_get_inheritable(pdf_page.obj(), PDF_NAME('Lang'))
        if entry.m_internal:
            return mupdf.pdf_to_str_buf(entry)
    return None
def links(self, kinds=None):
    """Generator over the links of a page.

    Args:
        kinds: (list) link kinds to subselect from. If None, every link
            returned by `get_links()` is yielded. E.g. kinds=[LINK_URI]
            will only yield URI links.
    """
    for entry in self.get_links():
        if kinds is not None and entry["kind"] not in kinds:
            continue  # filtered out by the caller's selection
        yield entry
def load_annot(self, ident: typing.Union[str, int]) -> Annot:
    """Load an annot by name (/NM key) or xref.

    Args:
        ident: identifier, either name (str) or xref (int). The exact type
            is checked, so subclasses of str / int are rejected.

    Returns:
        The loaded annotation, or the falsy result of `_load_annot` unchanged.

    Raises:
        ValueError: if `ident` is neither exactly a str nor exactly an int.
    """
    CheckParent(self)
    ident_type = type(ident)
    if ident_type is str:
        name, xref = ident, 0
    elif ident_type is int:
        name, xref = None, ident
    else:
        raise ValueError("identifier must be a string or integer")
    annot = self._load_annot(name, xref)
    if annot:
        annot.thisown = True
        annot.parent = weakref.proxy(self)
        # keep a reference to the annotation, keyed by its id()
        self._annot_refs[id(annot)] = annot
    return annot
def load_links(self):
    """Get first Link.

    Returns:
        A `Link` wrapping the result of `mupdf.fz_load_links`, or None if
        that result has no internal object. `xref` and `id` default to 0 and
        "" and, for PDF documents, are taken from the first link entry of
        `annot_xrefs()` when one exists.
    """
    CheckParent(self)
    raw_links = mupdf.fz_load_links(self.this)
    if not raw_links.m_internal:
        return None
    link = Link(raw_links)
    link.thisown = True
    link.parent = weakref.proxy(self)  # owning page object
    self._annot_refs[id(link)] = link
    link.xref = 0
    link.id = ""
    if self.parent.is_pdf:
        link_entries = [
            entry for entry in self.annot_xrefs()
            if entry[1] == mupdf.PDF_ANNOT_LINK
        ]
        if link_entries:
            first = link_entries[0]
            link.xref = first[0]
            link.id = first[2]
    else:
        link.xref = 0
        link.id = ""
    return link
- #----------------------------------------------------------------
- # page load widget by xref
- #----------------------------------------------------------------
def load_widget(self, xref):
    """Load a widget by its xref.

    Returns:
        A `Widget` filled via `TOOLS._fill_widget`, or the falsy result of
        `JM_get_widget_by_xref` unchanged.
    """
    CheckParent(self)
    pdf_page = _as_pdf_page(self.this)
    annot = JM_get_widget_by_xref(pdf_page, xref)
    if not annot:
        return annot
    annot.thisown = True
    annot.parent = weakref.proxy(self)
    self._annot_refs[id(annot)] = annot
    widget = Widget()
    TOOLS._fill_widget(annot, widget)
    return widget
@property
def mediabox(self):
    """The MediaBox.

    For PDF pages the value comes from `JM_mediabox`; otherwise the result
    of `mupdf.fz_bound_page` is used.
    """
    CheckParent(self)
    pdf_page = self._pdf_page(required=False)
    if pdf_page.m_internal:
        box = JM_mediabox(pdf_page.obj())
    else:
        box = mupdf.fz_bound_page(self.this)
    return Rect(box)
@property
def mediabox_size(self):
    """Point made of the `x1` and `y1` coordinates of the MediaBox."""
    right = self.mediabox.x1
    bottom = self.mediabox.y1
    return Point(right, bottom)
def new_shape(self):
    """Return a new `Shape` constructed for this page."""
    shape = Shape(self)
    return shape
- #@property
- #def parent( self):
- # assert self._parent
- # if self._parent:
- # return self._parent
- # return Document( self.this.document())
def read_contents(self):
    """All /Contents streams concatenated to one bytes object."""
    contents = TOOLS._get_all_contents(self)
    return contents
def refresh(self):
    """Refresh page after link/annot/widget updates.

    Asks the owning document to reload this page and stores the result in
    `self.this`.
    """
    CheckParent(self)
    reloaded = self.parent.reload_page(self)
    # fixme this looks wrong.
    self.this = reloaded
def replace_image(
    page: 'Page',
    xref: int,
    *,
    filename=None,
    pixmap=None,
    stream=None,
):
    """Replace the image referred to by xref.

    The object definition stored under xref is overwritten with a newly
    inserted image. The page's appearance instructions stay intact, so the
    new image is displayed with the same bbox, rotation etc.

    By providing a small fully transparent image, an effect as if the image
    had been deleted can be achieved.

    A typical use may include replacing large images by a smaller version,
    e.g. with a lower resolution or graylevel instead of colored.

    Args:
        xref: the xref of the image to replace.
        filename, pixmap, stream: exactly one of these must be provided. The
            meaning being the same as in Page.insert_image.

    Raises:
        ValueError: if xref is not an image, or if not exactly one of the
            three sources is truthy.
    """
    doc = page.parent  # the owning document
    if not doc.xref_is_image(xref):
        raise ValueError("xref not an image")
    provided = sum(bool(source) for source in (filename, stream, pixmap))
    if provided != 1:
        raise ValueError("Exactly one of filename/stream/pixmap must be given")
    # Insert the new image anywhere on the page, then copy its definition
    # over the old object.
    inserted_xref = page.insert_image(
        page.rect, filename=filename, stream=stream, pixmap=pixmap
    )
    doc.xref_copy(inserted_xref, xref)
    # The insertion has created a new /Contents source, which is set to
    # spaces now.
    newest_contents = page.get_contents()[-1]
    doc.update_stream(newest_contents, b" ")
    page._image_info = None  # clear cache of extracted image information
@property
def rotation(self):
    """Page rotation; 0 when the page is not a PDF page."""
    CheckParent(self)
    pdf_page = _as_pdf_page(self.this, required=0)
    if pdf_page.m_internal:
        return JM_page_rotation(pdf_page)
    return 0
@property
def rotation_matrix(self) -> Matrix:
    """Reflects page rotation (a `Matrix` built from `TOOLS._rotate_matrix`)."""
    raw = TOOLS._rotate_matrix(self)
    return Matrix(raw)
def run(self, dw, m):
    """Run page through a device.

    Args:
        dw: DeviceWrapper; its `device` attribute is passed to MuPDF.
        m: matrix, converted with `JM_matrix_from_py`.
    """
    CheckParent(self)
    device = dw.device
    ctm = JM_matrix_from_py(m)
    cookie = mupdf.FzCookie()
    mupdf.fz_run_page(self.this, device, ctm, cookie)
def search_for(
    page,
    text,
    *,
    clip=None,
    quads=False,
    flags=None,
    textpage=None,
) -> list:
    """Search for a string on a page.

    Args:
        text: string to be searched for
        clip: restrict search to this rectangle
        quads: (bool) return quads instead of rectangles
        flags: bit switches, default: join hyphened words
        textpage: a pre-created pymupdf.TextPage

    Returns:
        a list of rectangles or quads, each containing one occurrence.

    Raises:
        ValueError: if `textpage` is given but its parent is not this page.
    """
    if flags is None:
        flags = (
            0
            | TEXT_DEHYPHENATE
            | TEXT_PRESERVE_WHITESPACE
            | TEXT_PRESERVE_LIGATURES
            | TEXT_MEDIABOX_CLIP
        )
    if clip is not None:
        clip = Rect(clip)

    CheckParent(page)
    made_here = textpage is None
    if made_here:
        # no textpage supplied: create a temporary pymupdf.TextPage
        tp = page.get_textpage(clip=clip, flags=flags)
    else:
        tp = textpage
        if tp.parent != page:
            raise ValueError("not a textpage of this page")
    hits = tp.search(text, quads=quads)
    if made_here:
        del tp  # drop our reference to the temporary textpage
    return hits
def set_artbox(self, rect):
    """Set the ArtBox by delegating to `_set_pagebox`."""
    outcome = self._set_pagebox("ArtBox", rect)
    return outcome
def set_bleedbox(self, rect):
    """Set the BleedBox by delegating to `_set_pagebox`."""
    outcome = self._set_pagebox("BleedBox", rect)
    return outcome
def set_contents(self, xref):
    """Set object at 'xref' as the page's /Contents.

    Raises:
        ValueError: if the document is closed, is not a PDF, `xref` is
            outside `range(1, doc.xref_length())`, or the object is no stream.
    """
    CheckParent(self)
    doc = self.parent
    if doc.is_closed:
        raise ValueError("document closed")
    if not doc.is_pdf:
        raise ValueError("is no PDF")
    valid_xrefs = range(1, doc.xref_length())
    if xref not in valid_xrefs:
        raise ValueError("bad xref")
    if not doc.xref_is_stream(xref):
        raise ValueError("xref is no stream")
    reference = "%i 0 R" % xref
    doc.xref_set_key(self.xref, "Contents", reference)
def set_cropbox(self, rect):
    """Set the CropBox. Will also change Page.rect."""
    outcome = self._set_pagebox("CropBox", rect)
    return outcome
def set_language(self, language=None):
    """Set PDF page default language.

    Args:
        language: language string understood by
            `mupdf.fz_text_language_from_string`. A falsy value removes the
            'Lang' entry from the page object.
    """
    CheckParent(self)
    pdfpage = _as_pdf_page(self.this)
    if not language:
        mupdf.pdf_dict_del(pdfpage.obj(), PDF_NAME('Lang'))
    else:
        lang = mupdf.fz_text_language_from_string(language)
        assert hasattr(mupdf, 'fz_string_from_text_language2')
        # Fix: the page object must be obtained by calling obj(), exactly as
        # in the deletion branch above; previously the bound method itself
        # was passed as the dictionary argument.
        mupdf.pdf_dict_put_text_string(
            pdfpage.obj(),
            PDF_NAME('Lang'),
            mupdf.fz_string_from_text_language2(lang)
        )
def set_mediabox(self, rect):
    """Set the MediaBox.

    After writing the new MediaBox, the CropBox, ArtBox, BleedBox and TrimBox
    entries are deleted from the page object.

    Raises:
        ValueError: if the rectangle is empty or infinite.
    """
    CheckParent(self)
    pdf_page = self._pdf_page()
    box = JM_rect_from_py(rect)
    if mupdf.fz_is_empty_rect(box) or mupdf.fz_is_infinite_rect(box):
        raise ValueError(MSG_BAD_RECT)
    mupdf.pdf_dict_put_rect(pdf_page.obj(), PDF_NAME('MediaBox'), box)
    for dependent_box in ('CropBox', 'ArtBox', 'BleedBox', 'TrimBox'):
        mupdf.pdf_dict_del(pdf_page.obj(), PDF_NAME(dependent_box))
def set_rotation(self, rotation):
    """Set page rotation.

    The value is passed through `JM_norm_rotation` and stored under the
    'Rotate' key of the page object.
    """
    CheckParent(self)
    pdf_page = _as_pdf_page(self.this)
    normalized = JM_norm_rotation(rotation)
    mupdf.pdf_dict_put_int(pdf_page.obj(), PDF_NAME('Rotate'), normalized)
def set_trimbox(self, rect):
    """Set the TrimBox by delegating to `_set_pagebox`."""
    outcome = self._set_pagebox("TrimBox", rect)
    return outcome
def show_pdf_page(
    page,
    rect,
    docsrc,
    pno=0,
    keep_proportion=True,
    overlay=True,
    oc=0,
    rotate=0,
    clip=None,
) -> int:
    """Show page number 'pno' of PDF 'docsrc' in rectangle 'rect'.

    Args:
        rect: (rect-like) where to place the source image
        docsrc: (document) source PDF
        pno: (int) source page number
        keep_proportion: (bool) do not change width-height-ratio
        overlay: (bool) put in foreground
        oc: (xref) make visibility dependent on this OCG / OCMD (which must be defined in the target PDF)
        rotate: (int) degrees (multiple of 90)
        clip: (rect-like) part of source page rectangle
    Returns:
        xref of inserted object (for reuse)
    Raises:
        ValueError: if either document is not a PDF, `rect` or the effective
            clip is empty or infinite, a negative `pno` is given for a source
            without pages, or source and target are the same document.
    """

    def calc_matrix(sr, tr, keep=True, rotate=0):
        """Calculate transformation matrix from source to target rect.

        Notes:
            The product of four matrices in this sequence: (1) translate correct
            source corner to origin, (2) rotate, (3) scale, (4) translate to
            target's top-left corner.
        Args:
            sr: source rect in PDF (!) coordinate system
            tr: target rect in PDF coordinate system
            keep: whether to keep source ratio of width to height
            rotate: rotation angle in degrees
        Returns:
            Transformation matrix.
        """
        # calc center point of source rect
        smp = (sr.tl + sr.br) / 2.0
        # calc center point of target rect
        tmp = (tr.tl + tr.br) / 2.0

        # m moves to (0, 0), then rotates
        m = Matrix(1, 0, 0, 1, -smp.x, -smp.y) * Matrix(rotate)

        sr1 = sr * m  # resulting source rect to calculate scale factors

        fw = tr.width / sr1.width  # scale the width
        fh = tr.height / sr1.height  # scale the height
        if keep:
            fw = fh = min(fw, fh)  # take min if keeping aspect ratio

        m *= Matrix(fw, fh)  # concat scale matrix
        m *= Matrix(1, 0, 0, 1, tmp.x, tmp.y)  # concat move to target center
        return JM_TUPLE(m)

    CheckParent(page)
    doc = page.parent

    if not doc.is_pdf or not docsrc.is_pdf:
        raise ValueError("is no PDF")

    # 'rect' is documented as rect-like: coerce it so that plain sequences
    # work too, not only Rect instances.
    rect = Rect(rect)
    if rect.is_empty or rect.is_infinite:
        raise ValueError("rect must be finite and not empty")

    if pno < 0 and docsrc.page_count < 1:
        # adding a page count of zero would never make pno non-negative
        raise ValueError("source document has no pages")
    while pno < 0:  # support negative page numbers
        pno += docsrc.page_count
    src_page = docsrc[pno]  # load source page

    tar_rect = rect * ~page.transformation_matrix  # target rect in PDF coordinates

    src_rect = src_page.rect if not clip else src_page.rect & clip  # source rect
    if src_rect.is_empty or src_rect.is_infinite:
        raise ValueError("clip must be finite and not empty")
    src_rect = src_rect * ~src_page.transformation_matrix  # ... in PDF coord

    matrix = calc_matrix(src_rect, tar_rect, keep=keep_proportion, rotate=rotate)

    # names already used by /Form /XObjects, images and fonts of the page
    ilst = [i[1] for i in doc.get_page_xobjects(page.number)]
    ilst += [i[7] for i in doc.get_page_images(page.number)]
    ilst += [i[4] for i in doc.get_page_fonts(page.number)]

    # create a name not in that list
    n = "fzFrm"
    i = 0
    _imgname = n + "0"
    while _imgname in ilst:
        i += 1
        _imgname = n + str(i)

    isrc = docsrc._graft_id  # used as key for graftmaps
    if doc._graft_id == isrc:
        raise ValueError("source document must not equal target")

    # retrieve / make Graftmap for source PDF
    gmap = doc.Graftmaps.get(isrc, None)
    if gmap is None:
        gmap = Graftmap(doc)
        doc.Graftmaps[isrc] = gmap

    # take note of generated xref for automatic reuse
    pno_id = (isrc, pno)  # id of docsrc[pno]
    xref = doc.ShownPages.get(pno_id, 0)

    if overlay:
        page.wrap_contents()  # ensure a balanced graphics state

    xref = page._show_pdf_page(
        src_page,
        overlay=overlay,
        matrix=matrix,
        xref=xref,
        oc=oc,
        clip=src_rect,
        graftmap=gmap,
        _imgname=_imgname,
    )
    doc.ShownPages[pno_id] = xref

    return xref
@property
def transformation_matrix(self):
    """Page transformation matrix.

    For non-PDF pages the converted default `FzMatrix` is returned. For PDF
    pages with a rotation that is a multiple of 360 the matrix computed by
    `mupdf.pdf_page_transform` is returned; for any other rotation a fixed
    flip matrix based on the CropBox height is returned.
    """
    CheckParent(self)
    ctm = mupdf.FzMatrix()
    pdf_page = self._pdf_page(required=False)
    if not pdf_page.m_internal:
        return JM_py_from_matrix(ctm)
    unit_box = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)  # fixme: original code passed mediabox=NULL.
    mupdf.pdf_page_transform(pdf_page, unit_box, ctm)
    computed = JM_py_from_matrix(ctm)
    if self.rotation % 360 != 0:
        return Matrix(1, 0, 0, -1, 0, self.cropbox.height)
    return Matrix(computed)
@property
def trimbox(self):
    """The TrimBox; falls back to the CropBox when `_other_box` returns None."""
    raw = self._other_box("TrimBox")
    if raw is None:
        return self.cropbox
    # Flip the y coordinates against the MediaBox's y1 value.
    top_edge = self.mediabox.y1
    return Rect(raw[0], top_edge - raw[3], raw[2], top_edge - raw[1])
def update_link(page: 'Page', lnk: dict) -> None:
    """Update a link on the current page.

    Args:
        page: page owning the link.
        lnk: link description; its "xref" entry names the object to update.

    Raises:
        ValueError: if `utils.getLinkText` yields an empty string for `lnk`.
    """
    CheckParent(page)
    link_source = utils.getLinkText(page, lnk)
    if link_source == "":
        raise ValueError("link kind not supported")
    target_xref = lnk["xref"]
    page.parent.update_object(target_xref, link_source, page=page)
def widgets(self, types=None):
    """Generator over the widgets of a page.

    Args:
        types: (list) field types to subselect from. If None,
            all fields are returned. E.g. types=[PDF_WIDGET_TYPE_TEXT]
            will only yield text fields.
    """
    # Collect the xrefs first, then load the widgets one at a time.
    widget_xrefs = []
    for entry in self.annot_xrefs():
        if entry[1] == mupdf.PDF_ANNOT_WIDGET:
            widget_xrefs.append(entry[0])
    for widget_xref in widget_xrefs:
        widget = self.load_widget(widget_xref)
        if types is not None and widget.field_type not in types:
            continue
        yield widget
def wrap_contents(self):
    """Ensure page is in a balanced graphics state.

    Uses the counts from `_count_q_balance()` to insert the missing "q"
    commands first and the missing "Q" commands second.
    """
    missing_push, missing_pop = self._count_q_balance()
    if missing_push > 0:
        # required push commands, inserted with overlay=False
        leading = b"q\n" * missing_push
        TOOLS._insert_contents(self, leading, False)
    if missing_pop > 0:
        # required pop commands, inserted with overlay=True
        trailing = b"\nQ" * missing_pop + b"\n"
        TOOLS._insert_contents(self, trailing, True)
def write_text(
    page: 'Page',
    rect=None,
    writers=None,
    overlay=True,
    color=None,
    opacity=None,
    keep_proportion=True,
    rotate=0,
    oc=0,
) -> None:
    """Write the text of one or more pymupdf.TextWriter objects.

    Args:
        rect: target rectangle. If None, the union of the text writers is used.
        writers: one or more pymupdf.TextWriter objects.
        overlay: put in foreground or background.
        keep_proportion: maintain aspect ratio of rectangle sides.
        rotate: arbitrary rotation angle.
        oc: the xref of an optional content object

    Notes:
        A single TextWriter with rotate == 0 and rect None is written to the
        page directly. In every other case the writers are written to a page
        of a temporary document, which is then shown on the target page via
        `show_pdf_page`.
    """
    assert isinstance(page, Page)
    if not writers:
        raise ValueError("need at least one pymupdf.TextWriter")
    if type(writers) is TextWriter:
        if rotate == 0 and rect is None:
            writers.write_text(page, opacity=opacity, color=color, overlay=overlay)
            return None
        writers = (writers,)

    clip = writers[0].text_rect
    textdoc = Document()
    tpage = textdoc.new_page(width=page.rect.width, height=page.rect.height)
    for text_writer in writers:
        clip |= text_writer.text_rect
        text_writer.write_text(tpage, opacity=opacity, color=color)
    target = clip if rect is None else rect
    page.show_pdf_page(
        target,
        textdoc,
        0,
        overlay=overlay,
        keep_proportion=keep_proportion,
        rotate=rotate,
        clip=clip,
        oc=oc,
    )
    # drop the references to the temporary document and its page
    textdoc = None
    tpage = None
@property
def xref(self):
    """PDF xref number of page, as reported by the parent's `page_xref`."""
    CheckParent(self)
    owner = self.parent
    return owner.page_xref(self.number)
# `rect` is a read-only property whose getter is `bound`.
rect = property(bound, doc="page rectangle")
# Any result of layout analysis is stored here; defaults to None.
layout_information = None
- class Pixmap:
def __init__(self, *args):
    """
    Pixmap(colorspace, irect, alpha) - empty pixmap.
    Pixmap(colorspace, src) - copy changing colorspace.
    Pixmap(src, width, height,[clip]) - scaled copy, float dimensions.
    Pixmap(src, alpha=1) - copy and add or drop alpha channel.
    Pixmap(filename) - from an image in a file.
    Pixmap(image) - from an image in memory (bytes).
    Pixmap(colorspace, width, height, samples, alpha) - from samples data.
    Pixmap(PDFdoc, xref) - from an image at xref in a PDF document.

    The overload is selected by testing `args` with `args_match` in the
    order of the branches below; the first matching branch sets `self.this`.
    Unrecognised argument combinations raise an Exception listing the
    argument types.
    """
    # Cache for property `self.samples_mv`. Set here so __del__() sees it if
    # we raise.
    #
    self._samples_mv = None

    # 2024-01-16: Experimental support for a memory-view of the underlying
    # data. Doesn't seem to make much difference to Pixmap.set_pixel() so
    # not currently used.
    self._memory_view = None

    # `if 0: pass` only exists so that every real overload is an `elif`.
    if 0:
        pass

    elif args_match(args,
            (Colorspace, mupdf.FzColorspace),
            (mupdf.FzRect, mupdf.FzIrect, IRect, Rect, tuple)
            ):
        # create empty pixmap with colorspace and IRect (alpha defaults to 0)
        cs, rect = args
        alpha = 0
        pm = mupdf.fz_new_pixmap_with_bbox(cs, JM_irect_from_py(rect), mupdf.FzSeparations(0), alpha)
        self.this = pm

    elif args_match(args,
            (Colorspace, mupdf.FzColorspace),
            (mupdf.FzRect, mupdf.FzIrect, IRect, Rect, tuple),
            (int, bool)
            ):
        # create empty pixmap with colorspace and IRect, explicit alpha
        cs, rect, alpha = args
        pm = mupdf.fz_new_pixmap_with_bbox(cs, JM_irect_from_py(rect), mupdf.FzSeparations(0), alpha)
        self.this = pm

    elif args_match(args, (Colorspace, mupdf.FzColorspace, type(None)), (Pixmap, mupdf.FzPixmap)):
        # copy pixmap, converting colorspace
        cs, spix = args
        if isinstance(cs, Colorspace):
            cs = cs.this
        elif cs is None:
            cs = mupdf.FzColorspace(None)
        if isinstance(spix, Pixmap):
            spix = spix.this
        if not mupdf.fz_pixmap_colorspace(spix).m_internal:
            raise ValueError( "source colorspace must not be None")

        if cs.m_internal:
            self.this = mupdf.fz_convert_pixmap(
                    spix,
                    cs,
                    mupdf.FzColorspace(),
                    mupdf.FzDefaultColorspaces(None),
                    mupdf.FzColorParams(),
                    1
                    )
        else:
            # no target colorspace: build a pixmap from the alpha channel
            self.this = mupdf.fz_new_pixmap_from_alpha_channel( spix)
            if not self.this.m_internal:
                raise RuntimeError( MSG_PIX_NOALPHA)

    elif args_match(args, (Pixmap, mupdf.FzPixmap), (Pixmap, mupdf.FzPixmap)):
        # add mask to a pixmap w/o alpha channel
        spix, mpix = args
        if isinstance(spix, Pixmap):
            spix = spix.this
        if isinstance(mpix, Pixmap):
            mpix = mpix.this
        spm = spix
        mpm = mpix
        if not spix.m_internal: # intercept NULL for spix: make alpha only pix
            dst = mupdf.fz_new_pixmap_from_alpha_channel(mpm)
            if not dst.m_internal:
                raise RuntimeError( MSG_PIX_NOALPHA)
        else:
            dst = mupdf.fz_new_pixmap_from_color_and_mask(spm, mpm)
        self.this = dst

    elif (args_match(args, (Pixmap, mupdf.FzPixmap), (float, int), (float, int), None) or
         args_match(args, (Pixmap, mupdf.FzPixmap), (float, int), (float, int))):
        # create pixmap as scaled copy of another one
        if len(args) == 3:
            spix, w, h = args
            bbox = mupdf.FzIrect(mupdf.fz_infinite_irect)
        else:
            spix, w, h, clip = args
            bbox = JM_irect_from_py(clip)

        src_pix = spix.this if isinstance(spix, Pixmap) else spix
        if not mupdf.fz_is_infinite_irect(bbox):
            pm = mupdf.fz_scale_pixmap(src_pix, src_pix.x(), src_pix.y(), w, h, bbox)
        else:
            pm = mupdf.fz_scale_pixmap(src_pix, src_pix.x(), src_pix.y(), w, h, mupdf.FzIrect(mupdf.fz_infinite_irect))
        self.this = pm

    elif args_match(args, str, (Pixmap, mupdf.FzPixmap)) and args[0] == 'raw':
        # Special raw construction where we set .this directly.
        _, pm = args
        if isinstance(pm, Pixmap):
            pm = pm.this
        self.this = pm

    elif args_match(args, (Pixmap, mupdf.FzPixmap), (int, None)):
        # Pixmap(struct Pixmap *spix, int alpha=1)
        # copy pixmap & add / drop the alpha channel
        spix = args[0]
        alpha = args[1] if len(args) == 2 else 1
        src_pix = spix.this if isinstance(spix, Pixmap) else spix
        if not _INRANGE(alpha, 0, 1):
            raise ValueError( "bad alpha value")
        cs = mupdf.fz_pixmap_colorspace(src_pix)
        if not cs.m_internal and not alpha:
            raise ValueError( "cannot drop alpha for 'NULL' colorspace")
        seps = mupdf.FzSeparations()
        n = mupdf.fz_pixmap_colorants(src_pix)
        w = mupdf.fz_pixmap_width(src_pix)
        h = mupdf.fz_pixmap_height(src_pix)
        pm = mupdf.fz_new_pixmap(cs, w, h, seps, alpha)
        # carry over origin and resolution from the source pixmap
        pm.m_internal.x = src_pix.m_internal.x
        pm.m_internal.y = src_pix.m_internal.y
        pm.m_internal.xres = src_pix.m_internal.xres
        pm.m_internal.yres = src_pix.m_internal.yres

        # copy samples data ------------------------------------------
        # Only the first branch below runs (`if 1`); the other two are
        # slower alternatives that are never reached.
        if 1:
            # We use our pixmap_copy() to get best performance.
            # test_pixmap.py:test_setalpha(): 3.9s t=0.0062
            extra.pixmap_copy( pm.m_internal, src_pix.m_internal, n)
        elif 1:
            # Use memoryview.
            # test_pixmap.py:test_setalpha(): 4.6 t=0.51
            src_view = mupdf.fz_pixmap_samples_memoryview( src_pix)
            pm_view = mupdf.fz_pixmap_samples_memoryview( pm)
            if src_pix.alpha() == pm.alpha():   # identical samples
                #memcpy(tptr, sptr, w * h * (n + alpha));
                size = w * h * (n + alpha)
                pm_view[ 0 : size] = src_view[ 0 : size]
            else:
                tptr = 0
                sptr = 0
                # This is a little faster than calling
                # pm.fz_samples_set(), but still quite slow. E.g. reduces
                # test_pixmap.py:test_setalpha() from 6.7s to 4.5s.
                #
                # t=0.53
                pm_stride = pm.stride()
                pm_n = pm.n()
                pm_alpha = pm.alpha()
                src_stride = src_pix.stride()
                src_n = src_pix.n()
                #log( '{=pm_stride pm_n src_stride src_n}')
                for y in range( h):
                    for x in range( w):
                        pm_i = pm_stride * y + pm_n * x
                        src_i = src_stride * y + src_n * x
                        pm_view[ pm_i : pm_i + n] = src_view[ src_i : src_i + n]
                        if pm_alpha:
                            pm_view[ pm_i + n] = 255
        else:
            # Copy individual bytes from Python. Very slow.
            # test_pixmap.py:test_setalpha(): 6.89 t=2.601
            if src_pix.alpha() == pm.alpha():   # identical samples
                #memcpy(tptr, sptr, w * h * (n + alpha));
                for i in range(w * h * (n + alpha)):
                    mupdf.fz_samples_set(pm, i, mupdf.fz_samples_get(src_pix, i))
            else:
                # t=2.56
                tptr = 0
                sptr = 0
                src_pix_alpha = src_pix.alpha()
                for i in range(w * h):
                    #memcpy(tptr, sptr, n);
                    for j in range(n):
                        mupdf.fz_samples_set(pm, tptr + j, mupdf.fz_samples_get(src_pix, sptr + j))
                    tptr += n
                    if pm.alpha():
                        mupdf.fz_samples_set(pm, tptr, 255)
                        tptr += 1
                    sptr += n + src_pix_alpha
        self.this = pm

    elif args_match(args, (mupdf.FzColorspace, Colorspace), int, int, None, (int, bool)):
        # create pixmap from samples data
        cs, w, h, samples, alpha = args
        if isinstance(cs, Colorspace):
            cs = cs.this
            assert isinstance(cs, mupdf.FzColorspace)
        n = mupdf.fz_colorspace_n(cs)
        stride = (n + alpha) * w
        seps = mupdf.FzSeparations()
        pm = mupdf.fz_new_pixmap(cs, w, h, seps, alpha)

        if isinstance( samples, (bytes, bytearray)):
            #log('using mupdf.python_buffer_data()')
            samples2 = mupdf.python_buffer_data(samples)
            size = len(samples)
        else:
            res = JM_BufferFromBytes(samples)
            if not res.m_internal:
                raise ValueError( "bad samples data")
            size, c = mupdf.fz_buffer_storage(res)
            samples2 = mupdf.python_buffer_data(samples) # raw swig proxy for `const unsigned char*`.
        # the supplied data must hold exactly stride * h bytes
        if stride * h != size:
            raise ValueError( f"bad samples length {w=} {h=} {alpha=} {n=} {stride=} {size=}")
        mupdf.ll_fz_pixmap_copy_raw( pm.m_internal, samples2)
        self.this = pm

    elif args_match(args, None):
        # create pixmap from filename, file object, pathlib.Path or memory
        imagedata, = args
        name = 'name'
        if hasattr(imagedata, "resolve"):
            # object with a `resolve` attribute: its str() is used as file name
            fname = imagedata.__str__()
            if fname:
                img = mupdf.fz_new_image_from_file(fname)
        elif hasattr(imagedata, name):
            # object with a `name` attribute: that name is used as file name
            fname = imagedata.name
            if fname:
                img = mupdf.fz_new_image_from_file(fname)
        elif isinstance(imagedata, str):
            img = mupdf.fz_new_image_from_file(imagedata)
        else:
            res = JM_BufferFromBytes(imagedata)
            if not res.m_internal or not res.m_internal.len:
                raise ValueError( "bad image data")
            img = mupdf.fz_new_image_from_buffer(res)

        # Original code passed null for subarea and ctm, but that's not
        # possible with MuPDF's python bindings. The equivalent is an
        # infinite rect and identity matrix scaled by img.w() and img.h().
        pm, w, h = mupdf.fz_get_pixmap_from_image(
                img,
                mupdf.FzIrect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT),
                mupdf.FzMatrix( img.w(), 0, 0, img.h(), 0, 0),
                )
        xres, yres = mupdf.fz_image_resolution(img)
        pm.m_internal.xres = xres
        pm.m_internal.yres = yres
        self.this = pm

    elif args_match(args, (Document, mupdf.FzDocument), int):
        # Create pixmap from PDF image identified by XREF number
        doc, xref = args
        pdf = _as_pdf_document(doc)
        xreflen = mupdf.pdf_xref_len(pdf)
        if not _INRANGE(xref, 1, xreflen-1):
            raise ValueError( MSG_BAD_XREF)
        ref = mupdf.pdf_new_indirect(pdf, xref, 0)
        type_ = mupdf.pdf_dict_get(ref, PDF_NAME('Subtype'))
        # only the subtypes Image, Alpha and Luminosity are accepted
        if (not mupdf.pdf_name_eq(type_, PDF_NAME('Image'))
                and not mupdf.pdf_name_eq(type_, PDF_NAME('Alpha'))
                and not mupdf.pdf_name_eq(type_, PDF_NAME('Luminosity'))
                ):
            raise ValueError( MSG_IS_NO_IMAGE)
        img = mupdf.pdf_load_image(pdf, ref)
        # Original code passed null for subarea and ctm, but that's not
        # possible with MuPDF's python bindings. The equivalent is an
        # infinite rect and identity matrix scaled by img.w() and img.h().
        pix, w, h = mupdf.fz_get_pixmap_from_image(
                img,
                mupdf.FzIrect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT),
                mupdf.FzMatrix(img.w(), 0, 0, img.h(), 0, 0),
                )
        self.this = pix

    else:
        # no overload matched: report the types and values received
        text = 'Unrecognised args for constructing Pixmap:\n'
        for arg in args:
            text += f'    {type(arg)}: {arg}\n'
        raise Exception( text)
def __len__(self):
    """Length of the pixmap, taken from its `size` attribute."""
    length = self.size
    return length
def __repr__(self):
    """Return 'Pixmap(<colorspace name>, <irect>, <alpha>)'.

    The colorspace name is 'None' when the pixmap has no colorspace.

    Previously this returned None for instances of subclasses, which makes
    `repr()` raise TypeError because `__repr__` must return a string.
    Subclasses now get the same representation as Pixmap itself.
    """
    colorspace = self.colorspace
    if colorspace:
        cs_name = colorspace.this.m_internal.name
    else:
        cs_name = 'None'
    return "Pixmap(%s, %s, %s)" % (cs_name, self.irect, self.alpha)
def _tobytes(self, format_, jpg_quality):
    '''
    Pixmap._tobytes

    Write the pixmap into a new buffer in the format selected by `format_`
    (1 PNG, 2 PNM, 3 PAM, 5 PSD, 6 PS, 7 JPEG; anything else PNG) and return
    the result of `JM_BinFromBuffer` for that buffer. `jpg_quality` is only
    used for JPEG.
    '''
    pm = self.this
    capacity = mupdf.fz_pixmap_stride(pm) * pm.h()
    res = mupdf.fz_new_buffer(capacity)
    out = mupdf.FzOutput(res)
    if format_ == 7:
        mupdf.fz_write_pixmap_as_jpeg(out, pm, jpg_quality, 0)
    else:
        writer_name = "fz_write_pixmap_as_png"  # fallback for unknown codes
        for code, candidate in (
                (1, "fz_write_pixmap_as_png"),
                (2, "fz_write_pixmap_as_pnm"),
                (3, "fz_write_pixmap_as_pam"),
                (5, "fz_write_pixmap_as_psd"),
                (6, "fz_write_pixmap_as_ps"),
                ):
            if format_ == code:
                writer_name = candidate
                break
        getattr(mupdf, writer_name)(out, pm)
    out.fz_close_output()
    return JM_BinFromBuffer(res)
def _writeIMG(self, filename, format_, jpg_quality):
    """Save the pixmap to `filename` in the format selected by `format_`.

    Codes: 1 PNG, 2 PNM, 3 PAM, 5 PSD, 6 PS, 7 JPEG; anything else PNG.
    `jpg_quality` is only used for JPEG.
    """
    pm = self.this
    if format_ == 7:
        mupdf.fz_save_pixmap_as_jpeg(pm, filename, jpg_quality)
        return
    saver_name = "fz_save_pixmap_as_png"  # fallback for unknown codes
    for code, candidate in (
            (1, "fz_save_pixmap_as_png"),
            (2, "fz_save_pixmap_as_pnm"),
            (3, "fz_save_pixmap_as_pam"),
            (5, "fz_save_pixmap_as_psd"),
            (6, "fz_save_pixmap_as_ps"),
            ):
        if format_ == code:
            saver_name = candidate
            break
    getattr(mupdf, saver_name)(pm, filename)
@property
def alpha(self):
    """Indicates presence of alpha channel."""
    pm = self.this
    return mupdf.fz_pixmap_alpha(pm)
def clear_with(self, value=None, bbox=None):
    """Fill all color components with same value.

    Args:
        value: fill value; if None, `mupdf.fz_clear_pixmap` is used and
            `bbox` is ignored.
        bbox: optional area to restrict the fill to.
    """
    pm = self.this
    if value is None:
        mupdf.fz_clear_pixmap(pm)
        return
    if bbox is None:
        mupdf.fz_clear_pixmap_with_value(pm, value)
        return
    JM_clear_pixmap_rect_with_value(pm, value, JM_irect_from_py(bbox))
def color_count(self, colors=0, clip=None):
    '''
    Return count of each color.

    With a truthy `colors` the full result of `JM_color_count` is returned,
    otherwise only its length.
    '''
    counts = JM_color_count(self.this, clip)
    return counts if colors else len(counts)
def color_topusage(self, clip=None):
    """Return most frequent color and its usage ratio.

    Returns:
        (ratio, pixel) for the first pixel value with the highest count. If
        no pixels were counted, (1, bytes of 255 for each of the `n`
        components).
    """
    if clip is not None and self.irect in Rect(clip):
        clip = self.irect
    total = 0
    best_count = 0
    histogram = self.color_count(colors=True, clip=clip)
    for pixel in histogram:
        count = histogram[pixel]
        total += count
        if count > best_count:
            best_count, best_pixel = count, pixel
    if total:
        return (best_count / total, best_pixel)
    return (1, bytes([255] * self.n))
@property
def colorspace(self):
    """Pixmap Colorspace, or None if the colorspace's name is "None"."""
    wrapped = Colorspace(mupdf.fz_pixmap_colorspace(self.this))
    return None if wrapped.name == "None" else wrapped
def copy(self, src, bbox):
    """Copy bbox from another Pixmap.

    Args:
        src: source Pixmap.
        bbox: area to copy, converted with `JM_irect_from_py`.

    Raises:
        ValueError: if the source has no colorspace, or if source and target
            differ in their alpha setting.
    """
    pm = self.this
    src_pix = src.this
    # Test the wrapped pointer, not the wrapper object: this is how the
    # other NULL-colorspace checks in this class (e.g. invert_irect) detect
    # a missing colorspace.
    if not mupdf.fz_pixmap_colorspace(src_pix).m_internal:
        raise ValueError( "cannot copy pixmap with NULL colorspace")
    if pm.alpha() != src_pix.alpha():
        raise ValueError( "source and target alpha must be equal")
    mupdf.fz_copy_pixmap_rect(pm, src_pix, JM_irect_from_py(bbox), mupdf.FzDefaultColorspaces(None))
@property
def digest(self):
    """MD5 digest of pixmap (bytes)."""
    raw_digest = mupdf.fz_md5_pixmap2(self.this)
    return bytes(raw_digest)
def gamma_with(self, gamma):
    """Apply correction with some float.
    gamma=1 is a no-op.

    A pixmap without colorspace is left unchanged and a warning is issued.
    """
    # Test the wrapped pointer, not the wrapper object: this is how the
    # other NULL-colorspace checks in this class (e.g. invert_irect) detect
    # a missing colorspace.
    if not mupdf.fz_pixmap_colorspace( self.this).m_internal:
        message_warning("colorspace invalid for function")
        return
    mupdf.fz_gamma_pixmap( self.this, gamma)
@property
def h(self):
    """The height."""
    pm = self.this
    return mupdf.fz_pixmap_height(pm)
def invert_irect(self, bbox=None):
    """Invert the colors inside a bbox.

    Returns:
        False (after a warning) if the pixmap has no colorspace, else True.
        If the converted bbox is infinite, the whole pixmap is inverted.
    """
    pm = self.this
    if not mupdf.fz_pixmap_colorspace(pm).m_internal:
        message_warning("ignored for stencil pixmap")
        return False
    area = JM_irect_from_py(bbox)
    if mupdf.fz_is_infinite_irect(area):
        mupdf.fz_invert_pixmap(pm)
    else:
        mupdf.fz_invert_pixmap_rect(pm, area)
    return True
@property
def irect(self):
    """Pixmap bbox - an IRect object."""
    bbox = mupdf.fz_pixmap_bbox(self.this)
    return JM_py_from_irect(bbox)
@property
def is_monochrome(self):
    """Check if pixmap is monochrome."""
    pm = self.this
    return mupdf.fz_is_pixmap_monochrome(pm)
@property
def is_unicolor(self):
    '''
    Check if pixmap has only one color.

    Reads the samples in groups of `n` values and compares every group
    with the first one.
    '''
    pm = self.this
    n = pm.n()
    total = pm.w() * pm.h() * n

    def read_pixel(start):
        # the n sample values beginning at index `start`
        return [mupdf.fz_samples_get(pm, start + k) for k in range(n)]

    first_pixel = None
    for start in range(0, total, n):
        pixel = read_pixel(start)
        if start == 0:
            first_pixel = pixel
        elif pixel != first_pixel:
            return False
    return True
@property
def n(self):
    """The size of one pixel.

    When `g_use_extra` is set, the first access replaces this property on
    the instance's class with one that calls `extra.pixmap_n` directly, and
    then returns the value obtained through that new property. Otherwise the
    value comes from `mupdf.fz_pixmap_components`.
    """
    if g_use_extra:
        # Setting self.__class__.n gives a small reduction in overhead of
        # test_general.py:test_2093, e.g. 1.4x -> 1.3x.
        #return extra.pixmap_n(self.this)
        def n2(self):
            return extra.pixmap_n(self.this)
        # install the faster getter on the class of this instance
        self.__class__.n = property(n2)
        # re-read the attribute so the value comes from the new property
        return self.n
    return mupdf.fz_pixmap_components(self.this)
def pdfocr_save(self, filename, compress=1, language=None, tessdata=None):
    '''
    Save pixmap as an OCR-ed PDF page.

    A str `filename` is handed to fz_save_pixmap_as_pdfocr; any other
    object is wrapped with JM_new_output_fileptr and written to.
    '''
    tessdata = get_tessdata(tessdata)
    options = mupdf.FzPdfocrOptions()
    options.compress = compress
    if language:
        options.language_set2(language)
    if tessdata:
        options.datadir_set2(tessdata)
    pixmap = self.this
    if isinstance(filename, str):
        mupdf.fz_save_pixmap_as_pdfocr(pixmap, filename, 0, options)
        return
    sink = JM_new_output_fileptr(filename)
    try:
        mupdf.fz_write_pixmap_as_pdfocr(sink, pixmap, options)
    finally:
        sink.fz_close_output()  # Avoid MuPDF warning.
def pdfocr_tobytes(self, compress=True, language="eng", tessdata=None):
    """Save pixmap as an OCR-ed PDF page and return it as bytes.

    Args:
        compress: (bool) compress, default 1 (True).
        language: (str) language(s) occurring on page, default "eng" (English),
                multiples like "eng+ger" for English and German.
        tessdata: (str) folder name of Tesseract's language support. If None
                we use environment variable TESSDATA_PREFIX or search for
                Tesseract installation.
    Notes:
        On failure, make sure Tesseract is installed and you have set
        <tessdata> or environment variable "TESSDATA_PREFIX" to the folder
        containing your Tesseract's language support data.
    """
    from io import BytesIO

    tessdata = get_tessdata(tessdata)
    buffer = BytesIO()
    # pdfocr_save() treats a non-str target as a file-like object.
    self.pdfocr_save(buffer, compress=compress, language=language, tessdata=tessdata)
    return buffer.getvalue()
def pil_image(self):
    """Create a Pillow Image from the Pixmap.

    The Pillow mode is chosen from the colorspace component count and the
    alpha flag; an ImportError is re-raised after printing a hint.
    """
    try:
        from PIL import Image
    except ImportError:
        message("PIL/Pillow not installed")
        raise

    colorspace = self.colorspace
    if not colorspace:
        mode = "L"
    elif colorspace.n == 1:
        mode = "LA" if self.alpha else "L"
    elif colorspace.n == 3:
        mode = "RGBA" if self.alpha else "RGB"
    else:
        mode = "CMYK"
    dimensions = (self.width, self.height)
    return Image.frombytes(mode, dimensions, self.samples)
def pil_save(self, *args, **kwargs):
    """Write to image file using Pillow.

    An intermediate PIL Image is created and its "save" method receives
    all positional and keyword arguments unchanged. If the caller gave no
    "dpi" keyword, the pixmap's (xres, yres) is supplied. See the Pillow
    documentation for the meaning of the parameters.
    """
    image = self.pil_image()
    if "dpi" not in kwargs:
        kwargs["dpi"] = (self.xres, self.yres)
    image.save(*args, **kwargs)
def pil_tobytes(self, *args, **kwargs):
    """Convert to an image in memory using Pillow.

    An intermediate PIL Image is created and saved into an in-memory
    buffer; positional and keyword arguments go to its "save" method. If
    the caller gave no "dpi" keyword, the pixmap's (xres, yres) is
    supplied. Returns the buffer content as bytes.
    """
    buffer = io.BytesIO()
    image = self.pil_image()
    if "dpi" not in kwargs:
        kwargs["dpi"] = (self.xres, self.yres)
    image.save(buffer, *args, **kwargs)
    return buffer.getvalue()
def pixel(self, x, y):
    """Get color tuple of pixel (x, y).

    Last item is the alpha if Pixmap.alpha is true."""
    if g_use_extra:
        return extra.pixmap_pixel(self.this.m_internal, x, y)
    internal = self.this.m_internal
    outside = x < 0 or x >= internal.w or y < 0 or y >= internal.h
    if outside:
        RAISEPY(MSG_PIXEL_OUTSIDE, PyExc_ValueError)
    components = internal.n
    # Sample index of the pixel's first component.
    start = internal.stride * y + components * x
    return tuple(self.samples_mv[start: start + components])
@property
def samples(self)->bytes:
    """Copy of the pixmap samples as a bytes object (built from samples_mv)."""
    return bytes(self.samples_mv)
@property
def samples_mv(self):
    '''
    Pixmap samples memoryview.
    '''
    # We remember the returned memoryview so that our `__del__()` can
    # release it; otherwise accessing it after we have been destructed will
    # fail, possibly crashing Python; this is #4155.
    #
    cached = self._samples_mv
    if cached is None:
        cached = mupdf.fz_pixmap_samples_memoryview(self.this)
        self._samples_mv = cached
    return cached
-
- def _samples_mv_release(self):
- if self._samples_mv:
- self._samples_mv.release()
@property
def samples_ptr(self):
    """Value returned by mupdf.fz_pixmap_samples_int for this pixmap."""
    pointer = mupdf.fz_pixmap_samples_int(self.this)
    return pointer
def save(self, filename, output=None, jpg_quality=95):
    """Output as image in format determined by filename extension.

    Args:
        filename: a str, or an object with an "absolute" attribute
            (converted with str()), or an object with a "name" attribute.
        output: (str) only use to overrule filename extension. Default is PNG.
                Others are JPEG, JPG, PNM, PGM, PPM, PBM, PAM, PSD, PS.
        jpg_quality: passed on to the writer.
    Raises:
        ValueError: unknown format, alpha not allowed for the format, or
            unsupported colorspace for the format.
    """
    valid_formats = {
        "png": 1,
        "pnm": 2,
        "pgm": 2,
        "ppm": 2,
        "pbm": 2,
        "pam": 3,
        "psd": 5,
        "ps": 6,
        "jpg": 7,
        "jpeg": 7,
    }

    if type(filename) is not str:
        if hasattr(filename, "absolute"):
            filename = str(filename)
        elif hasattr(filename, "name"):
            filename = filename.name
    if output is None:
        # Extension without its leading dot.
        output = os.path.splitext(filename)[1][1:]

    idx = valid_formats.get(output.lower(), None)
    if idx is None:
        raise ValueError(f"Image format {output} not in {tuple(valid_formats.keys())}")
    if self.alpha and idx in (2, 6, 7):
        raise ValueError("'%s' cannot have alpha" % output)
    if self.colorspace and self.colorspace.n > 3 and idx in (1, 2, 4):
        raise ValueError(f"unsupported colorspace for '{output}'")
    if idx == 7:
        self.set_dpi(self.xres, self.yres)
    return self._writeIMG(filename, idx, jpg_quality)
def set_alpha(self, alphavalues=None, premultiply=1, opaque=None, matte=None):
    """Set alpha channel to values contained in a byte array.

    If omitted, set alphas to 255.

    Args:
        alphavalues: (bytes) with length (width * height) or 'None'.
        premultiply: (bool, True) premultiply colors with alpha values.
        opaque: (tuple, length colorspace.n) this color receives opacity 0.
        matte: (tuple, length colorspace.n)) preblending background color.
    Raises:
        ValueError: the pixmap has no alpha channel, or fewer than
            width * height alpha values were given.
    """
    pix = self.this
    if pix.alpha() == 0:
        raise ValueError( MSG_PIX_NOALPHA)
    n = mupdf.fz_pixmap_colorants(pix)
    w = mupdf.fz_pixmap_width(pix)
    h = mupdf.fz_pixmap_height(pix)
    balen = w * h * (n+1)
    colors = [0, 0, 0, 0]   # make this color opaque
    bgcolor = [0, 0, 0, 0]  # preblending background color
    zero_out = 0
    bground = 0
    # 'opaque' / 'matte' are only honoured when they are a list or tuple
    # with exactly one entry per colorant; otherwise they are ignored.
    if opaque and isinstance(opaque, (list, tuple)) and len(opaque) == n:
        for i in range(n):
            colors[i] = opaque[i]
        zero_out = 1
    if matte and isinstance( matte, (tuple, list)) and len(matte) == n:
        for i in range(n):
            bgcolor[i] = matte[i]
        bground = 1
    data = bytes()
    data_len = 0
    if alphavalues:
        if isinstance(alphavalues, (bytes, bytearray)):
            data = alphavalues
            data_len = len(alphavalues)
        else:
            assert 0, f'unexpected type for alphavalues: {type(alphavalues)}'
        if data_len < w * h:
            raise ValueError( "bad alpha values")
    # The per-sample work is done by the C helper for speed. The former
    # pure-Python fallback was unreachable and has been removed.
    mupdf.Pixmap_set_alpha_helper(
            balen,
            n,
            data_len,
            zero_out,
            mupdf.python_buffer_data( data),
            pix.m_internal,
            premultiply,
            bground,
            colors,
            bgcolor,
            )
def tobytes(self, output="png", jpg_quality=95):
    '''
    Convert to binary image stream of desired type.

    Args:
        output: (str) format name, case-insensitive; one of the keys of
            `valid_formats` below.
        jpg_quality: passed on to the converter.
    Raises:
        ValueError: unknown format, alpha not allowed for the format, or
            unsupported colorspace for the format.
    '''
    valid_formats = {
        "png": 1,
        "pnm": 2,
        "pgm": 2,
        "ppm": 2,
        "pbm": 2,
        "pam": 3,
        "tga": 4,
        "tpic": 4,
        "psd": 5,
        "ps": 6,
        'jpg': 7,
        'jpeg': 7,
    }
    idx = valid_formats.get(output.lower(), None)
    if idx is None:
        raise ValueError(f"Image format {output} not in {tuple(valid_formats.keys())}")
    if self.alpha and idx in (2, 6, 7):
        # Was a plain string, so the message showed "{output}" literally.
        raise ValueError(f"'{output}' cannot have alpha")
    if self.colorspace and self.colorspace.n > 3 and idx in (1, 2, 4):
        raise ValueError(f"unsupported colorspace for '{output}'")
    if idx == 7:
        self.set_dpi(self.xres, self.yres)
    barray = self._tobytes(idx, jpg_quality)
    return barray
def set_dpi(self, xres, yres):
    """Set resolution in both dimensions."""
    internal = self.this.m_internal
    internal.xres = xres
    internal.yres = yres
def set_origin(self, x, y):
    """Set top-left coordinates."""
    internal = self.this.m_internal
    internal.x = x
    internal.y = y
def set_pixel(self, x, y, color):
    """Set color of pixel (x, y).

    Args:
        x, y: pixel position; checked against the pixmap width and height.
        color: sequence providing one value in range 0..255 per pixel
            component (the first `n` entries are used).
    Raises:
        ValueError: position outside the pixmap, or a color component
            outside 0..255.
    """
    if g_use_extra:
        return extra.set_pixel(self.this.m_internal, x, y, color)
    pm = self.this
    if not _INRANGE(x, 0, pm.w() - 1) or not _INRANGE(y, 0, pm.h() - 1):
        raise ValueError( MSG_PIXEL_OUTSIDE)
    n = pm.n()
    # Validate all components before writing any of them.
    for j in range(n):
        if not _INRANGE(color[j], 0, 255):
            raise ValueError( MSG_BAD_COLOR_SEQ)
    stride = mupdf.fz_pixmap_stride( pm)
    i = stride * y + n * x
    # A disabled variant writing through a cached memoryview used to live
    # here; per its own comment it made little difference to speed.
    for j in range(n):
        pm.fz_samples_set(i + j, color[j])
def set_rect(self, bbox, color):
    """Set color of all pixels in bbox.

    Each of the first `n` entries of `color` must lie in 0..255, otherwise
    ValueError is raised. Returns the helper's result converted to bool.
    """
    pm = self.this
    components = []
    for index in range(pm.n()):
        value = color[index]
        if not _INRANGE(value, 0, 255):
            raise ValueError( MSG_BAD_COLOR_SEQ)
        components.append(value)
    irect = JM_irect_from_py(bbox)
    return bool(JM_fill_pixmap_rect_with_color(pm, components, irect))
def shrink(self, factor):
    """Divide width and height by 2**factor.

    E.g. factor=1 shrinks to 25% of original size (in place).
    A factor below 1 is ignored with a warning.
    """
    if factor < 1:
        message_warning("ignoring shrink factor < 1")
        return
    mupdf.fz_subsample_pixmap( self.this, factor)
    # Pixmap has changed so clear our memory view.
    self._memory_view = None
    self._samples_mv_release()
    # Forget the released view. Otherwise `samples_mv` would keep handing
    # out a released memoryview, and truth-testing that released view on
    # the next release attempt raises ValueError.
    self._samples_mv = None
@property
def size(self):
    """Pixmap size as reported by mupdf.fz_pixmap_size."""
    pixmap_size = mupdf.fz_pixmap_size(self.this)
    return pixmap_size
@property
def stride(self):
    """Length of one image line (width * n)."""
    line_length = self.this.stride()
    return line_length
def tint_with(self, black, white):
    """Tint colors with modifiers for black and white.

    Does nothing but print a message if the pixmap has no colorspace or
    its colorspace has more than 3 components.
    """
    colorspace_ok = self.colorspace and not self.colorspace.n > 3
    if colorspace_ok:
        return mupdf.fz_tint_pixmap( self.this, black, white)
    message("warning: colorspace invalid for function")
    return
@property
def w(self):
    """Width of the pixmap as reported by MuPDF."""
    width = mupdf.fz_pixmap_width(self.this)
    return width
-
def warp(self, quad, width, height):
    """Return pixmap from a warped quad.

    Raises ValueError if `quad` is not convex.
    """
    if not quad.is_convex:
        raise ValueError("quad must be convex")
    fz_quad = JM_quad_from_py(quad)
    # Corner order handed to MuPDF: ul, ur, lr, ll.
    corners = [fz_quad.ul, fz_quad.ur, fz_quad.lr, fz_quad.ll]
    warped = mupdf.fz_warp_pixmap( self.this, corners, width, height)
    return Pixmap( warped)
@property
def x(self):
    """x component of Pixmap origin."""
    origin_x = mupdf.fz_pixmap_x(self.this)
    return origin_x
@property
def xres(self):
    """Resolution in x direction."""
    resolution = self.this.xres()
    return resolution
@property
def y(self):
    """y component of Pixmap origin."""
    origin_y = mupdf.fz_pixmap_y(self.this)
    return origin_y
@property
def yres(self):
    """Resolution in y direction."""
    resolution = self.this.yres()
    return resolution
- width = w
- height = h
-
- def __del__(self):
- if self._samples_mv:
- self._samples_mv.release()
- del Point
class Point:
    """Mutable 2D point with attributes `x` and `y`; usable as a sequence of length 2."""

    def __abs__(self):
        """Euclidean length of the vector (x, y)."""
        return math.sqrt(self.x * self.x + self.y * self.y)

    def __add__(self, p):
        """Add a number to both coordinates, or add a 2-sequence component-wise."""
        if hasattr(p, "__float__"):
            return Point(self.x + p, self.y + p)
        if len(p) != 2:
            raise ValueError("Point: bad seq len")
        return Point(self.x + p[0], self.y + p[1])

    def __bool__(self):
        """False only if both coordinates equal zero."""
        largest = max(self)
        smallest = min(self)
        return not (largest == smallest == 0)

    # Same truth test under the legacy method name.
    __nonzero__ = __bool__

    def __eq__(self, p):
        """Equal to any 2-sequence whose component-wise difference is zero."""
        if hasattr(p, "__len__") and len(p) == 2:
            return not (self - p)
        return False

    def __getitem__(self, i):
        return (self.x, self.y)[i]

    def __hash__(self):
        return hash(tuple(self))

    def __init__(self, *args, x=None, y=None):
        '''
        Point() - all zeros
        Point(x, y)
        Point(Point) - new copy
        Point(sequence) - from 'sequence'

        Explicit keyword args x, y override earlier settings if not None.
        '''
        count = len(args)
        if count == 0:
            self.x = 0.0
            self.y = 0.0
        elif count == 2:
            self.x = float(args[0])
            self.y = float(args[1])
        elif count == 1:
            source = args[0]
            if isinstance(source, (mupdf.FzPoint, mupdf.fz_point)):
                # MuPDF point objects: take the attributes as they are.
                self.x = source.x
                self.y = source.y
            else:
                if not hasattr(source, "__getitem__"):
                    raise ValueError("Point: bad args")
                if len(source) != 2:
                    raise ValueError("Point: bad seq len")
                self.x = float(source[0])
                self.y = float(source[1])
        else:
            # More than two positional arguments.
            raise ValueError("Point: bad seq len")
        if x is not None:
            self.x = x
        if y is not None:
            self.y = y

    def __len__(self):
        return 2

    def __mul__(self, m):
        """Scale by a number, dot product with a 2-sequence, else transform by matrix-like m."""
        if hasattr(m, "__float__"):
            return Point(self.x * m, self.y * m)
        if hasattr(m, "__getitem__") and len(m) == 2:
            # dot product
            return self.x * m[0] + self.y * m[1]
        return Point(self).transform(m)

    def __neg__(self):
        return Point(-self.x, -self.y)

    def __pos__(self):
        return Point(self)

    def __repr__(self):
        return "Point" + str(tuple(self))

    def __setitem__(self, i, v):
        """Set x (index 0) or y (index 1) to float(v)."""
        value = float(v)
        if i == 0:
            self.x = value
        elif i == 1:
            self.y = value
        else:
            raise IndexError("index out of range")
        return None

    def __sub__(self, p):
        """Subtract a number from both coordinates, or a 2-sequence component-wise."""
        if hasattr(p, "__float__"):
            return Point(self.x - p, self.y - p)
        if len(p) != 2:
            raise ValueError("Point: bad seq len")
        return Point(self.x - p[0], self.y - p[1])

    def __truediv__(self, m):
        """Divide by a number, or transform by the inverse of matrix-like m."""
        if hasattr(m, "__float__"):
            return Point(self.x * 1. / m, self.y * 1. / m)
        inverse = util_invert_matrix(m)[1]
        if not inverse:
            raise ZeroDivisionError("matrix not invertible")
        return Point(self).transform(inverse)

    @property
    def abs_unit(self):
        """Unit vector with positive coordinates."""
        squared = self.x * self.x + self.y * self.y
        if squared < EPSILON:
            return Point(0, 0)
        length = math.sqrt(squared)
        return Point(abs(self.x) / length, abs(self.y) / length)

    def distance_to(self, *args):
        """Return distance to rectangle or another point.

        First argument: point-like or rect-like. Optional second argument:
        unit, one of "px" (default), "in", "cm", "mm".
        """
        if not len(args) > 0:
            raise ValueError("at least one parameter must be given")

        target = args[0]
        if len(target) == 2:
            target = Point(target)
        elif len(target) == 4:
            target = Rect(target)
        else:
            raise ValueError("arg1 must be point-like or rect-like")

        unit = args[1] if len(args) > 1 else "px"
        u = {"px": (1., 1.), "in": (1., 72.), "cm": (2.54, 72.),
             "mm": (25.4, 72.)}
        f = u[unit][0] / u[unit][1]

        if type(target) is Point:
            return abs(self - target) * f

        # from here on, target is a rectangle
        # as a safeguard, make a finite copy of it
        r = Rect(target.top_left, target.top_left)
        r = r | target.bottom_right
        if self in r:
            return 0.0

        if self.x > r.x1:
            # right of the rectangle
            if self.y >= r.y1:
                return self.distance_to(r.bottom_right, unit)
            if self.y <= r.y0:
                return self.distance_to(r.top_right, unit)
            return (self.x - r.x1) * f
        if r.x0 <= self.x <= r.x1:
            # within the horizontal extent of the rectangle
            if self.y >= r.y1:
                return (self.y - r.y1) * f
            return (r.y0 - self.y) * f
        # remaining case
        if self.y >= r.y1:
            return self.distance_to(r.bottom_left, unit)
        if self.y <= r.y0:
            return self.distance_to(r.top_left, unit)
        return (r.x0 - self.x) * f

    def transform(self, m):
        """Replace point by its transformation with matrix-like m."""
        if len(m) != 6:
            raise ValueError("Matrix: bad seq len")
        self.x, self.y = util_transform_point(self, m)
        return self

    @property
    def unit(self):
        """Unit vector of the point."""
        squared = self.x * self.x + self.y * self.y
        if squared < EPSILON:
            return Point(0, 0)
        length = math.sqrt(squared)
        return Point(self.x / length, self.y / length)

    __div__ = __truediv__
    norm = __abs__
class Quad:
    """Quadrilateral given by four Point corners: ul, ur, ll, lr.

    Behaves like a sequence of length 4 in the order (ul, ur, ll, lr).
    """

    def __abs__(self):
        """Product of the ul-ur and ul-ll side lengths; 0.0 for an empty quad."""
        if self.is_empty:
            return 0.0
        return abs(self.ul - self.ur) * abs(self.ul - self.ll)

    def __add__(self, q):
        """Add a number to every corner, or add a 4-sequence corner-wise."""
        if hasattr(q, "__float__"):
            return Quad(self.ul + q, self.ur + q, self.ll + q, self.lr + q)
        if len(q) != 4:
            raise ValueError("Quad: bad seq len")
        return Quad(self.ul + q[0], self.ur + q[1], self.ll + q[2], self.lr + q[3])

    def __bool__(self):
        return not self.is_empty

    def __contains__(self, x):
        """Containment test for a point-like, rect-like or quad-like x.

        Objects without a usable length, or with a length other than 2 or
        4, are never contained.
        """
        try:
            l = x.__len__()
        except Exception:
            if g_exceptions_verbose > 1:    exception_info()
            return False
        if l == 2:
            return util_point_in_quad(x, self)
        if l != 4:
            return False
        if CheckRect(x):
            if Rect(x).is_empty:
                return True
            # Only the two corners given by the rect-like are tested.
            return util_point_in_quad(x[:2], self) and util_point_in_quad(x[2:], self)
        if CheckQuad(x):
            for i in range(4):
                if not util_point_in_quad(x[i], self):
                    return False
            return True
        return False

    def __eq__(self, quad):
        """Equal to any 4-sequence whose items equal the four corners."""
        if not hasattr(quad, "__len__"):
            return False
        return len(quad) == 4 and (
            self.ul == quad[0] and
            self.ur == quad[1] and
            self.ll == quad[2] and
            self.lr == quad[3]
        )

    def __getitem__(self, i):
        return (self.ul, self.ur, self.ll, self.lr)[i]

    def __hash__(self):
        return hash(tuple(self))

    def __init__(self, *args, ul=None, ur=None, ll=None, lr=None):
        '''
        Quad() - all zero points
        Quad(ul, ur, ll, lr)
        Quad(quad) - new copy
        Quad(sequence) - from 'sequence'

        Explicit keyword args ul, ur, ll, lr override earlier settings if not
        None.

        '''
        if not args:
            # One distinct Point per corner. A chained assignment would bind
            # all four corners to the same mutable object, so changing one
            # corner in place would change all of them.
            self.ul, self.ur, self.ll, self.lr = Point(), Point(), Point(), Point()
        elif len(args) > 4:
            raise ValueError("Quad: bad seq len")
        elif len(args) == 4:
            self.ul, self.ur, self.ll, self.lr = map(Point, args)
        elif len(args) == 1:
            l = args[0]
            if isinstance(l, mupdf.FzQuad):
                self.this = l
                self.ul, self.ur, self.ll, self.lr = Point(l.ul), Point(l.ur), Point(l.ll), Point(l.lr)
            elif not hasattr(l, "__getitem__"):
                raise ValueError("Quad: bad args")
            elif len(l) != 4:
                raise ValueError("Quad: bad seq len")
            else:
                self.ul, self.ur, self.ll, self.lr = map(Point, l)
        else:
            # Two or three positional arguments.
            raise ValueError("Quad: bad args")
        if ul is not None:  self.ul = Point(ul)
        if ur is not None:  self.ur = Point(ur)
        if ll is not None:  self.ll = Point(ll)
        if lr is not None:  self.lr = Point(lr)

    def __len__(self):
        return 4

    def __mul__(self, m):
        """Return a transformed copy (see transform())."""
        q = Quad(self)
        q = q.transform(m)
        return q

    def __neg__(self):
        return Quad(-self.ul, -self.ur, -self.ll, -self.lr)

    def __nonzero__(self):
        return not self.is_empty

    def __pos__(self):
        return Quad(self)

    def __repr__(self):
        return "Quad" + str(tuple(self))

    def __setitem__(self, i, v):
        """Replace the corner at index 0..3 by Point(v)."""
        if   i == 0: self.ul = Point(v)
        elif i == 1: self.ur = Point(v)
        elif i == 2: self.ll = Point(v)
        elif i == 3: self.lr = Point(v)
        else:
            raise IndexError("index out of range")
        return None

    def __sub__(self, q):
        """Subtract a number from every corner, or a 4-sequence corner-wise."""
        if hasattr(q, "__float__"):
            return Quad(self.ul - q, self.ur - q, self.ll - q, self.lr - q)
        if len(q) != 4:
            raise ValueError("Quad: bad seq len")
        return Quad(self.ul - q[0], self.ur - q[1], self.ll - q[2], self.lr - q[3])

    def __truediv__(self, m):
        """Return a copy scaled by 1/m (number) or transformed by the inverse of matrix-like m."""
        if hasattr(m, "__float__"):
            im = 1. / m
        else:
            im = util_invert_matrix(m)[1]
            if not im:
                raise ZeroDivisionError("Matrix not invertible")
        q = Quad(self)
        q = q.transform(im)
        return q

    @property
    def is_convex(self):
        """Check if quad is convex and not degenerate.

        Notes:
            Check that for the two diagonals, the other two corners are not
            on the same side of the diagonal.
        Returns:
            True or False.
        """
        m = planish_line(self.ul, self.lr)  # puts this diagonal on x-axis
        p1 = self.ll * m  # transform the
        p2 = self.ur * m  # other two points
        if p1.y * p2.y > 0:
            return False
        m = planish_line(self.ll, self.ur)  # puts other diagonal on x-axis
        p1 = self.lr * m  # transform the
        p2 = self.ul * m  # remaining points
        if p1.y * p2.y > 0:
            return False
        return True

    @property
    def is_empty(self):
        """Check whether all quad corners are on the same line.

        This is the case if width or height is zero.
        """
        return self.width < EPSILON or self.height < EPSILON

    @property
    def is_infinite(self):
        """Check whether this is the infinite quad."""
        return self.rect.is_infinite

    @property
    def is_rectangular(self):
        """Check if quad is rectangular.

        Notes:
            Some rotation matrix can thus transform it into a rectangle.
            This is equivalent to three corners enclose 90 degrees.
        Returns:
            True or False.
        """
        sine = util_sine_between(self.ul, self.ur, self.lr)
        if abs(sine - 1) > EPSILON:  # the sine of the angle
            return False
        sine = util_sine_between(self.ur, self.lr, self.ll)
        if abs(sine - 1) > EPSILON:
            return False
        sine = util_sine_between(self.lr, self.ll, self.ul)
        if abs(sine - 1) > EPSILON:
            return False
        return True

    def morph(self, p, m):
        """Morph the quad with matrix-like 'm' and point-like 'p'.

        Return a new quad."""
        if self.is_infinite:
            return INFINITE_QUAD()
        # Accept any point-like, as documented, not only Point objects.
        p = Point(p)
        delta = Matrix(1, 1).pretranslate(p.x, p.y)
        q = self * ~delta * m * delta
        return q

    @property
    def rect(self):
        """Rect spanned by the minimum and maximum corner coordinates."""
        r = Rect()
        r.x0 = min(self.ul.x, self.ur.x, self.lr.x, self.ll.x)
        r.y0 = min(self.ul.y, self.ur.y, self.lr.y, self.ll.y)
        r.x1 = max(self.ul.x, self.ur.x, self.lr.x, self.ll.x)
        r.y1 = max(self.ul.y, self.ur.y, self.lr.y, self.ll.y)
        return r

    def transform(self, m):
        """Replace quad by its transformation with matrix m.

        A number is also accepted and multiplies every corner.
        """
        if hasattr(m, "__float__"):
            pass
        elif len(m) != 6:
            raise ValueError("Matrix: bad seq len")
        # Point defines no in-place multiply, so each `*=` rebinds the
        # corner to the new object returned by Point.__mul__.
        self.ul *= m
        self.ur *= m
        self.ll *= m
        self.lr *= m
        return self

    __div__ = __truediv__
    # Longer of the two opposite sides in each direction.
    width = property(lambda self: max(abs(self.ul - self.ur), abs(self.ll - self.lr)))
    height = property(lambda self: max(abs(self.ul - self.ll), abs(self.ur - self.lr)))
class Rect:
    """Rectangle with float coordinates x0, y0, x1, y1; usable as a sequence of length 4."""

    def __abs__(self):
        """Area, or 0.0 for an empty or infinite rectangle."""
        if self.is_empty or self.is_infinite:
            return 0.0
        return (self.x1 - self.x0) * (self.y1 - self.y0)

    def __add__(self, p):
        """Add a number to all four coordinates, or a 4-sequence component-wise."""
        if hasattr(p, "__float__"):
            return Rect(self.x0 + p, self.y0 + p, self.x1 + p, self.y1 + p)
        if len(p) != 4:
            raise ValueError("Rect: bad seq len")
        return Rect(self.x0 + p[0], self.y0 + p[1], self.x1 + p[2], self.y1 + p[3])

    def __and__(self, x):
        """Intersection with rect-like x, as a new Rect."""
        if not hasattr(x, "__len__"):
            raise ValueError("bad operand 2")
        other = Rect(x)
        return Rect(self).intersect(other)

    def __bool__(self):
        """False only if all four coordinates equal zero."""
        largest = max(self)
        smallest = min(self)
        return not (largest == smallest == 0)

    def __contains__(self, x):
        """Containment test for a number, a point-like or a rect-/quad-like x."""
        if hasattr(x, "__float__"):
            return x in tuple(self)
        size = len(x)
        if size == 2:
            return util_is_point_in_rect(x, self)
        if size != 4:
            return False
        r = INFINITE_RECT()
        try:
            r = Rect(x)
        except Exception:
            if g_exceptions_verbose > 1:    exception_info()
            r = Quad(x).rect
        return (self.x0 <= r.x0 <= r.x1 <= self.x1 and
                self.y0 <= r.y0 <= r.y1 <= self.y1)

    def __eq__(self, rect):
        """Equal to any 4-sequence whose component-wise difference is all zero."""
        if hasattr(rect, "__len__") and len(rect) == 4:
            return not (self - rect)
        return False

    def __getitem__(self, i):
        return (self.x0, self.y0, self.x1, self.y1)[i]

    def __hash__(self):
        return hash(tuple(self))

    def __init__(self, *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
        """
        Rect() - all zeros
        Rect(x0, y0, x1, y1)
        Rect(top-left, x1, y1)
        Rect(x0, y0, bottom-right)
        Rect(top-left, bottom-right)
        Rect(Rect or IRect) - new copy
        Rect(sequence) - from 'sequence'

        Explicit keyword args p0, p1, x0, y0, x1, y1 override earlier settings
        if not None.
        """
        left, top, right, bottom = util_make_rect(
                *args, p0=p0, p1=p1, x0=x0, y0=y0, x1=x1, y1=y1)
        self.x0 = float(left)
        self.y0 = float(top)
        self.x1 = float(right)
        self.y1 = float(bottom)

    def __len__(self):
        return 4

    def __mul__(self, m):
        """Scale all coordinates by a number, else return a transformed copy."""
        if hasattr(m, "__float__"):
            return Rect(self.x0 * m, self.y0 * m, self.x1 * m, self.y1 * m)
        return Rect(self).transform(m)

    def __neg__(self):
        return Rect(-self.x0, -self.y0, -self.x1, -self.y1)

    def __nonzero__(self):
        return not (max(self) == min(self) == 0)

    def __or__(self, x):
        """New Rect extended to include point-like or rect-like x."""
        if not hasattr(x, "__len__"):
            raise ValueError("bad operand 2")
        result = Rect(self)
        size = len(x)
        if size == 2:
            return result.include_point(x)
        if size == 4:
            return result.include_rect(x)
        raise ValueError("bad operand 2")

    def __pos__(self):
        return Rect(self)

    def __repr__(self):
        return "Rect" + str(tuple(self))

    def __setitem__(self, i, v):
        """Set the coordinate at index 0..3 to float(v)."""
        value = float(v)
        if i == 0:
            self.x0 = value
        elif i == 1:
            self.y0 = value
        elif i == 2:
            self.x1 = value
        elif i == 3:
            self.y1 = value
        else:
            raise IndexError("index out of range")
        return None

    def __sub__(self, p):
        """Subtract a number from all coordinates, or a 4-sequence component-wise."""
        if hasattr(p, "__float__"):
            return Rect(self.x0 - p, self.y0 - p, self.x1 - p, self.y1 - p)
        if len(p) != 4:
            raise ValueError("Rect: bad seq len")
        return Rect(self.x0 - p[0], self.y0 - p[1], self.x1 - p[2], self.y1 - p[3])

    def __truediv__(self, m):
        """Divide by a number, or transform a copy by the inverse of matrix-like m."""
        if hasattr(m, "__float__"):
            return Rect(self.x0 * 1. / m, self.y0 * 1. / m, self.x1 * 1. / m, self.y1 * 1. / m)
        inverse = util_invert_matrix(m)[1]
        if not inverse:
            raise ZeroDivisionError(f"Matrix not invertible: {m}")
        return Rect(self).transform(inverse)

    @property
    def bottom_left(self):
        """Bottom-left corner."""
        return Point(self.x0, self.y1)

    @property
    def bottom_right(self):
        """Bottom-right corner."""
        return Point(self.x1, self.y1)

    def contains(self, x):
        """Check if containing point-like or rect-like x."""
        return self.__contains__(x)

    @property
    def height(self):
        """y1 - y0, but never less than 0."""
        return max(0, self.y1 - self.y0)

    def get_area(self, *args) -> float:
        """Calculate area of rectangle.

        parameter is one of 'px' (default), 'in', 'cm', or 'mm'."""
        return _rect_area(self.width, self.height, args)

    def include_point(self, p):
        """Extend to include point-like p."""
        if len(p) != 2:
            raise ValueError("Point: bad seq len")
        self.x0, self.y0, self.x1, self.y1 = util_include_point_in_rect(self, p)
        return self

    def include_rect(self, r):
        """Extend to include rect-like r."""
        if len(r) != 4:
            raise ValueError("Rect: bad seq len")
        other = Rect(r)
        if other.is_infinite or self.is_infinite:
            # Union with the infinite rectangle is infinite.
            self.x0, self.y0, self.x1, self.y1 = FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT
        elif other.is_empty:
            # Nothing to add.
            pass
        elif self.is_empty:
            self.x0, self.y0, self.x1, self.y1 = other.x0, other.y0, other.x1, other.y1
        else:
            self.x0, self.y0, self.x1, self.y1 = util_union_rect(self, other)
        return self

    def intersect(self, r):
        """Restrict to common rect with rect-like r."""
        if not len(r) == 4:
            raise ValueError("Rect: bad seq len")
        other = Rect(r)
        if other.is_infinite:
            # Intersecting with the infinite rectangle changes nothing.
            pass
        elif self.is_infinite or other.is_empty:
            # In both cases the result is the other rectangle.
            self.x0, self.y0, self.x1, self.y1 = other.x0, other.y0, other.x1, other.y1
        elif self.is_empty:
            pass
        else:
            self.x0, self.y0, self.x1, self.y1 = util_intersect_rect(self, other)
        return self

    def intersects(self, x):
        """Check if intersection with rectangle x is not empty."""
        other = Rect(x)
        if self.is_empty or self.is_infinite:
            return False
        if other.is_empty or other.is_infinite:
            return False
        return (self.x0 < other.x1
                and other.x0 < self.x1
                and self.y0 < other.y1
                and other.y0 < self.y1
                )

    @property
    def is_empty(self):
        """True if rectangle area is empty."""
        return self.x0 >= self.x1 or self.y0 >= self.y1

    @property
    def is_infinite(self):
        """True if this is the infinite rectangle."""
        return self.x0 == self.y0 == FZ_MIN_INF_RECT and self.x1 == self.y1 == FZ_MAX_INF_RECT

    @property
    def is_valid(self):
        """True if rectangle is valid."""
        return self.x0 <= self.x1 and self.y0 <= self.y1

    def morph(self, p, m):
        """Morph with matrix-like m and point-like p.

        Returns a new quad."""
        if self.is_infinite:
            return INFINITE_QUAD()
        return self.quad.morph(p, m)

    def norm(self):
        """Square root of the sum of the squared coordinates."""
        return math.sqrt(sum(c * c for c in self))

    def normalize(self):
        """Replace rectangle with its finite version."""
        if self.x1 < self.x0:
            self.x0, self.x1 = self.x1, self.x0
        if self.y1 < self.y0:
            self.y0, self.y1 = self.y1, self.y0
        return self

    @property
    def quad(self):
        """Return Quad version of rectangle."""
        return Quad(self.tl, self.tr, self.bl, self.br)

    def round(self):
        """Return the IRect."""
        return IRect(util_round_rect(self))

    @property
    def top_left(self):
        """Top-left corner."""
        return Point(self.x0, self.y0)

    @property
    def top_right(self):
        """Top-right corner."""
        return Point(self.x1, self.y0)

    def torect(self, r):
        """Return matrix that converts to target rect."""
        target = Rect(r)
        if self.is_infinite or self.is_empty or target.is_infinite or target.is_empty:
            raise ValueError("rectangles must be finite and not empty")
        # Move own top-left to the origin, scale to the target size, then
        # move to the target's top-left.
        to_origin = Matrix(1, 0, 0, 1, -self.x0, -self.y0)
        scale = Matrix(target.width / self.width, target.height / self.height)
        to_target = Matrix(1, 0, 0, 1, target.x0, target.y0)
        return to_origin * scale * to_target

    def transform(self, m):
        """Replace with the transformation by matrix-like m."""
        if not len(m) == 6:
            raise ValueError("Matrix: bad seq len")
        self.x0, self.y0, self.x1, self.y1 = util_transform_rect(self, m)
        return self

    @property
    def width(self):
        """x1 - x0, but never less than 0."""
        return max(0, self.x1 - self.x0)

    # Aliases.
    __div__ = __truediv__
    bl = bottom_left
    br = bottom_right
    irect = property(round)
    tl = top_left
    tr = top_right
- class Shape:
- """Create a new shape."""
@staticmethod
def horizontal_angle(C, P):
    """Return the angle to the horizontal for the connection from C to P.

    This uses the arcus sine function and resolves its inherent ambiguity by
    looking at the signs of the components of vector S = P - C.
    """
    direction = Point(P - C).unit  # unit vector 'C' -> 'P'
    angle = math.asin(abs(direction.y))  # absolute angle from horizontal
    if direction.x < 0:
        # Negative x component: measure from the opposite side.
        angle = math.pi - angle
        if direction.y <= 0:
            angle = -angle
    elif not direction.y >= 0:
        angle = -angle
    return angle
def __init__(self, page: Page):
    """Bind the shape to `page` and start with empty content buffers.

    Raises ValueError if the page's document is not a PDF.
    """
    CheckParent(page)
    self.page = page
    self.doc = page.parent
    if not self.doc.is_pdf:
        raise ValueError("is no PDF")

    # Page geometry.
    self.height = page.mediabox_size.y
    self.width = page.mediabox_size.x
    self.x = page.cropbox_position.x
    self.y = page.cropbox_position.y

    # Page transformation matrix and its inverse.
    self.pctm = page.transformation_matrix
    self.ipctm = ~self.pctm

    # Content buffers and drawing state.
    self.draw_cont = ""
    self.text_cont = ""
    self.totalcont = ""
    self.last_point = None
    self.rect = None
def updateRect(self, x):
    """Grow self.rect so that it covers point-like or rect-like `x`.

    The first call creates self.rect from `x`; later calls extend it.
    """
    is_point = len(x) == 2
    if self.rect is None:
        self.rect = Rect(x, x) if is_point else Rect(x)
        return
    if is_point:
        pt = Point(x)
        low_x = high_x = pt.x
        low_y = high_y = pt.y
    else:
        r = Rect(x)
        low_x, low_y, high_x, high_y = r.x0, r.y0, r.x1, r.y1
    self.rect.x0 = min(self.rect.x0, low_x)
    self.rect.y0 = min(self.rect.y0, low_y)
    self.rect.x1 = max(self.rect.x1, high_x)
    self.rect.y1 = max(self.rect.y1, high_y)
def draw_line(self, p1: point_like, p2: point_like) -> Point:
    """Draw a line between two points.

    A move ("m") command is emitted only if `p1` differs from the last
    point drawn. Returns the end point.
    """
    start = Point(p1)
    end = Point(p2)
    if not (self.last_point == start):
        self.draw_cont += _format_g(JM_TUPLE(start * self.ipctm)) + " m\n"
        self.last_point = start
        self.updateRect(start)

    self.draw_cont += _format_g(JM_TUPLE(end * self.ipctm)) + " l\n"
    self.updateRect(end)
    self.last_point = end
    return self.last_point
def draw_polyline(self, points: list) -> Point:
    """Draw several connected line segments.

    The first point yields a move ("m") command unless it equals the last
    point drawn; every further point yields a line ("l") command.
    Returns the last point.
    """
    for index, raw in enumerate(points):
        pt = Point(raw)
        if index > 0:
            self.draw_cont += _format_g(JM_TUPLE(pt * self.ipctm)) + " l\n"
        elif not (self.last_point == pt):
            self.draw_cont += _format_g(JM_TUPLE(pt * self.ipctm)) + " m\n"
            self.last_point = pt
        self.updateRect(raw)

    self.last_point = Point(points[-1])
    return self.last_point
def draw_bezier(
        self,
        p1: point_like,
        p2: point_like,
        p3: point_like,
        p4: point_like,
        ) -> Point:
    """Draw a standard cubic Bezier curve.

    `p1` is the start, `p2` and `p3` go into the "c" command as control
    points and `p4` as the end point. Returns `p4` as a Point.
    """
    start, ctrl1, ctrl2, end = Point(p1), Point(p2), Point(p3), Point(p4)
    if not (self.last_point == start):
        self.draw_cont += _format_g(JM_TUPLE(start * self.ipctm)) + " m\n"

    # Six numbers: both control points and the end point, transformed.
    coords = []
    for pt in (ctrl1, ctrl2, end):
        coords = coords + list(pt * self.ipctm)
    self.draw_cont += _format_g(JM_TUPLE(coords)) + " c\n"

    for pt in (start, ctrl1, ctrl2, end):
        self.updateRect(pt)
    self.last_point = end
    return self.last_point
def draw_oval(self, tetra: typing.Union[quad_like, rect_like]) -> Point:
    """Draw an ellipse inside a rectangle or quadrilateral.

    Args:
        tetra: a sequence of length 4. If its first item is a number it
            is taken as a rect-like, otherwise as a quad-like.

    Returns:
        The end point of the path, which is the middle of the left edge.

    Raises:
        ValueError: if ``tetra`` does not have exactly four items.
    """
    if len(tetra) != 4:
        raise ValueError("invalid arg length")

    q = Rect(tetra).quad if hasattr(tetra[0], "__float__") else Quad(tetra)

    def midpoint(a, b):
        return a + (b - a) * 0.5

    # middle points of the four edges
    mt = midpoint(q.ul, q.ur)  # top
    mr = midpoint(q.ur, q.lr)  # right
    mb = midpoint(q.ll, q.lr)  # bottom
    ml = midpoint(q.ul, q.ll)  # left

    if not (self.last_point == ml):
        self.draw_cont += _format_g(JM_TUPLE(ml * self.ipctm)) + " m\n"
        self.last_point = ml

    # four quarter curves, each bending around one corner
    for begin, corner, finish_at in (
        (ml, q.ll, mb),
        (mb, q.lr, mr),
        (mr, q.ur, mt),
        (mt, q.ul, ml),
    ):
        self.draw_curve(begin, corner, finish_at)

    self.updateRect(q.rect)
    self.last_point = ml
    return self.last_point
def draw_circle(self, center: point_like, radius: float) -> Point:
    """Draw a full circle around ``center``.

    The circle is drawn as a 360 degree sector (without the connecting
    lines to the center), starting at the point ``radius`` units to the
    left of ``center``.

    Raises:
        ValueError: if ``radius`` is not greater than ``EPSILON``.
    """
    if not (radius > EPSILON):
        raise ValueError("radius must be positive")
    middle = Point(center)
    start = middle - (radius, 0)
    return self.draw_sector(middle, start, 360, fullSector=False)
def draw_curve(
    self,
    p1: point_like,
    p2: point_like,
    p3: point_like,
) -> Point:
    """Draw a curve from ``p1`` to ``p3`` that bends towards ``p2``.

    The single helper point ``p2`` is turned into the two control points
    of a cubic Bezier curve by moving a fixed fraction of the way from
    each end point towards ``p2``.

    Returns:
        The end point as returned by ``draw_bezier``.
    """
    kappa = 0.55228474983  # fraction of the way from an end point to p2
    start, bend, end = Point(p1), Point(p2), Point(p3)
    ctrl1 = start + (bend - start) * kappa
    ctrl2 = end + (bend - end) * kappa
    return self.draw_bezier(start, ctrl1, ctrl2, end)
def draw_sector(
    self,
    center: point_like,
    point: point_like,
    beta: float,
    fullSector: bool = True,
) -> Point:
    """Draw a circle sector (or just its arc).

    The arc starts at ``point``, runs around ``center`` and is emitted as
    a sequence of cubic Bezier curves of at most 90 degrees each.

    Args:
        center: center of the circle.
        point: start point of the arc; its distance to ``center`` is the
            radius.
        beta: angle of the sector in degrees. The sign selects the
            direction of rotation; magnitudes above 360 are reduced.
        fullSector: if true, also draw the two lines connecting the
            center with the start and the end of the arc.

    Returns:
        The end point of the arc. If ``beta`` is too small for any arc
        to be drawn, this is the start point.

    Raises:
        ValueError: if ``center`` and ``point`` (nearly) coincide. The
            check happens before anything is appended to the path.
    """
    center = Point(center)
    point = Point(point)
    C = center
    P = point
    S = P - C  # vector 'center' -> 'point'
    rad = abs(S)  # circle radius
    # validate first, so a degenerate call leaves the path untouched
    if not rad > EPSILON:
        raise ValueError("radius must be positive")

    l3 = lambda a, b: _format_g((a, b)) + " m\n"
    l4 = lambda a, b, c, d, e, f: _format_g((a, b, c, d, e, f)) + " c\n"
    l5 = lambda a, b: _format_g((a, b)) + " l\n"

    betar = math.radians(-beta)
    w360 = math.radians(math.copysign(360, betar)) * (-1)
    w90 = math.radians(math.copysign(90, betar))
    w45 = w90 / 2
    while abs(betar) > 2 * math.pi:
        betar += w360  # bring angle below 360 degrees

    if not (self.last_point == point):
        self.draw_cont += l3(*JM_TUPLE(point * self.ipctm))
        self.last_point = point
    self.updateRect(point)

    # End point of the arc. Defaults to the start point so that a
    # negligible angle does not produce lines to the page origin.
    Q = point
    alfa = self.horizontal_angle(center, point)

    while abs(betar) > abs(w90):  # draw 90 degree arcs
        q1 = C.x + math.cos(alfa + w90) * rad
        q2 = C.y + math.sin(alfa + w90) * rad
        Q = Point(q1, q2)  # the arc's end point
        r1 = C.x + math.cos(alfa + w45) * rad / math.cos(w45)
        r2 = C.y + math.sin(alfa + w45) * rad / math.cos(w45)
        R = Point(r1, r2)  # crossing point of tangents
        kappah = (1 - math.cos(w45)) * 4 / 3 / abs(R - Q)
        kappa = kappah * abs(P - Q)
        cp1 = P + (R - P) * kappa  # control point 1
        cp2 = Q + (R - Q) * kappa  # control point 2
        self.draw_cont += l4(*JM_TUPLE(
            list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm)
        ))
        # keep the enclosing rectangle current, as draw_bezier does
        for pt in (cp1, cp2, Q):
            self.updateRect(pt)

        betar -= w90  # reduce param angle by 90 deg
        alfa += w90  # advance start angle by 90 deg
        P = Q  # advance to arc end point

    # draw (remaining) arc
    if abs(betar) > 1e-3:  # significant degrees left?
        beta2 = betar / 2
        q1 = C.x + math.cos(alfa + betar) * rad
        q2 = C.y + math.sin(alfa + betar) * rad
        Q = Point(q1, q2)  # the arc's end point
        r1 = C.x + math.cos(alfa + beta2) * rad / math.cos(beta2)
        r2 = C.y + math.sin(alfa + beta2) * rad / math.cos(beta2)
        R = Point(r1, r2)  # crossing point of tangents
        # kappa height is 4/3 of segment height
        kappah = (1 - math.cos(beta2)) * 4 / 3 / abs(R - Q)  # kappa height
        kappa = kappah * abs(P - Q) / (1 - math.cos(betar))
        cp1 = P + (R - P) * kappa  # control point 1
        cp2 = Q + (R - Q) * kappa  # control point 2
        self.draw_cont += l4(*JM_TUPLE(
            list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm)
        ))
        for pt in (cp1, cp2, Q):
            self.updateRect(pt)

    if fullSector:
        # connect start point -> center -> arc end
        self.draw_cont += l3(*JM_TUPLE(point * self.ipctm))
        self.draw_cont += l5(*JM_TUPLE(center * self.ipctm))
        self.draw_cont += l5(*JM_TUPLE(Q * self.ipctm))
        self.updateRect(center)

    self.last_point = Q
    return self.last_point
def draw_rect(self, rect: rect_like, *, radius=None) -> Point:
    """Draw a rectangle, optionally with rounded corners.

    Args:
        rect: the rectangle.
        radius: ``None`` draws a plain rectangle. Otherwise the corners
            are rounded and this is the radius of the curvature, given
            as a fraction of the rectangle's side length. A single
            number (0 < v <= 0.5) is applied to the shorter side. A
            sequence of two such numbers is applied to the width and
            the height respectively, so the corners may be elliptic.

    Returns:
        The end point of the path.

    Raises:
        ValueError: for a ``radius`` that is out of range or neither a
            number nor a sequence of two items.
    """
    r = Rect(rect)

    if radius is None:  # standard rectangle: one "re" operator
        coords = list(r.bl * self.ipctm) + [r.width, r.height]
        self.draw_cont += _format_g(JM_TUPLE(coords)) + " re\n"
        self.updateRect(r)
        self.last_point = r.tl
        return self.last_point

    # Rounded corners: determine the horizontal / vertical offsets at
    # which the straight edges end and the corner curves begin.
    if hasattr(radius, "__float__"):
        if radius <= 0 or radius > 0.5:
            raise ValueError(f"bad radius value {radius}.")
        d = min(r.width, r.height) * radius
        px = (d, 0)
        py = (0, d)
    elif hasattr(radius, "__len__") and len(radius) == 2:
        rx, ry = radius
        px = (rx * r.width, 0)
        py = (0, ry * r.height)
        if min(rx, ry) <= 0 or max(rx, ry) > 0.5:
            raise ValueError(f"bad radius value {radius}.")
    else:
        raise ValueError(f"bad radius value {radius}.")

    # One straight edge plus the following corner curve per entry:
    # (end of edge, corner point, end of curve)
    legs = (
        (r.bl - py, r.bl, r.bl + px),  # left edge, bottom-left corner
        (r.br - px, r.br, r.br - py),  # bottom edge, bottom-right corner
        (r.tr + py, r.tr, r.tr - px),  # right edge, top-right corner
        (r.tl + px, r.tl, r.tl + py),  # top edge, top-left corner
    )
    cursor = r.tl + py
    for edge_end, corner, curve_end in legs:
        cursor = self.draw_line(cursor, edge_end)
        cursor = self.draw_curve(cursor, corner, curve_end)

    self.last_point = cursor
    self.updateRect(r)
    return self.last_point
def draw_quad(self, quad: quad_like) -> Point:
    """Draw the outline of a quadrilateral as a closed polyline.

    The corners are visited in the order upper-left, lower-left,
    lower-right, upper-right and back to upper-left.
    """
    q = Quad(quad)
    corners = [q.ul, q.ll, q.lr, q.ur]
    return self.draw_polyline(corners + corners[:1])
def draw_zigzag(
    self,
    p1: point_like,
    p2: point_like,
    breadth: float = 2,
) -> Point:
    """Draw a zig-zag line from ``p1`` to ``p2``.

    Args:
        p1: start point.
        p2: end point.
        breadth: desired amplitude of the teeth. It is adjusted so that
            a whole number of full periods fits between the two points.

    Returns:
        ``p2`` as a ``Point``.

    Raises:
        ValueError: if the points are too close for one full period.
    """
    p1 = Point(p1)
    p2 = Point(p2)
    rad = abs(p2 - p1)  # distance of points
    cnt = 4 * int(round(rad / (4 * breadth), 0))  # always take full phases
    if cnt < 4:
        raise ValueError("points too close")
    mb = rad / cnt  # revised breadth

    matrix = Matrix(util_hor_matrix(p1, p2))  # normalize line to x-axis
    i_mat = ~matrix  # get original position

    # Only odd steps carry a tooth: remainder 1 lies "above" the
    # connection line, remainder 3 "below" it.
    teeth = [
        Point(i, -1 if i % 4 == 1 else 1) * mb * i_mat
        for i in range(1, cnt, 2)
    ]
    self.draw_polyline([p1] + teeth + [p2])  # add start and end points
    return p2
def draw_squiggle(
    self,
    p1: point_like,
    p2: point_like,
    breadth=2,
) -> Point:
    """Draw a wavy line from ``p1`` to ``p2``.

    Args:
        p1: start point.
        p2: end point.
        breadth: desired amplitude of the waves. It is adjusted so that
            a whole number of full periods fits between the two points.

    Returns:
        ``p2`` as a ``Point``.

    Raises:
        ValueError: if the points are too close for one full period.
    """
    p1 = Point(p1)
    p2 = Point(p2)
    rad = abs(p2 - p1)  # distance of points
    cnt = 4 * int(round(rad / (4 * breadth), 0))  # always take full phases
    if cnt < 4:
        raise ValueError("points too close")
    mb = rad / cnt  # revised breadth

    matrix = Matrix(util_hor_matrix(p1, p2))  # normalize line to x-axis
    i_mat = ~matrix  # get original position

    k = 2.4142135623765633  # y of draw_curve helper point
    # y offset per step modulo 4: on the line, above, on the line, below
    offsets = (0, -k, 0, k)
    inner = [Point(i, offsets[i % 4]) * mb * i_mat for i in range(1, cnt)]
    points = [p1] + inner + [p2]

    # consecutive triples (start, helper, end) form one curve each
    for i in range(0, len(points) - 2, 2):
        self.draw_curve(points[i], points[i + 1], points[i + 2])
    return p2
- # ==============================================================================
- # Shape.insert_text
- # ==============================================================================
def insert_text(
    self,
    point: point_like,
    buffer: typing.Union[str, list],
    *,
    fontsize: float = 11,
    lineheight: OptFloat = None,
    fontname: str = "helv",
    fontfile: OptStr = None,
    set_simple: bool = 0,
    encoding: int = 0,
    color: OptSeq = None,
    fill: OptSeq = None,
    render_mode: int = 0,
    border_width: float = 0.05,
    miter_limit: float = 1,
    rotate: int = 0,
    morph: OptSeq = None,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> int:
    """Write lines of text starting at ``point``.

    Args:
        point: start position of the first line.
        buffer: a string (split at its line breaks) or a list / tuple
            of strings, one per line.
        fontsize: font size.
        lineheight: factor applied to ``fontsize`` to get the line
            distance. If falsy, the factor is the font's ascender minus
            descender, or 1.2 if that difference is at most 1.
        fontname: font reference name; a leading "/" is dropped.
        fontfile: passed to ``page.insert_font``.
        set_simple: passed to ``page.insert_font``.
        encoding: passed to ``page.insert_font``.
        color: stroke color.
        fill: fill color. If falsy and ``render_mode`` is 0, ``color``
            is used for filling.
        render_mode: value for the "Tr" operator. Border width and miter
            limit are only emitted when this is greater than 0.
        border_width: line width as a fraction of ``fontsize``.
        miter_limit: value for the "M" operator, ``None`` to omit it.
        rotate: text rotation, a multiple of 90 degrees.
        morph: if accepted by ``CheckMorph``, a sequence whose first item
            is a point and whose second item is a matrix.
        stroke_opacity: passed to ``page._set_opacity`` as ``CA``.
        fill_opacity: passed to ``page._set_opacity`` as ``ca``.
        oc: passed to ``page._get_optional_content``.

    Returns:
        The number of lines written, 0 if there was nothing to write.
        Output stops early when the remaining space is smaller than one
        line height.

    Raises:
        ValueError: if ``rotate`` is not a multiple of 90.
    """
    # make sure we have a non-empty list of strings
    if not bool(buffer):
        return 0
    lines = buffer if type(buffer) in (list, tuple) else buffer.splitlines()
    if len(lines) == 0:
        return 0

    point = Point(point)
    try:
        maxcode = max([ord(c) for c in " ".join(lines)])
    except Exception:
        exception_info()
        return 0

    # make sure the font exists on the page and fetch its properties
    fname = fontname[1:] if fontname.startswith("/") else fontname
    xref = self.page.insert_font(
        fontname=fname, fontfile=fontfile, encoding=encoding, set_simple=set_simple
    )
    fontdict = CheckFontInfo(self.doc, xref)[1]
    ordering = fontdict["ordering"]
    simple = fontdict["simple"]
    bfname = fontdict["name"]
    ascender = fontdict["ascender"]
    descender = fontdict["descender"]

    if lineheight:
        lheight = fontsize * lineheight
    elif ascender - descender <= 1:
        lheight = fontsize * 1.2
    else:
        lheight = fontsize * (ascender - descender)

    if maxcode > 255:
        glyphs = self.doc.get_char_widths(xref, maxcode + 1)
    else:
        glyphs = fontdict["glyphs"]

    # simple fonts other than the two symbol fonts need no glyph table
    if simple and bfname not in ("Symbol", "ZapfDingbats"):
        tj_glyphs = None
    else:
        tj_glyphs = glyphs
    encoded = [getTJstr(t, tj_glyphs, simple, ordering) for t in lines]

    color_str = ColorCode(color, "c")
    fill_str = ColorCode(fill, "f")
    if not fill and render_mode == 0:  # ensure fill color when 0 Tr
        fill = color
        fill_str = ColorCode(color, "f")

    morphing = CheckMorph(morph)
    if rotate % 90 != 0:
        raise ValueError("bad rotate value")
    rot = rotate % 360  # one of 0, 90, 180, 270

    height = self.height
    width = self.width

    if morphing:
        shift = Matrix(1, 0, 0, 1, morph[0].x + self.x, height - morph[0].y - self.y)
        cm = _format_g(JM_TUPLE(~shift * morph[1] * shift)) + " cm\n"
    else:
        cm = ""

    # text start position and available space, depending on rotation
    if rot == 90:
        left = height - point.y - self.y
        top = -point.x - self.x
        cm += "0 1 -1 0 0 0 cm\n"  # rotates 90 deg counter-clockwise
        space = width - abs(top)
    elif rot == 270:
        left = -height + point.y + self.y
        top = point.x + self.x
        cm += "0 -1 1 0 0 0 cm\n"  # rotates 90 deg clockwise
        space = abs(top)
    elif rot == 180:
        left = -point.x - self.x
        top = -height + point.y + self.y
        cm += "-1 0 0 -1 0 0 cm\n"  # rotates by 180 deg.
        space = abs(point.y + self.y)
    else:
        top = height - point.y - self.y  # start of 1st char
        left = point.x + self.x  # start of 1st char
        space = top  # space available

    optcont = self.page._get_optional_content(oc)
    if optcont is not None:
        bdc = "/OC /%s BDC\n" % optcont
        emc = "EMC\n"
    else:
        bdc = emc = ""

    gstate = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
    alpha = "" if gstate is None else "/%s gs\n" % gstate

    # assemble the content stream fragment piece by piece
    out = [
        f"\nq\n{bdc}{alpha}BT\n{cm}1 0 0 1 {_format_g((left, top))} Tm\n/{fname} {_format_g(fontsize)} Tf "
    ]
    if render_mode > 0:
        out.append("%i Tr " % render_mode)
        out.append(_format_g(border_width * fontsize) + " w ")
        if miter_limit is not None:
            out.append(_format_g(miter_limit) + " M ")
    if color is not None:
        out.append(color_str)
    if fill is not None:
        out.append(fill_str)

    # first line is always written; it also sets the line distance
    out.append(encoded[0])
    nlines = 1  # output line counter
    if len(encoded) > 1:
        out.append(f"TJ\n0 -{_format_g(lheight)} TD\n")
    else:
        out.append('TJ')

    for i in range(1, len(encoded)):
        if space < lheight:
            break  # no space left on page
        if i > 1:
            out.append("\nT* ")
        out.append(encoded[i] + 'TJ')
        space -= lheight
        nlines += 1

    out.append("\nET\n%sQ\n" % emc)

    # remember the result until the shape is committed
    self.text_cont += "".join(out)
    return nlines
- # ==============================================================================
- # Shape.insert_textbox
- # ==============================================================================
def insert_textbox(
    self,
    rect: rect_like,
    buffer: typing.Union[str, list],
    *,
    fontname: OptStr = "helv",
    fontfile: OptStr = None,
    fontsize: float = 11,
    lineheight: OptFloat = None,
    set_simple: bool = 0,
    encoding: int = 0,
    color: OptSeq = None,
    fill: OptSeq = None,
    expandtabs: int = 1,
    border_width: float = 0.05,
    miter_limit: float = 1,
    align: int = 0,
    render_mode: int = 0,
    rotate: int = 0,
    morph: OptSeq = None,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> float:
    """Insert text into a given rectangle.

    The text is broken into lines that fit the rectangle's width. Words
    longer than a full line are split character by character. Nothing is
    written if the resulting lines do not fit the rectangle's height.

    Args:
        rect -- the textbox to fill
        buffer -- text to be inserted (string, or list / tuple of lines)
        fontname -- a Base-14 font, font name or '/name'
        fontfile -- name of a font file
        fontsize -- font size
        lineheight -- overwrite the font property
        set_simple -- passed to page.insert_font
        encoding -- passed to page.insert_font
        color -- RGB stroke color triple
        fill -- RGB fill color triple
        render_mode -- text rendering control
        border_width -- thickness of glyph borders as percentage of fontsize
        miter_limit -- value for the "M" operator, None to omit it
        expandtabs -- handles tabulators with string function
        align -- 0 = left, 1 = center, 2 = right, 3 = justified
        rotate -- 0, 90, 180, or 270 degrees
        morph -- morph box with a matrix and a fixpoint
        stroke_opacity, fill_opacity -- passed to page._set_opacity
        oc -- passed to page._get_optional_content
    Returns:
        A float. If non-negative, the text was written and the value is
        the unused space in line progress direction. If negative,
        nothing was written and the absolute value is the missing
        space. For a buffer without any text, the rectangle's full
        extent in line progress direction is returned.
    Raises:
        ValueError -- for an empty or infinite rect, or a rotate value
        that is not a multiple of 90.
    """
    rect = Rect(rect)
    if rect.is_empty or rect.is_infinite:
        raise ValueError("text box must be finite and not empty")

    color_str = ColorCode(color, "c")
    fill_str = ColorCode(fill, "f")
    if fill is None and render_mode == 0:  # ensure fill color for 0 Tr
        fill = color
        fill_str = ColorCode(color, "f")

    optcont = self.page._get_optional_content(oc)
    if optcont is not None:
        bdc = "/OC /%s BDC\n" % optcont
        emc = "EMC\n"
    else:
        bdc = emc = ""

    # determine opacity / transparency
    alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
    if alpha is None:
        alpha = ""
    else:
        alpha = "/%s gs\n" % alpha

    if rotate % 90 != 0:
        raise ValueError("rotate must be multiple of 90")

    rot = rotate
    while rot < 0:
        rot += 360
    rot = rot % 360

    # is buffer worth of dealing with?
    if not bool(buffer):
        return rect.height if rot in (0, 180) else rect.width

    cmp90 = "0 1 -1 0 0 0 cm\n"  # rotates counter-clockwise
    cmm90 = "0 -1 1 0 0 0 cm\n"  # rotates clockwise
    cm180 = "-1 0 0 -1 0 0 cm\n"  # rotates by 180 deg.
    height = self.height

    fname = fontname
    if fname.startswith("/"):
        fname = fname[1:]

    xref = self.page.insert_font(
        fontname=fname, fontfile=fontfile, encoding=encoding, set_simple=set_simple
    )
    fontinfo = CheckFontInfo(self.doc, xref)

    fontdict = fontinfo[1]
    ordering = fontdict["ordering"]
    simple = fontdict["simple"]
    glyphs = fontdict["glyphs"]
    bfname = fontdict["name"]
    ascender = fontdict["ascender"]
    descender = fontdict["descender"]

    if lineheight:
        lheight_factor = lineheight
    elif ascender - descender <= 1:
        lheight_factor = 1.2
    else:
        lheight_factor = ascender - descender
    lheight = fontsize * lheight_factor

    # create a list from buffer, split into its lines
    if type(buffer) in (list, tuple):
        t0 = "\n".join(buffer)
    else:
        t0 = buffer

    # A non-empty list can still join to an empty string (e.g. [""]).
    # Treat that like an empty buffer instead of failing in max().
    if not t0:
        return rect.height if rot in (0, 180) else rect.width

    maxcode = max([ord(c) for c in t0])
    # replace invalid char codes for simple fonts
    if simple and maxcode > 255:
        t0 = "".join([c if ord(c) < 256 else "?" for c in t0])

    t0 = t0.splitlines()

    glyphs = self.doc.get_char_widths(xref, maxcode + 1)
    if simple and bfname not in ("Symbol", "ZapfDingbats"):
        tj_glyphs = None
    else:
        tj_glyphs = glyphs

    # ----------------------------------------------------------------------
    # calculate pixel length of a string
    # ----------------------------------------------------------------------
    def pixlen(x):
        """Calculate pixel length of x."""
        if ordering < 0:
            return sum([glyphs[ord(c)][1] for c in x]) * fontsize
        else:
            return len(x) * fontsize

    # ---------------------------------------------------------------------

    if ordering < 0:
        blen = glyphs[32][1] * fontsize  # pixel size of space character
    else:
        blen = fontsize

    text = ""  # output buffer

    if CheckMorph(morph):
        m1 = Matrix(
            1, 0, 0, 1, morph[0].x + self.x, self.height - morph[0].y - self.y
        )
        mat = ~m1 * morph[1] * m1
        cm = _format_g(JM_TUPLE(mat)) + " cm\n"
    else:
        cm = ""

    # ---------------------------------------------------------------------
    # adjust for text orientation / rotation
    # ---------------------------------------------------------------------
    progr = 1  # direction of line progress
    c_pnt = Point(0, fontsize * ascender)  # used for line progress
    if rot == 0:  # normal orientation
        point = rect.tl + c_pnt  # line 1 is 'lheight' below top
        maxwidth = rect.width  # pixels available in one line
        maxheight = rect.height  # available text height

    elif rot == 90:  # rotate counter clockwise
        c_pnt = Point(fontsize * ascender, 0)  # progress in x-direction
        point = rect.bl + c_pnt  # line 1 'lheight' away from left
        maxwidth = rect.height  # pixels available in one line
        maxheight = rect.width  # available text height
        cm += cmp90

    elif rot == 180:  # text upside down
        # progress upwards in y direction
        c_pnt = -Point(0, fontsize * ascender)
        point = rect.br + c_pnt  # line 1 'lheight' above bottom
        maxwidth = rect.width  # pixels available in one line
        progr = -1  # subtract lheight for next line
        maxheight = rect.height  # available text height
        cm += cm180

    else:  # rotate clockwise (270 or -90)
        # progress from right to left
        c_pnt = -Point(fontsize * ascender, 0)
        point = rect.tr + c_pnt  # line 1 'lheight' left of right
        maxwidth = rect.height  # pixels available in one line
        progr = -1  # subtract lheight for next line
        maxheight = rect.width  # available text height
        cm += cmm90

    # =====================================================================
    # line loop
    # =====================================================================
    just_tab = []  # 'justify' indicators per line

    for i, line in enumerate(t0):
        line_t = line.expandtabs(expandtabs).split(" ")  # split into words
        num_words = len(line_t)
        lbuff = ""  # init line buffer
        rest = maxwidth  # available line pixels
        # =================================================================
        # word loop
        # =================================================================
        for j in range(num_words):
            word = line_t[j]
            pl_w = pixlen(word)  # pixel len of word
            if rest >= pl_w:  # does it fit on the line?
                lbuff += word + " "  # yes, append word
                rest -= pl_w + blen  # update available line space
                continue  # next word

            # word doesn't fit - output line (if not empty)
            if lbuff:
                lbuff = lbuff.rstrip() + "\n"  # line full, append line break
                text += lbuff  # append to total text
                just_tab.append(True)  # can align-justify

            lbuff = ""  # re-init line buffer
            rest = maxwidth  # re-init avail. space

            if pl_w <= maxwidth:  # word shorter than 1 line?
                lbuff = word + " "  # start the line with it
                rest = maxwidth - pl_w - blen  # update free space
                continue

            # long word: split across multiple lines - char by char ...
            if len(just_tab) > 0:
                just_tab[-1] = False  # cannot align-justify
            for c in word:
                if pixlen(lbuff) <= maxwidth - pixlen(c):
                    lbuff += c
                else:  # line full
                    lbuff += "\n"  # close line
                    text += lbuff  # append to text
                    just_tab.append(False)  # cannot align-justify
                    lbuff = c  # start new line with this char

            lbuff += " "  # finish long word
            rest = maxwidth - pixlen(lbuff)  # long word stored

        if lbuff:  # unprocessed line content?
            text += lbuff.rstrip()  # append to text
            just_tab.append(False)  # cannot align-justify

        if i < len(t0) - 1:  # not the last line?
            text += "\n"  # insert line break

    # compute used part of the textbox
    if text.endswith("\n"):
        text = text[:-1]
    lb_count = text.count("\n") + 1  # number of lines written

    # text height = line count * line height plus one descender value
    text_height = lheight * lb_count - descender * fontsize

    more = text_height - maxheight  # difference to height limit
    if more > EPSILON:  # landed too much outside rect
        return (-1) * more  # return deficit, don't output

    more = abs(more)
    if more < EPSILON:
        more = 0  # don't bother with epsilons
    nres = "\nq\n%s%sBT\n" % (bdc, alpha) + cm  # initialize output buffer
    templ = lambda a, b, c, d: f"1 0 0 1 {_format_g((a, b))} Tm /{c} {_format_g(d)} Tf "
    # center, right, justify: output each line with its own specifics
    text_t = text.splitlines()  # split text in lines again
    just_tab[-1] = False  # never justify last line
    for i, t in enumerate(text_t):
        spacing = 0
        pl = maxwidth - pixlen(t)  # length of empty line part
        pnt = point + c_pnt * (i * lheight_factor)  # text start of line
        if align == 1:  # center: right shift by half width
            if rot in (0, 180):
                pnt = pnt + Point(pl / 2, 0) * progr
            else:
                pnt = pnt - Point(0, pl / 2) * progr
        elif align == 2:  # right: right shift by full width
            if rot in (0, 180):
                pnt = pnt + Point(pl, 0) * progr
            else:
                pnt = pnt - Point(0, pl) * progr
        elif align == 3:  # justify
            spaces = t.count(" ")  # number of spaces in line
            if spaces > 0 and just_tab[i]:  # if any, and we may justify
                spacing = pl / spaces  # make every space this much larger
            else:
                spacing = 0  # keep normal space length

        top = height - pnt.y - self.y
        left = pnt.x + self.x
        if rot == 90:
            left = height - pnt.y - self.y
            top = -pnt.x - self.x
        elif rot == 270:
            left = -height + pnt.y + self.y
            top = pnt.x + self.x
        elif rot == 180:
            left = -pnt.x - self.x
            top = -height + pnt.y + self.y

        nres += templ(left, top, fname, fontsize)

        if render_mode > 0:
            nres += "%i Tr " % render_mode
            nres += _format_g(border_width * fontsize) + " w "
            if miter_limit is not None:
                nres += _format_g(miter_limit) + " M "

        if align == 3:
            nres += _format_g(spacing) + " Tw "

        if color is not None:
            nres += color_str
        if fill is not None:
            nres += fill_str
        nres += "%sTJ\n" % getTJstr(t, tj_glyphs, simple, ordering)

    nres += "ET\n%sQ\n" % emc

    self.text_cont += nres
    self.updateRect(rect)
    return more
def finish(
    self,
    width: float = 1,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    lineCap: int = 0,
    lineJoin: int = 0,
    dashes: OptStr = None,
    even_odd: bool = False,
    morph: OptSeq = None,
    closePath: bool = True,
    fill_opacity: float = 1,
    stroke_opacity: float = 1,
    oc: int = 0,
) -> None:
    """Finish the current drawing segment.

    Wraps the path collected so far with its graphics properties (colors,
    opacity, dashes, line cap / join / width, optional content, morphing),
    adds the painting operator and moves the result to the shape's total
    content. Does nothing if no drawing is pending.

    Args:
        width: line width; 0 disables the stroke color.
        color: stroke color; ``None`` sets the width to 0.
        fill: fill color, ``None`` for no filling.
        lineCap: line cap style, emitted if not 0.
        lineJoin: line join style, emitted if not 0.
        dashes: dash pattern string, emitted unless empty or "[] 0".
        even_odd: use the even-odd variants of the fill operators.
        morph: if accepted by ``CheckMorph``, a sequence whose first item
            is a point and whose second item is a matrix.
        closePath: close the path by connecting last to first point.
        fill_opacity: passed to ``page._set_opacity`` as ``ca``.
        stroke_opacity: passed to ``page._set_opacity`` as ``CA``.
        oc: passed to ``page._get_optional_content``.
    """
    if self.draw_cont == "":  # treat empty contents as no-op
        return

    if width == 0:  # border color makes no sense then
        color = None
    elif color is None:  # vice versa
        width = 0

    color_str = ColorCode(color, "c")  # ensure proper color string
    fill_str = ColorCode(fill, "f")  # ensure proper fill string

    optcont = self.page._get_optional_content(oc)
    gstate = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)

    # Operators placed in front of the path, outermost first.
    head = []
    if dashes not in (None, "", "[] 0"):
        head.append("%s d\n" % dashes)
    if lineJoin != 0:
        head.append("%i j\n" % lineJoin)
    if lineCap != 0:
        head.append("%i J\n" % lineCap)
    if gstate is not None:
        head.append("/%s gs\n" % gstate)
    if optcont is not None:
        head.append("/OC /%s BDC\n" % optcont)

    # Operators following the path.
    tail = []
    if width != 1 and width != 0:
        tail.append(_format_g(width) + " w\n")
    if closePath:
        tail.append("h\n")
        self.last_point = None
    if color is not None:
        tail.append(color_str)
    if fill is not None:
        tail.append(fill_str)
        if color is not None:  # fill and stroke
            tail.append("B*\n" if even_odd else "B\n")
        else:  # fill only
            tail.append("f*\n" if even_odd else "f\n")
    else:  # stroke only
        tail.append("S\n")
    if optcont is not None:
        tail.append("EMC\n")

    if CheckMorph(morph):
        m1 = Matrix(
            1, 0, 0, 1, morph[0].x + self.x, self.height - morph[0].y - self.y
        )
        mat = ~m1 * morph[1] * m1
        head.insert(0, _format_g(JM_TUPLE(mat)) + " cm\n")

    self.totalcont += "\nq\n" + "".join(head) + self.draw_cont + "".join(tail) + "Q\n"
    self.draw_cont = ""
    self.last_point = None
    return
def commit(self, overlay: bool = True) -> None:
    """Write the collected shape data to the page's /Contents.

    Finished drawings and inserted text are stored in a new contents
    stream of the page. Afterwards the shape is reset and can be re-used.

    Args:
        overlay: whether the data appear in foreground (default) or
            background.
    """
    CheckParent(self.page)  # doc may have died meanwhile

    # finished drawings first, text afterwards
    self.totalcont = (self.totalcont + self.text_cont).encode()

    if self.totalcont:
        if overlay:
            self.page.wrap_contents()  # ensure a balanced graphics state
        # make /Contents object with dummy stream
        xref = TOOLS._insert_contents(self.page, b" ", overlay)
        # update it with potential compression
        self.doc.update_stream(xref, self.totalcont)

    # clean up for potential re-use
    self.last_point = None
    self.rect = None
    self.draw_cont = ""
    self.text_cont = ""
    self.totalcont = ""
- class Story:
def __init__( self, html='', user_css=None, em=12, archive=None):
    """Create a story from HTML source.

    Args:
        html: the HTML source; it is stored UTF-8 encoded.
        user_css: passed on to the MuPDF story constructor.
        em: passed on to the MuPDF story constructor.
        archive: an ``Archive`` or something ``Archive(...)`` accepts;
            if falsy, an empty MuPDF archive is used.
    """
    buffer_ = mupdf.fz_new_buffer_from_copied_data(html.encode('utf-8'))

    if archive and not isinstance(archive, Archive):
        archive = Archive(archive)
    if archive:
        arch = archive.this
    else:
        arch = mupdf.FzArchive(None)

    # use the FzStoryS class when the MuPDF binding provides it
    story_class = getattr(mupdf, 'FzStoryS', None)
    if story_class is None:
        story_class = mupdf.FzStory
    self.this = story_class(buffer_, user_css, em, arch)
-
def add_header_ids(self):
    '''
    Walk over all items of the story's body and give every `<h1>` to
    `<h6>` item without an `id` attribute a generated one. Generated
    ids are `h_id_0`, `h_id_1`, ... in order of occurrence.
    '''
    counter = 0
    node = self.body.find(None, None, None)
    while node:
        tag = node.tagname
        is_heading = len(tag) == 2 and tag[0] == "h" and tag[1] in "123456"
        if is_heading and not node.get_attribute_value("id"):
            node.set_attribute("id", f"h_id_{counter}")
            counter += 1
        node = node.find_next(None, None, None)
@staticmethod
def add_pdf_links(document_or_stream, positions):
    """
    Adds links to PDF document.
    Args:
        document_or_stream:
            A PDF `Document` or raw PDF content, for example an
            `io.BytesIO` instance.
        positions:
            List of `ElementPosition`'s for `document_or_stream`,
            typically from Story.element_positions(). Only positions
            with bit 0 of `open_close` set are considered. If several
            of them have the same id, the first one is the link
            destination and the others are ignored.
    Returns:
        `document_or_stream` if a `Document` instance, otherwise a
        new `Document` instance.
    We raise a `RuntimeError` if an `href` in `positions` refers to an
    internal position `#<name>` but no item in `positions` has `id =
    name`.
    """
    if isinstance(document_or_stream, Document):
        document = document_or_stream
    else:
        document = Document("pdf", document_or_stream)

    # Map each id to the first position carrying it. This is used to
    # find link destinations.
    id_to_position = dict()
    for position in positions:
        if (position.open_close & 1) and position.id:
            id_to_position.setdefault(position.id, position)

    # Insert links for all positions that have an `href`.
    for position_from in positions:
        if not ((position_from.open_close & 1) and position_from.href):
            continue

        link = dict()
        link['from'] = Rect(position_from.rect)

        if position_from.href.startswith("#"):
            # `<a href="#...">...</a>` internal link.
            target_id = position_from.href[1:]
            try:
                position_to = id_to_position[ target_id]
            except Exception as e:
                if g_exceptions_verbose > 1: exception_info()
                raise RuntimeError(f"No destination with id={target_id}, required by position_from: {position_from}") from e
            # Make link from `position_from`'s rect to top-left of
            # `position_to`'s rect.
            link["kind"] = LINK_GOTO
            x0, y0, x1, y1 = position_to.rect
            # This appears to work well with viewers which scroll
            # to make destination point top-left of window.
            link["to"] = Point(x0, y0)
            link["page"] = position_to.page_num - 1
        elif position_from.href.startswith('name:'):
            # `<a href="name:...">...</a>` named link.
            link['kind'] = LINK_NAMED
            link['name'] = position_from.href[5:]
        else:
            # `<a href="...">...</a>` external link.
            link['kind'] = LINK_URI
            link['uri'] = position_from.href

        document[position_from.page_num - 1].insert_link(link)

    return document
@property
def body(self):
    """The result of `bodytag()` on the story's DOM."""
    return self.document().bodytag()
-
def document( self):
    """Return the story's DOM, wrapped as an `Xml` object."""
    return Xml(mupdf.fz_story_document(self.this))
def draw( self, device, matrix=None):
    """Draw the story's placed content to `device`.

    Args:
        device: object whose `.this` is the MuPDF device; if falsy, a
            MuPDF device created from `None` is used instead.
        matrix: converted with `JM_matrix_from_py` and passed on as the
            transformation matrix.
    """
    ctm2 = JM_matrix_from_py(matrix)
    if device:
        target = device.this
    else:
        target = mupdf.FzDevice(None)
    mupdf.fz_draw_story(self.this, target, ctm2)
def element_positions( self, function, args=None):
    '''
    Trigger a callback function to record where items have been placed.

    Args:
        function:
            Callable that is invoked with one positional argument, an
            object with the attributes `depth`, `heading`, `id`,
            `rect`, `text`, `open_close`, `rect_num` and `href`. Plain
            functions, bound methods and other callables (for example
            `functools.partial` objects) are accepted.
        args:
            Optional dict. Its items are set as additional attributes
            on the object passed to `function`. All keys must be
            strings that are valid identifiers. Anything that is not
            a dict is ignored.
    Raises:
        ValueError: for an invalid key in `args`, or if `function`
        cannot be called with exactly one positional argument.
    '''
    import inspect  # stdlib; only needed for the callback check below

    if type(args) is dict:
        for k in args.keys():
            if not (type(k) is str and k.isidentifier()):
                raise ValueError(f"invalid key '{k}'")
    else:
        args = {}

    bad_callback = "callback 'function' must be a callable with exactly one argument"
    if not callable(function):
        raise ValueError(bad_callback)
    # Check the call signature instead of `__code__.co_argcount`: the
    # latter counts `self` for bound methods and does not exist for
    # callables that are not Python functions.
    try:
        signature = inspect.signature(function)
    except (TypeError, ValueError):
        signature = None  # not introspectable; trust `callable()`
    if signature is not None:
        try:
            signature.bind(None)
        except TypeError:
            raise ValueError(bad_callback) from None

    class Position2:
        pass

    def function2( position):
        # copy the MuPDF position into a plain Python object
        position2 = Position2()
        position2.depth = position.depth
        position2.heading = position.heading
        position2.id = position.id
        position2.rect = JM_py_from_rect(position.rect)
        position2.text = position.text
        position2.open_close = position.open_close
        position2.rect_num = position.rectangle_num
        position2.href = position.href
        if args:
            for k, v in args.items():
                setattr( position2, k, v)
        function( position2)

    mupdf.fz_story_positions( self.this, function2)
def place( self, where, flags=0):
    '''
    Wrapper for fz_place_story_flags().

    Returns a tuple `(more, filled)`: the value returned by
    `fz_place_story_flags()` and the filled rectangle converted with
    `JM_py_from_rect()`.
    '''
    target = JM_rect_from_py(where)
    filled = mupdf.FzRect()
    more = mupdf.fz_place_story_flags(self.this, target, filled, flags)
    return more, JM_py_from_rect(filled)
def reset( self):
    '''
    Wrapper for fz_reset_story().
    '''
    mupdf.fz_reset_story(self.this)
-
def write(self, writer, rectfn, positionfn=None, pagefn=None):
    '''
    Place and draw the story into a sequence of rectangles until all
    content has been placed.

    Args:
        writer:
            Object with `begin_page(mediabox)` and `end_page()`. If
            falsy, the story is placed and drawn to no device, and no
            pages are created.
        rectfn:
            Callable `rectfn(rect_num, filled)` returning
            `(mediabox, rect, ctm)`. `rect_num` counts the calls from
            0, `filled` is the rectangle filled by the previous
            placement. A truthy `mediabox` starts a new page.
        positionfn:
            Optional callable, called via `element_positions()` after
            each placement. Positions get an additional `page_num`
            attribute.
        pagefn:
            Optional callable `pagefn(page_num, mediabox, dev, after)`,
            called at the start (`after=0`) and at the end (`after=1`)
            of each page created on `writer`. Both calls for one page
            receive that page's number and mediabox.
    '''
    dev = None
    page_num = 0
    page_mediabox = None  # mediabox of the page currently open on `writer`
    rect_num = 0
    filled = Rect(0, 0, 0, 0)
    while 1:
        mediabox, rect, ctm = rectfn(rect_num, filled)
        rect_num += 1
        if mediabox:
            # new page.
            page_num += 1
        more, filled = self.place( rect)
        if positionfn:
            def positionfn2(position):
                # We add a `.page_num` member to the
                # `ElementPosition` instance.
                position.page_num = page_num
                positionfn(position)
            self.element_positions(positionfn2)
        if writer:
            if mediabox:
                # new page.
                if dev:
                    # Finish the previous page. `page_num` and
                    # `mediabox` already refer to the new page here,
                    # so report the previous page's values.
                    if pagefn:
                        pagefn(page_num - 1, page_mediabox, dev, 1)
                    writer.end_page()
                dev = writer.begin_page( mediabox)
                page_mediabox = mediabox
                if pagefn:
                    pagefn(page_num, mediabox, dev, 0)
            self.draw( dev, ctm)
            if not more:
                # Finish the last page. `mediabox` may be falsy here if
                # the last rectangle did not start a new page.
                if pagefn:
                    pagefn( page_num, page_mediabox, dev, 1)
                writer.end_page()
        else:
            self.draw(None, ctm)
        if not more:
            break
@staticmethod
def write_stabilized(writer, contentfn, rectfn, user_css=None, em=12, positionfn=None, pagefn=None, archive=None, add_header_ids=True):
    """Repeatedly lay out generated content until it stops changing.

    Each pass calls `contentfn(positions)` with the positions collected
    in the previous pass (an empty list at first), builds a
    `Story(content, user_css, em, archive)`, optionally calls its
    `add_header_ids()`, and runs `Story.write()`.  While the content
    differs from the previous pass, `None` is passed as the writer.  Once
    two consecutive passes produce equal content, that pass is written to
    `writer`, positions are also forwarded to `positionfn` (if given), and
    the function returns.
    """
    positions = []
    content = None

    def collect(position):
        #log(f"write_stabilized(): {stable=} {positionfn=} {position=}")
        positions.append(position)
        if stable and positionfn:
            positionfn(position)

    # Iterate until stable.
    while True:
        previous = content
        content = contentfn(positions)
        stable = bool(content == previous)
        story = Story(content, user_css, em, archive)
        if add_header_ids:
            story.add_header_ids()
        # Start a fresh list; `collect` appends to whatever `positions`
        # currently refers to.
        positions = []
        story.write(writer if stable else None, rectfn, collect, pagefn)
        if stable:
            return
@staticmethod
def write_stabilized_with_links(contentfn, rectfn, user_css=None, em=12, positionfn=None, pagefn=None, archive=None, add_header_ids=True):
    """Run `Story.write_stabilized()` into memory and add PDF links.

    Output goes to a `DocumentWriter` on an in-memory `io.BytesIO`.  Every
    position reported by `Story.write_stabilized()` is recorded and, if
    `positionfn` is given, forwarded to it.  Returns the result of
    `Story.add_pdf_links(stream, positions)`.
    """
    #log("write_stabilized_with_links()")
    buffer = io.BytesIO()
    out = DocumentWriter(buffer)
    recorded = []

    def record(position):
        #log(f"write_stabilized_with_links(): {position=}")
        recorded.append(position)
        if positionfn:
            positionfn(position)

    Story.write_stabilized(
        out,
        contentfn,
        rectfn,
        user_css,
        em,
        record,
        pagefn,
        archive,
        add_header_ids,
    )
    out.close()
    buffer.seek(0)
    return Story.add_pdf_links(buffer, recorded)
def write_with_links(self, rectfn, positionfn=None, pagefn=None):
    """Run `self.write()` into memory and add PDF links.

    Output goes to a `DocumentWriter` on an in-memory `io.BytesIO`.  Every
    position reported by `self.write()` is recorded and, if `positionfn`
    is given, forwarded to it.  Returns the result of
    `Story.add_pdf_links(stream, positions)`.
    """
    #log("write_with_links()")
    buffer = io.BytesIO()
    out = DocumentWriter(buffer)
    recorded = []

    def record(position):
        #log(f"write_with_links(): {position=}")
        recorded.append(position)
        if positionfn:
            positionfn(position)

    self.write(out, rectfn, positionfn=record, pagefn=pagefn)
    out.close()
    buffer.seek(0)
    return Story.add_pdf_links(buffer, recorded)
class FitResult:
    '''
    The result from a `Story.fit*()` method.

    Members:

    `big_enough`:
        `True` if the fit succeeded.
    `filled`:
        Tuple (x0, y0, x1, y1) from the last call to `Story.place()`. This
        will be wider than .rect if any single word (which we never split)
        was too wide for .rect.
    `more`:
        `False` if the fit succeeded.
    `numcalls`:
        Number of calls made to `self.place()`.
    `parameter`:
        The successful parameter value, or the largest failing value.
    `rect`:
        The pymupdf.Rect created from `parameter`.

    All members default to `None`.
    '''
    def __init__(self, big_enough=None, filled=None, more=None, numcalls=None, parameter=None, rect=None):
        self.big_enough, self.filled, self.more = big_enough, filled, more
        self.numcalls, self.parameter, self.rect = numcalls, parameter, rect

    def __repr__(self):
        # Each member is rendered as ' name=value'; the result therefore
        # starts with a space.
        names = ('big_enough', 'filled', 'more', 'numcalls', 'parameter', 'rect')
        return ''.join(f' {name}={getattr(self, name)}' for name in names)
def fit(self, fn, pmin=None, pmax=None, delta=0.001, verbose=False, flags=0):
    '''
    Finds optimal rect that contains the story `self`.

    Returns a `Story.FitResult` instance.

    On success, the last call to `self.place()` will have been with the
    returned rectangle, so `self.draw()` can be used directly.

    Args:
        :arg fn:
            A callable taking a floating point `parameter` and returning a
            `pymupdf.Rect()`. If the rect is empty, we assume the story will
            not fit and do not call `self.place()`.

            Must guarantee that `self.place()` behaves monotonically when
            given rect `fn(parameter)` as `parameter` increases. This
            usually means that both width and height increase or stay
            unchanged as `parameter` increases.
        :arg pmin:
            Minimum parameter to consider; `None` for -infinity.
        :arg pmax:
            Maximum parameter to consider; `None` for +infinity.
        :arg delta:
            Maximum error in returned `parameter`. If `delta` is smaller
            than what floating point can resolve between the current
            bounds (e.g. zero), the search stops as soon as the bounds
            cannot be narrowed any further.
        :arg verbose:
            If true we output diagnostics.
        :arg flags:
            Passed to mupdf.fz_place_story_flags(). e.g.
            zero or `mupdf.FZ_PLACE_STORY_FLAG_NO_OVERFLOW`.
    '''
    def log(text):
        assert verbose
        message(f'fit(): {text}')

    assert isinstance(pmin, (int, float)) or pmin is None
    assert isinstance(pmax, (int, float)) or pmax is None

    class State:
        # Mutable search state shared by the nested helpers below.
        def __init__(self):
            self.pmin = pmin            # largest parameter known to fail (or initial bound)
            self.pmax = pmax            # smallest parameter known to fit (or initial bound)
            self.pmin_result = None     # FitResult for self.pmin
            self.pmax_result = None     # FitResult for self.pmax
            self.result = None
            self.numcalls = 0           # number of self.place() calls so far
            self.last_p = None          # parameter of the most recent update()
            if verbose:
                self.pmin0 = pmin
                self.pmax0 = pmax
    state = State()

    if verbose:
        log(f'starting. {state.pmin=} {state.pmax=}.')

    self.reset()

    def ret():
        # Build the final result. If a fitting parameter is known but was
        # not the most recent placement, place it again so that
        # self.draw() can be used directly by the caller.
        if state.pmax is not None:
            if state.last_p != state.pmax:
                if verbose:
                    log(f'Calling update() with pmax, because was overwritten by later calls.')
                big_enough = update(state.pmax)
                assert big_enough
            result = state.pmax_result
        else:
            result = state.pmin_result if state.pmin_result else Story.FitResult(numcalls=state.numcalls)
        if verbose:
            log(f'finished. {state.pmin0=} {state.pmax0=} {state.pmax=}: returning {result=}')
        return result

    def update(parameter):
        '''
        Evaluates `more, _ = self.place(fn(parameter))`. If `more` is
        false, then `rect` is big enough to contain `self` and we
        set `state.pmax=parameter` and return True. Otherwise we set
        `state.pmin=parameter` and return False.
        '''
        rect = fn(parameter)
        assert isinstance(rect, Rect), f'{type(rect)=} {rect=}'
        if rect.is_empty:
            big_enough = False
            result = Story.FitResult(parameter=parameter, numcalls=state.numcalls)
            if verbose:
                log(f'update(): not calling self.place() because rect is empty.')
        else:
            more, filled = self.place(rect, flags)
            state.numcalls += 1
            big_enough = not more
            result = Story.FitResult(
                    filled=filled,
                    more=more,
                    numcalls=state.numcalls,
                    parameter=parameter,
                    rect=rect,
                    big_enough=big_enough,
                    )
            if verbose:
                log(f'update(): called self.place(): {state.numcalls:>2d}: {more=} {parameter=} {rect=}.')
        if big_enough:
            state.pmax = parameter
            state.pmax_result = result
        else:
            state.pmin = parameter
            state.pmin_result = result
        state.last_p = parameter
        return big_enough

    def opposite(p, direction):
        '''
        Returns same sign as `direction`, larger or smaller than `p` if
        direction is positive or negative respectively.
        '''
        if p is None or p==0:
            return direction
        if direction * p > 0:
            return 2 * p
        return -p

    if state.pmin is None:
        # Find an initial finite pmin value.
        if verbose: log(f'finding pmin.')
        parameter = opposite(state.pmax, -1)
        while 1:
            if not update(parameter):
                break
            parameter *= 2
    else:
        if update(state.pmin):
            if verbose: log(f'{state.pmin=} is big enough.')
            return ret()

    if state.pmax is None:
        # Find an initial finite pmax value.
        if verbose: log(f'finding pmax.')
        parameter = opposite(state.pmin, +1)
        while 1:
            if update(parameter):
                break
            parameter *= 2
    else:
        if not update(state.pmax):
            # No solution possible. Log before clearing pmax so that the
            # diagnostic shows the value that failed.
            if verbose: log(f'No solution possible {state.pmax=}.')
            state.pmax = None
            return ret()

    # Do binary search in pmin..pmax.
    if verbose: log(f'doing binary search with {state.pmin=} {state.pmax=}.')
    while 1:
        if state.pmax - state.pmin < delta:
            return ret()
        parameter = (state.pmin + state.pmax) / 2
        if not (state.pmin < parameter < state.pmax):
            # The bounds are so close that no value lies strictly between
            # them; further iterations could not make progress.
            return ret()
        update(parameter)
def fit_scale(self, rect, scale_min=0, scale_max=None, delta=0.001, verbose=False, flags=0):
    '''
    Finds smallest value `scale` in range `scale_min..scale_max` where
    `scale * rect` is large enough to contain the story `self`.

    Returns a `Story.FitResult` instance with `.parameter` set to `scale`.

    :arg rect:
        Sequence `(x0, y0, x1, y1)`. The top-left corner is kept fixed;
        width and height are multiplied by `scale`.
    :arg scale_min:
        Minimum scale to consider; must be >= 0.
    :arg scale_max:
        Maximum scale to consider, must be >= scale_min or `None` for
        infinite.
    :arg delta:
        Maximum error in returned scale.
    :arg verbose:
        If true we output diagnostics.
    :arg flags:
        Passed to Story.place().
    '''
    left, top, right, bottom = rect
    base_width = right - left
    base_height = bottom - top

    def scaled_rect(scale):
        return Rect(left, top, left + scale*base_width, top + scale*base_height)

    return self.fit(scaled_rect, scale_min, scale_max, delta, verbose, flags)
def fit_height(self, width, height_min=0, height_max=None, origin=(0, 0), delta=0.001, verbose=False, flags=0):
    '''
    Finds smallest height in range `height_min..height_max` where a rect
    with size `(width, height)` is large enough to contain the story
    `self`.

    Returns a `Story.FitResult` instance.

    :arg width:
        width of rect.
    :arg height_min:
        Minimum height to consider; must be >= 0.
    :arg height_max:
        Maximum height to consider, must be >= height_min or `None` for
        infinite.
    :arg origin:
        `(x0, y0)` of rect.
    :arg delta:
        Maximum error in returned height.
    :arg verbose:
        If true we output diagnostics.
    :arg flags:
        Passed on to `self.fit()`; defaults to 0.
    '''
    x0, y0 = origin
    x1 = x0 + width
    def fn(height):
        return Rect(x0, y0, x1, y0+height)
    return self.fit(fn, height_min, height_max, delta, verbose, flags)
def fit_width(self, height, width_min=0, width_max=None, origin=(0, 0), delta=0.001, verbose=False, flags=0):
    '''
    Finds smallest width in range `width_min..width_max` where a rect with size
    `(width, height)` is large enough to contain the story `self`.

    Returns a `Story.FitResult` instance.

    :arg height:
        height of rect.
    :arg width_min:
        Minimum width to consider; must be >= 0.
    :arg width_max:
        Maximum width to consider, must be >= width_min or `None` for
        infinite.
    :arg origin:
        `(x0, y0)` of rect.
    :arg delta:
        Maximum error in returned width.
    :arg verbose:
        If true we output diagnostics.
    :arg flags:
        Passed on to `self.fit()`; defaults to 0.
    '''
    x0, y0 = origin
    y1 = y0 + height
    def fn(width):
        return Rect(x0, y0, x0+width, y1)
    return self.fit(fn, width_min, width_max, delta, verbose, flags)
class TextPage:
    """Python wrapper around a `mupdf.FzStextPage`."""

    def __init__(self, *args):
        """Create from a `mupdf.FzRect` (mediabox of a new, empty text page)
        or wrap an existing `mupdf.FzStextPage`.

        Raises `Exception` for any other argument combination.
        """
        if args_match(args, mupdf.FzRect):
            mediabox = args[0]
            self.this = mupdf.FzStextPage( mediabox)
        elif args_match(args, mupdf.FzStextPage):
            self.this = args[0]
        else:
            raise Exception(f'Unrecognised args: {args}')
        self.thisown = True
        self.parent = None

    def _extractText(self, format_):
        """Return the page content as a string.

        `format_` selects the output: 1 = HTML, 3 = XML, 4 = XHTML, anything
        else = plain text.
        """
        this_tpage = self.this
        res = mupdf.fz_new_buffer(1024)
        out = mupdf.FzOutput( res)
        # fixme: mupdfwrap.py thinks fz_output is not copyable, possibly
        # because there is no .refs member visible and no fz_keep_output() fn,
        # although there is an fz_drop_output(). So mupdf.fz_new_output_with_buffer()
        # doesn't convert the returned fz_output* into a mupdf.FzOutput.
        #out = mupdf.FzOutput(out)
        if format_ == 1:
            mupdf.fz_print_stext_page_as_html(out, this_tpage, 0)
        elif format_ == 3:
            mupdf.fz_print_stext_page_as_xml(out, this_tpage, 0)
        elif format_ == 4:
            mupdf.fz_print_stext_page_as_xhtml(out, this_tpage, 0)
        else:
            JM_print_stext_page_as_text(res, this_tpage)
        out.fz_close_output()
        text = JM_EscapeStrFromBuffer(res)
        return text

    def _getNewBlockList(self, page_dict, raw):
        """Let `JM_make_textpage_dict()` fill `page_dict` from this page."""
        JM_make_textpage_dict(self.this, page_dict, raw)

    def _textpage_dict(self, raw=False):
        """Return a new dict with "width"/"height" taken from `self.rect`,
        further filled by `_getNewBlockList()`."""
        page_dict = {"width": self.rect.width, "height": self.rect.height}
        self._getNewBlockList(page_dict, raw)
        return page_dict

    def _extract_dict(self, raw, cb, sort):
        """Shared implementation of the `extract*DICT` / `extract*JSON` methods.

        Args:
            raw: passed to `_textpage_dict()`.
            cb: if not None, its `.width` and `.height` replace the
                "width" and "height" entries.
            sort: if true, the "blocks" list is sorted in place by
                `(bbox[3], bbox[0])`, i.e. bottom edge first, then left edge.
        """
        val = self._textpage_dict(raw=raw)
        if cb is not None:
            val["width"] = cb.width
            val["height"] = cb.height
        if sort:
            blocks = val["blocks"]
            blocks.sort(key=lambda b: (b["bbox"][3], b["bbox"][0]))
            val["blocks"] = blocks
        return val

    @staticmethod
    def _dict_to_json(val):
        """Serialise `val` to JSON; `bytes`/`bytearray` values are emitted as
        base64 text."""
        import base64
        import json

        class b64encode(json.JSONEncoder):
            def default(self, s):
                if type(s) in (bytes, bytearray):
                    return base64.b64encode(s).decode()
                # NOTE(review): any other unsupported type falls through and
                # is therefore serialised as `null` instead of raising.

        return json.dumps(val, separators=(",", ":"), cls=b64encode, indent=1)

    def extractBLOCKS(self):
        """Return a list with text block information."""
        if 1 or g_use_extra:
            return extra.extractBLOCKS(self.this)
        # The code below is currently unreachable because of the `1 or`
        # above; it is kept as the pure-Python implementation.
        block_n = -1
        this_tpage = self.this
        tp_rect = mupdf.FzRect(this_tpage.m_internal.mediabox)
        res = mupdf.fz_new_buffer(1024)
        lines = []
        for block in this_tpage:
            block_n += 1
            blockrect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
            if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_TEXT:
                mupdf.fz_clear_buffer(res) # set text buffer to empty
                line_n = -1
                last_char = 0
                for line in block:
                    line_n += 1
                    linerect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
                    for ch in line:
                        cbbox = JM_char_bbox(line, ch)
                        if (not JM_rects_overlap(tp_rect, cbbox)
                                and not mupdf.fz_is_infinite_rect(tp_rect)
                                ):
                            continue
                        JM_append_rune(res, ch.m_internal.c)
                        last_char = ch.m_internal.c
                        linerect = mupdf.fz_union_rect(linerect, cbbox)
                    if last_char != 10 and not mupdf.fz_is_empty_rect(linerect):
                        mupdf.fz_append_byte(res, 10)
                    blockrect = mupdf.fz_union_rect(blockrect, linerect)
                text = JM_EscapeStrFromBuffer(res)
            elif (JM_rects_overlap(tp_rect, block.m_internal.bbox)
                    or mupdf.fz_is_infinite_rect(tp_rect)
                    ):
                img = block.i_image()
                cs = img.colorspace()
                text = "<image: %s, width: %d, height: %d, bpc: %d>" % (
                        mupdf.fz_colorspace_name(cs),
                        img.w(), img.h(), img.bpc()
                        )
                blockrect = mupdf.fz_union_rect(blockrect, mupdf.FzRect(block.m_internal.bbox))
            if not mupdf.fz_is_empty_rect(blockrect):
                litem = (
                        blockrect.x0,
                        blockrect.y0,
                        blockrect.x1,
                        blockrect.y1,
                        text,
                        block_n,
                        block.m_internal.type,
                        )
                lines.append(litem)
        return lines

    def extractDICT(self, cb=None, sort=False) -> dict:
        """Return page content as a Python dict of images and text spans."""
        return self._extract_dict(False, cb, sort)

    def extractHTML(self) -> str:
        """Return page content as a HTML string."""
        return self._extractText(1)

    def extractIMGINFO(self, hashes=0):
        """Return a list with image meta information.

        One dict is produced per non-text block. If `hashes` is true, each
        dict additionally gets a "digest" entry (MD5 of the image's pixmap).
        """
        block_n = -1
        this_tpage = self.this
        rc = []
        for block in this_tpage:
            block_n += 1
            if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_TEXT:
                continue
            img = block.i_image()
            img_size = 0
            mask = img.mask()
            has_mask = bool(mask.m_internal)
            compr_buff = mupdf.fz_compressed_image_buffer(img)
            if compr_buff.m_internal:
                img_size = compr_buff.fz_compressed_buffer_size()
                compr_buff = None
            if hashes:
                r = mupdf.FzIrect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT)
                assert mupdf.fz_is_infinite_irect(r)
                m = mupdf.FzMatrix(img.w(), 0, 0, img.h(), 0, 0)
                pix, w, h = mupdf.fz_get_pixmap_from_image(img, r, m)
                digest = mupdf.fz_md5_pixmap2(pix)
                digest = bytes(digest)
            if img_size == 0:
                # No compressed buffer size available: fall back to
                # width * height * n.
                img_size = img.w() * img.h() * img.n()
            cs = mupdf.FzColorspace(mupdf.ll_fz_keep_colorspace(img.m_internal.colorspace))
            block_dict = dict()
            block_dict[dictkey_number] = block_n
            block_dict[dictkey_bbox] = JM_py_from_rect(block.m_internal.bbox)
            block_dict[dictkey_matrix] = JM_py_from_matrix(block.i_transform())
            block_dict[dictkey_width] = img.w()
            block_dict[dictkey_height] = img.h()
            block_dict[dictkey_colorspace] = mupdf.fz_colorspace_n(cs)
            block_dict[dictkey_cs_name] = mupdf.fz_colorspace_name(cs)
            block_dict[dictkey_xres] = img.xres()
            block_dict[dictkey_yres] = img.yres()
            block_dict[dictkey_bpc] = img.bpc()
            block_dict[dictkey_size] = img_size
            if hashes:
                block_dict["digest"] = digest
            block_dict["has-mask"] = has_mask
            rc.append(block_dict)
        return rc

    def extractJSON(self, cb=None, sort=False) -> str:
        """Return 'extractDICT' converted to JSON format."""
        return self._dict_to_json(self._extract_dict(False, cb, sort))

    def extractRAWDICT(self, cb=None, sort=False) -> dict:
        """Return page content as a Python dict of images and text characters."""
        return self._extract_dict(True, cb, sort)

    def extractRAWJSON(self, cb=None, sort=False) -> str:
        """Return 'extractRAWDICT' converted to JSON format."""
        return self._dict_to_json(self._extract_dict(True, cb, sort))

    def extractSelection(self, pointa, pointb):
        """Return the result of `mupdf.fz_copy_selection()` for the two
        given points."""
        a = JM_point_from_py(pointa)
        b = JM_point_from_py(pointb)
        found = mupdf.fz_copy_selection(self.this, a, b, 0)
        return found

    def extractText(self, sort=False) -> str:
        """Return simple, bare text on the page.

        With `sort` true, the text of the items from `extractBLOCKS()` is
        concatenated after sorting them by `(y1, x0)`.
        """
        if not sort:
            return self._extractText(0)
        blocks = self.extractBLOCKS()[:]
        blocks.sort(key=lambda b: (b[3], b[0]))
        return "".join([b[4] for b in blocks])

    def extractTextbox(self, rect):
        """Return the text that `JM_copy_rectangle()` yields for `rect`."""
        this_tpage = self.this
        assert isinstance(this_tpage, mupdf.FzStextPage)
        area = JM_rect_from_py(rect)
        found = JM_copy_rectangle(this_tpage, area)
        rc = PyUnicode_DecodeRawUnicodeEscape(found)
        return rc

    def extractWORDS(self, delimiters=None):
        """Return a list with text word information."""
        if 1 or g_use_extra:
            return extra.extractWORDS(self.this, delimiters)
        # The code below is currently unreachable because of the `1 or`
        # above; it is kept as the pure-Python implementation.
        buflen = 0
        last_char_rtl = 0
        block_n = -1
        wbbox = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)  # word bbox
        this_tpage = self.this
        tp_rect = mupdf.FzRect(this_tpage.m_internal.mediabox)

        lines = None
        buff = mupdf.fz_new_buffer(64)
        lines = []
        for block in this_tpage:
            block_n += 1
            if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
                continue
            line_n = -1
            for line in block:
                line_n += 1
                word_n = 0                      # word counter per line
                mupdf.fz_clear_buffer(buff)     # reset word buffer
                buflen = 0                      # reset char counter
                for ch in line:
                    cbbox = JM_char_bbox(line, ch)
                    if (not JM_rects_overlap(tp_rect, cbbox)
                            and not mupdf.fz_is_infinite_rect(tp_rect)
                            ):
                        continue

                    if buflen == 0 and ch.m_internal.c == 0x200d:
                        # ZERO WIDTH JOINER cannot start a word
                        continue

                    word_delimiter = JM_is_word_delimiter(ch.m_internal.c, delimiters)
                    this_char_rtl = JM_is_rtl_char(ch.m_internal.c)
                    if word_delimiter or this_char_rtl != last_char_rtl:
                        if buflen == 0 and word_delimiter:
                            continue    # skip delimiters at line start
                        if not mupdf.fz_is_empty_rect(wbbox):
                            word_n, wbbox = JM_append_word(lines, buff, wbbox, block_n, line_n, word_n)
                        mupdf.fz_clear_buffer(buff)
                        buflen = 0  # reset char counter
                        if word_delimiter:
                            continue
                    # append one unicode character to the word
                    JM_append_rune(buff, ch.m_internal.c)
                    last_char_rtl = this_char_rtl
                    buflen += 1
                    # enlarge word bbox
                    wbbox = mupdf.fz_union_rect(wbbox, JM_char_bbox(line, ch))
                if buflen and not mupdf.fz_is_empty_rect(wbbox):
                    word_n, wbbox = JM_append_word(lines, buff, wbbox, block_n, line_n, word_n)
                buflen = 0
        return lines

    def extractXHTML(self) -> str:
        """Return page content as a XHTML string."""
        return self._extractText(4)

    def extractXML(self) -> str:
        """Return page content as a XML string."""
        return self._extractText(3)

    def poolsize(self):
        """TextPage current poolsize."""
        tpage = self.this
        pool = mupdf.Pool(tpage.m_internal.pool)
        size = mupdf.fz_pool_size( pool)
        pool.m_internal = None  # Ensure that pool's destructor does not free the pool.
        return size

    @property
    def rect(self):
        """Page rectangle."""
        this_tpage = self.this
        mediabox = this_tpage.m_internal.mediabox
        val = JM_py_from_rect(mediabox)
        val = Rect(val)
        return val

    def search(self, needle, hit_max=0, quads=1):
        """Locate 'needle' returning rects or quads.

        `hit_max` is accepted but not used by this method. With `quads`
        false, consecutive hit rectangles that share the same bottom
        coordinate and overlap are merged into one rectangle.
        """
        val = JM_search_stext_page(self.this, needle)
        if not val:
            return val
        items = len(val)
        for i in range(items):  # change entries to quads or rects
            q = Quad(val[i])
            if quads:
                val[i] = q
            else:
                val[i] = q.rect
        if quads:
            return val
        i = 0  # join overlapping rects on the same line
        while i < items - 1:
            v1 = val[i]
            v2 = val[i + 1]
            if v1.y1 != v2.y1 or (v1 & v2).is_empty:
                i += 1
                continue  # no overlap on same line
            val[i] = v1 | v2  # join rectangles
            del val[i + 1]  # remove v2
            items -= 1  # reduce item count
        return val

    extractTEXT = extractText
- class TextWriter:
def __init__(self, page_rect, opacity=1, color=None):
    """Stores text spans for later output on compatible PDF pages.

    Args:
        page_rect: rect-like, stored as `self.rect`.
        opacity: stored as `self.opacity`.
        color: stored as `self.color`.
    """
    self.this = mupdf.fz_new_text()
    self.opacity = opacity
    self.color = color
    self.rect = Rect(page_rect)
    # Matrix that flips the y axis over the page height, and its inverse.
    self.ctm = Matrix(1, 0, 0, -1, 0, self.rect.height)
    self.ictm = ~self.ctm

    cursor = Point()
    self.last_point = cursor
    cursor.__doc__ = "Position following last text insertion."

    covered = Rect()
    self.text_rect = covered
    covered.__doc__ = "Accumulated area of text spans."

    self.used_fonts = set()
    self.thisown = True
@property
def _bbox(self):
    """Return `mupdf.fz_bound_text()` of the stored text as a `Rect`.

    The bound is computed with `mupdf.FzStrokeState(None)` and a default
    `mupdf.FzMatrix()`.
    """
    bounds = mupdf.fz_bound_text(self.this, mupdf.FzStrokeState(None), mupdf.FzMatrix())
    return Rect(JM_py_from_rect(bounds))
def append(self, pos, text, font=None, fontsize=11, language=None, right_to_left=0, small_caps=0):
    """Store 'text' at point 'pos' using 'font' and 'fontsize'.

    Args:
        pos: point-like start position; transformed with `self.ictm`.
        text: the string to store.
        font: font object; `Font("helv")` is used if None.
        fontsize: the font size.
        language: passed to `mupdf.fz_text_language_from_string()`.
        right_to_left: if true, `text` is passed through `clean_rtl()`
            and then reversed character by character before storing.
        small_caps: if non-zero, `JM_show_string_cs()` is used instead of
            `mupdf.fz_show_string()`.

    Returns:
        Tuple `(self.text_rect, self.last_point)` after the insertion.

    Raises:
        ValueError: if `font.is_writable` is false.
    """
    pos = Point(pos) * self.ictm
    #log( '{font=}')
    if font is None:
        font = Font("helv")
    if not font.is_writable:
        if 0:
            log( f'{font.this.m_internal.name=}')
            log( f'{font.this.m_internal.t3matrix=}')
            log( f'{font.this.m_internal.bbox=}')
            log( f'{font.this.m_internal.glyph_count=}')
            log( f'{font.this.m_internal.use_glyph_bbox=}')
            log( f'{font.this.m_internal.width_count=}')
            log( f'{font.this.m_internal.width_default=}')
            log( f'{font.this.m_internal.has_digest=}')
        # f-string so that the font name is actually part of the
        # diagnostic, like in the cppyy message below.
        log( f'Unsupported font {font.name=}')
        if mupdf_cppyy:
            import cppyy
            log( f'Unsupported font {cppyy.gbl.mupdf_font_name(font.this.m_internal)=}')
        raise ValueError("Unsupported font '%s'." % font.name)
    if right_to_left:
        # The text is stored already reversed, so MuPDF is told to treat
        # it as left-to-right.
        text = self.clean_rtl(text)
        text = "".join(reversed(text))
        right_to_left = 0

    lang = mupdf.fz_text_language_from_string(language)
    p = JM_point_from_py(pos)
    trm = mupdf.fz_make_matrix(fontsize, 0, 0, fontsize, p.x, p.y)
    markup_dir = 0
    wmode = 0
    if small_caps == 0:
        trm = mupdf.fz_show_string( self.this, font.this, trm, text, wmode, right_to_left, markup_dir, lang)
    else:
        trm = JM_show_string_cs( self.this, font.this, trm, text, wmode, right_to_left, markup_dir, lang)
    val = JM_py_from_matrix(trm)

    # The last two components of the returned matrix give the position
    # following the inserted text.
    self.last_point = Point(val[-2:]) * self.ctm
    self.text_rect = self._bbox * self.ctm
    val = self.text_rect, self.last_point
    if font.flags["mono"] == 1:
        self.used_fonts.add(font)
    return val
def appendv(self, pos, text, font=None, fontsize=11, language=None, small_caps=False):
    """Store 'text' one character per line, going downwards from 'pos'.

    Each character is stored via `self.append()`; after every character
    the y coordinate is advanced by `fontsize * 1.2`.

    Args:
        pos: point-like position of the first character. It is copied, so
            the caller's object is not modified and tuples are accepted.
        text: the string to store.
        font, fontsize, language, small_caps: passed to `self.append()`.

    Returns:
        Tuple `(self.text_rect, self.last_point)`.
    """
    lheight = fontsize * 1.2
    # Work on a private Point: `pos` may be a tuple (no `.y`), and a
    # caller-owned Point must not be changed behind the caller's back.
    pos = Point(pos)
    for c in text:
        self.append(pos, c, font=font, fontsize=fontsize,
                language=language, small_caps=small_caps)
        pos.y += lheight
    return self.text_rect, self.last_point
def clean_rtl(self, text):
    """Revert the sequence of Latin text parts.

    Text with right-to-left writing direction (Arabic, Hebrew) often
    contains Latin parts, which are written in left-to-right: numbers, names,
    etc. For output as PDF text we need *everything* in right-to-left.
    E.g. an input like "<arabic> ABCDE FG HIJ <arabic> KL <arabic>" will be
    converted to "<arabic> JIH GF EDCBA <arabic> LK <arabic>". The Arabic
    parts remain untouched.

    A word counts as Latin if it has at least two characters and all of
    its code points are <= 255. Words are separated at single spaces.

    Args:
        text: str
    Returns:
        Massaged string.
    """
    if not text:
        return text
    # split into words at space boundaries
    words = text.split(" ")

    # revert character sequence for Latin only words, remembering where
    # they are
    latin = []
    for i, w in enumerate(words):
        if len(w) >= 2 and max(ord(c) for c in w) <= 255:
            words[i] = w[::-1]
            latin.append(i)

    def revert_run(first, last):
        # adjacent Latin words must revert their sequence, too
        if last > first:
            words[first : last + 1] = words[first : last + 1][::-1]

    run_start = None  # index of first word of the current run
    prev = None  # index of last word of the current run
    for i in latin:
        if run_start is None:
            run_start = i
        elif i != prev + 1:  # gap: the previous run is complete
            revert_run(run_start, prev)
            run_start = i
        prev = i
    if run_start is not None:
        # also handle the last run - it is not followed by a gap
        revert_run(run_start, prev)

    return " ".join(words)
def fill_textbox(
        writer: 'TextWriter',
        rect: rect_like,
        text: typing.Union[str, list],
        pos: point_like = None,
        font: typing.Optional[Font] = None,
        fontsize: float = 11,
        lineheight: OptFloat = None,
        align: int = 0,
        warn: bool = None,
        right_to_left: bool = False,
        small_caps: bool = False,
        ) -> list:
    """Fill a rectangle with text.

    Args:
        writer: pymupdf.TextWriter object (= "self")
        rect: rect-like to receive the text.
        text: string or list/tuple of strings.
        pos: point-like start position of first word.
        font: pymupdf.Font object (default pymupdf.Font('helv')).
        fontsize: the fontsize.
        lineheight: overwrite the font property
        align: (int) 0 = left, 1 = center, 2 = right, 3 = justify
        warn: (bool) text overflow action: none, warn, or exception
        right_to_left: (bool) indicate right-to-left language.
        small_caps: (bool) passed on to the font's length computations and
            to `writer.append()`.

    Returns:
        List of `(text, length)` items for the lines that were not written.

    Raises:
        ValueError: if `rect` is empty, if the start position is outside
            `rect`, or - with `warn` set to False - if not all lines fit.
    """
    rect = Rect(rect)
    if rect.is_empty:
        raise ValueError("fill rect must not be empty.")
    if type(font) is not Font:
        font = Font("helv")

    def textlen(x):
        """Return length of a string."""
        return font.text_length(
            x, fontsize=fontsize, small_caps=small_caps
        )  # abbreviation

    def char_lengths(x):
        """Return list of single character lengths for a string."""
        return font.char_lengths(x, fontsize=fontsize, small_caps=small_caps)

    def append_this(pos, text):
        ret = writer.append(
                pos, text, font=font, fontsize=fontsize, small_caps=small_caps
                )
        return ret

    tolerance = fontsize * 0.2  # extra distance to left border
    space_len = textlen(" ")
    std_width = rect.width - tolerance
    std_start = rect.x0 + tolerance

    def norm_words(width, words):
        """Cut any word in pieces no longer than 'width'."""
        nwords = []
        word_lengths = []
        for w in words:
            wl_lst = char_lengths(w)
            wl = sum(wl_lst)
            if wl <= width:  # nothing to do - copy over
                nwords.append(w)
                word_lengths.append(wl)
                continue

            # word longer than rect width - split it in parts
            n = len(wl_lst)
            while n > 0:
                wl = sum(wl_lst[:n])
                if wl <= width:
                    nwords.append(w[:n])
                    word_lengths.append(wl)
                    w = w[n:]
                    wl_lst = wl_lst[n:]
                    n = len(wl_lst)
                else:
                    n -= 1
        return nwords, word_lengths

    def output_justify(start, line):
        """Justified output of a line."""
        # ignore leading / trailing / multiple spaces
        words = [w for w in line.split(" ") if w != ""]
        nwords = len(words)
        if nwords == 0:
            return
        if nwords == 1:  # single word cannot be justified
            append_this(start, words[0])
            return
        tl = sum([textlen(w) for w in words])  # total word lengths
        gaps = nwords - 1  # number of word gaps
        gapl = (std_width - tl) / gaps  # width of each gap
        for w in words:
            _, lp = append_this(start, w)  # output one word
            start.x = lp.x + gapl  # next start at word end plus gap
        return

    asc = font.ascender
    dsc = font.descender
    if not lineheight:
        if asc - dsc <= 1:
            lheight = 1.2
        else:
            lheight = asc - dsc
    else:
        lheight = lineheight

    LINEHEIGHT = fontsize * lheight  # effective line height
    width = std_width  # available horizontal space

    # starting point of text
    if pos is not None:
        pos = Point(pos)
    else:  # default is just below rect top-left
        pos = rect.tl + (tolerance, fontsize * asc)
    if pos not in rect:
        raise ValueError("Text must start in rectangle.")

    # calculate displacement factor for alignment
    if align == TEXT_ALIGN_CENTER:
        factor = 0.5
    elif align == TEXT_ALIGN_RIGHT:
        factor = 1.0
    else:
        factor = 0

    # split in lines if just a string was given
    if type(text) is str:
        textlines = text.splitlines()
    else:
        textlines = []
        for line in text:
            textlines.extend(line.splitlines())

    max_lines = int((rect.y1 - pos.y) / LINEHEIGHT) + 1

    new_lines = []  # the final list of textbox lines
    no_justify = []  # no justify for these line numbers
    for i, line in enumerate(textlines):
        if line in ("", " "):
            new_lines.append((line, space_len))
            width = rect.width - tolerance
            no_justify.append((len(new_lines) - 1))
            continue
        if i == 0:
            # the first line may start at a custom position
            width = rect.x1 - pos.x
        else:
            width = rect.width - tolerance

        if right_to_left:  # reverses Arabic / Hebrew text front to back
            line = writer.clean_rtl(line)
        tl = textlen(line)
        if tl <= width:  # line short enough
            new_lines.append((line, tl))
            no_justify.append((len(new_lines) - 1))
            continue

        # we need to split the line in fitting parts
        words = line.split(" ")  # the words in the line

        # cut in parts any words that are longer than rect width
        words, word_lengths = norm_words(width, words)

        n = len(words)
        while True:
            line0 = " ".join(words[:n])
            wl = sum(word_lengths[:n]) + space_len * (n - 1)
            if wl <= width:
                new_lines.append((line0, wl))
                words = words[n:]
                word_lengths = word_lengths[n:]
                n = len(words)
                line0 = None
            else:
                n -= 1

            if len(words) == 0:
                break
            assert n

    # -------------------------------------------------------------------------
    # List of lines created. Each item is (text, tl), where 'tl' is the PDF
    # output length (float) and 'text' is the text. Except for justified text,
    # this is output-ready.
    # -------------------------------------------------------------------------
    nlines = len(new_lines)
    if nlines > max_lines:
        msg = "Only fitting %i of %i lines." % (max_lines, nlines)
        if warn is None:
            pass
        elif warn:
            message("Warning: " + msg)
        else:
            raise ValueError(msg)

    start = Point()
    no_justify += [len(new_lines) - 1]  # no justifying of last line
    for i in range(max_lines):
        try:
            line, tl = new_lines.pop(0)
        except IndexError:
            if g_exceptions_verbose >= 2:   exception_info()
            break

        if right_to_left:  # Arabic, Hebrew
            line = "".join(reversed(line))

        if i == 0:  # may have different start for first line
            start = pos

        if align == TEXT_ALIGN_JUSTIFY and i not in no_justify and tl < std_width:
            output_justify(start, line)
            start.x = std_start
            start.y += LINEHEIGHT
            continue

        if i > 0 or pos.x == std_start:  # left, center, right alignments
            # Lines handled here start at `std_start`, so the space to
            # distribute is the standard width - not whatever `width` was
            # left over from the line splitting loop above.
            start.x += (std_width - tl) * factor

        append_this(start, line)
        start.x = std_start
        start.y += LINEHEIGHT

    return new_lines  # return non-written lines
def write_text(self, page, color=None, opacity=-1, overlay=1, morph=None, matrix=None, render_mode=0, oc=0):
    """Emit the collected text onto a PDF page of the TextWriter's size.

    Args:
        page: a PDF page whose rectangle equals the TextWriter's rectangle.
        color: text color; None selects the TextWriter's own color.
        opacity: transparency; -1 or a value without __float__ selects the
            TextWriter's own opacity.
        overlay: passed on to the content insertion (foreground or
            background).
        morph: tuple(Point, Matrix), apply a matrix with a fixpoint.
        matrix: Matrix to be used instead of 'morph'; the two exclude each
            other.
        render_mode: (int) value written with the PDF operator 'Tr'.
        oc: value handed to page._get_optional_content(); if that returns
            something, the text is wrapped in a BDC / EMC pair.

    Raises:
        ValueError: page rectangle mismatch, malformed 'morph', or both
            'morph' and 'matrix' given.
    """
    CheckParent(page)
    if abs(self.rect - page.rect) > 1e-3:
        raise ValueError("incompatible page rect")
    if morph is not None:
        morph_ok = (
            type(morph) in (tuple, list)
            and type(morph[0]) is Point
            and type(morph[1]) is Matrix
        )
        if not morph_ok:
            raise ValueError("morph must be (Point, Matrix) or None")
    if matrix is not None and morph is not None:
        raise ValueError("only one of matrix, morph is allowed")
    if getattr(opacity, "__float__", None) is None or opacity == -1:
        opacity = self.opacity
    if color is None:
        color = self.color

    # Step 1: let a PDF device write the text into a scratch buffer and a
    # scratch resources dictionary.
    pdfpage = page._pdf_page()
    alpha = opacity if 0 <= opacity < 1 else 1
    if color:
        ncol, dev_color = JM_color_FromSequence(color)
    else:
        ncol, dev_color = 1, [0, 0, 0, 0]
    if ncol == 3:
        colorspace = mupdf.fz_device_rgb()
    elif ncol == 4:
        colorspace = mupdf.fz_device_cmyk()
    else:
        colorspace = mupdf.fz_device_gray()

    resources = mupdf.pdf_new_dict(pdfpage.doc(), 5)
    contents = mupdf.fz_new_buffer(1024)
    dev = mupdf.pdf_new_pdf_device(pdfpage.doc(), mupdf.FzMatrix(), resources, contents)
    mupdf.fz_fill_text(
        dev,
        self.this,
        mupdf.FzMatrix(),
        colorspace,
        dev_color,
        alpha,
        mupdf.FzColorParams(mupdf.fz_default_color_params),
    )
    mupdf.fz_close_device(dev)

    # Step 2: copy generated resources into the one of the page. The returned
    # pair is added to the /Alp and /F numbers of the scratch stream below.
    max_nums = JM_merge_resources(pdfpage, resources)
    content = JM_EscapeStrFromBuffer(contents)
    max_alp, max_font = max_nums
    old_cont_lines = content.splitlines()

    # Step 3: build the final content stream.
    optcont = page._get_optional_content(oc)
    if optcont is not None:
        bdc = "/OC /%s BDC" % optcont
        emc = "EMC"
    else:
        bdc = emc = ""

    new_cont_lines = ["q"]
    if bdc:
        new_cont_lines.append(bdc)

    cb = page.cropbox_position
    delta = page.rect.height - page.rect.width if page.rotation in (90, 270) else 0
    mb = page.mediabox
    if bool(cb) or mb.y0 != 0 or delta != 0:
        new_cont_lines.append(f"1 0 0 1 {_format_g((cb.x, cb.y + mb.y0 - delta))} cm")

    if morph:
        # Conjugate the morph matrix with a translation to its fixpoint.
        p = morph[0] * self.ictm
        shift = Matrix(1, 1).pretranslate(p.x, p.y)
        matrix = ~shift * morph[1] * shift
    if morph or matrix:
        new_cont_lines.append(_format_g(JM_TUPLE(matrix)) + " cm")

    for op in old_cont_lines:
        if op.endswith(" cm"):
            # 'cm' lines of the scratch stream are not carried over
            continue
        if op == "BT":
            new_cont_lines.extend((op, "%i Tr" % render_mode))
            continue
        if op.endswith(" gs"):
            # renumber the graphics state name by the page's offset
            alp = int(op.split()[0][4:]) + max_alp
            op = "/Alp%i gs" % alp
        elif op.endswith(" Tf"):
            parts = op.split()
            fsize = float(parts[1])
            w = fsize * 0.05 if render_mode != 0 else 1
            new_cont_lines.append(_format_g(w) + " w")
            # renumber the font name by the page's offset
            font = int(parts[0][2:]) + max_font
            op = " ".join(["/F%i" % font] + parts[1:])
        elif op.endswith(" rg"):
            # additionally emit the upper-case variant of the color operator
            new_cont_lines.append(op.replace("rg", "RG"))
        elif op.endswith(" g"):
            new_cont_lines.append(op.replace(" g", " G"))
        elif op.endswith(" k"):
            new_cont_lines.append(op.replace(" k", " K"))
        new_cont_lines.append(op)

    if emc:
        new_cont_lines.append(emc)
    new_cont_lines.append("Q\n")
    content = "\n".join(new_cont_lines).encode("utf-8")
    TOOLS._insert_contents(page, content, overlay=overlay)
    for font in self.used_fonts:
        repair_mono_font(page, font)
    return None
class IRect:
    """
    Rectangle with integer coordinates. Constructor forms:

    IRect() - all zeros
    IRect(x0, y0, x1, y1) - 4 coordinates
    IRect(top-left, x1, y1) - point and 2 coordinates
    IRect(x0, y0, bottom-right) - 2 coordinates and point
    IRect(top-left, bottom-right) - 2 points
    IRect(sequ) - new from sequence or rect-like
    """

    def __init__(self, *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
        coords = util_make_irect(*args, p0=p0, p1=p1, x0=x0, y0=y0, x1=x1, y1=y1)
        self.x0, self.y0, self.x1, self.y1 = coords

    # -- sequence protocol ---------------------------------------------------

    def __len__(self):
        return 4

    def __getitem__(self, i):
        coords = (self.x0, self.y0, self.x1, self.y1)
        return coords[i]

    def __setitem__(self, i, v):
        # The value is converted first, so a bad value is reported before a
        # bad index.
        v = int(v)
        if i == 0:
            self.x0 = v
        elif i == 1:
            self.y0 = v
        elif i == 2:
            self.x1 = v
        elif i == 3:
            self.y1 = v
        else:
            raise IndexError("index out of range")
        return None

    def __contains__(self, x):
        return Rect.__contains__(self, x)

    # -- comparison, hashing, representation -----------------------------------

    def __eq__(self, r):
        """Equal to any object of length 4 with the same four items."""
        if not hasattr(r, "__len__"):
            return False
        if len(r) != 4:
            return False
        return self.x0 == r[0] and self.y0 == r[1] and self.x1 == r[2] and self.y1 == r[3]

    def __hash__(self):
        return hash(tuple(self))

    def __repr__(self):
        return f"IRect{tuple(self)}"

    # -- arithmetic: computed by Rect, then rounded ------------------------------

    def __add__(self, p):
        result = Rect.__add__(self, p)
        return result.round()

    def __sub__(self, p):
        result = Rect.__sub__(self, p)
        return result.round()

    def __mul__(self, m):
        result = Rect.__mul__(self, m)
        return result.round()

    def __truediv__(self, m):
        result = Rect.__truediv__(self, m)
        return result.round()

    def __and__(self, x):
        result = Rect.__and__(self, x)
        return result.round()

    def __or__(self, x):
        result = Rect.__or__(self, x)
        return result.round()

    def __neg__(self):
        return IRect(-self.x0, -self.y0, -self.x1, -self.y1)

    def __pos__(self):
        return IRect(self)

    # -- corners and dimensions ------------------------------------------------

    @property
    def top_left(self):
        """Top-left corner."""
        return Point(self.x0, self.y0)

    @property
    def top_right(self):
        """Top-right corner."""
        return Point(self.x1, self.y0)

    @property
    def bottom_left(self):
        """Bottom-left corner."""
        return Point(self.x0, self.y1)

    @property
    def bottom_right(self):
        """Bottom-right corner."""
        return Point(self.x1, self.y1)

    @property
    def width(self):
        """x1 - x0, but never less than 0."""
        extent = self.x1 - self.x0
        return extent if extent > 0 else 0

    @property
    def height(self):
        """y1 - y0, but never less than 0."""
        extent = self.y1 - self.y0
        return extent if extent > 0 else 0

    # -- predicates --------------------------------------------------------------

    @property
    def is_empty(self):
        """True if rectangle area is empty."""
        return self.x0 >= self.x1 or self.y0 >= self.y1

    @property
    def is_infinite(self):
        """True if rectangle is infinite."""
        return self.x0 == self.y0 == FZ_MIN_INF_RECT and self.x1 == self.y1 == FZ_MAX_INF_RECT

    @property
    def is_valid(self):
        """True if rectangle is valid."""
        return self.x0 <= self.x1 and self.y0 <= self.y1

    def contains(self, x):
        """Check if x is in the rectangle."""
        return self.__contains__(x)

    def intersects(self, x):
        return Rect.intersects(self, x)

    # -- conversions -------------------------------------------------------------

    @property
    def rect(self):
        return Rect(self)

    @property
    def quad(self):
        """Return Quad version of rectangle."""
        return Quad(self.tl, self.tr, self.bl, self.br)

    # -- operations ----------------------------------------------------------------

    def get_area(self, *args) -> float:
        """Calculate area of rectangle.

        The optional parameter is one of 'px' (default), 'in', 'cm', or 'mm'.
        """
        return _rect_area(self.width, self.height, args)

    def include_point(self, p):
        """Extend rectangle to include point p."""
        extended = self.rect.include_point(p)
        return extended.irect

    def include_rect(self, r):
        """Extend rectangle to include rectangle r."""
        extended = self.rect.include_rect(r)
        return extended.irect

    def intersect(self, r):
        """Restrict rectangle to intersection with rectangle r."""
        return Rect.intersect(self, r).round()

    def morph(self, p, m):
        """Morph with matrix-like m and point-like p.

        Returns a new quad."""
        if self.is_infinite:
            return INFINITE_QUAD()
        return self.quad.morph(p, m)

    def norm(self):
        """Square root of the sum of the squared coordinates."""
        return math.sqrt(sum(c * c for c in self))

    def normalize(self):
        """Replace rectangle with its valid version."""
        if self.x1 < self.x0:
            self.x0, self.x1 = self.x1, self.x0
        if self.y1 < self.y0:
            self.y0, self.y1 = self.y1, self.y0
        return self

    def torect(self, r):
        """Return matrix that converts to target rect."""
        r = Rect(r)
        if self.is_infinite or self.is_empty or r.is_infinite or r.is_empty:
            raise ValueError("rectangles must be finite and not empty")
        to_origin = Matrix(1, 0, 0, 1, -self.x0, -self.y0)
        scale = Matrix(r.width / self.width, r.height / self.height)
        to_target = Matrix(1, 0, 0, 1, r.x0, r.y0)
        return to_origin * scale * to_target

    def transform(self, m):
        return Rect.transform(self, m).round()

    # short names for the corner properties
    br = bottom_right
    bl = bottom_left
    tl = top_left
    tr = top_right
- # Data
- #
# Copy the PDF_* and UCDN_SCRIPT_* values of the mupdf bindings into this
# module, so they are available as attributes of this module as well.
_self = sys.modules[__name__]
for _name, _value in mupdf.__dict__.items():
    # Names starting with PDF_ENUM_NAME_ are not simple enums: leave them out.
    if _name.startswith(('PDF_', 'UCDN_SCRIPT_')) and not _name.startswith('PDF_ENUM_NAME_'):
        setattr(_self, _name, _value)

# This is a macro so not preserved in mupdf C++/Python bindings.
#
PDF_SIGNATURE_DEFAULT_APPEARANCE = (
    0
    | mupdf.PDF_SIGNATURE_SHOW_LABELS
    | mupdf.PDF_SIGNATURE_SHOW_DN
    | mupdf.PDF_SIGNATURE_SHOW_DATE
    | mupdf.PDF_SIGNATURE_SHOW_TEXT_NAME
    | mupdf.PDF_SIGNATURE_SHOW_GRAPHIC_NAME
    | mupdf.PDF_SIGNATURE_SHOW_LOGO
)

# Sanity checks on the bindings and on the names copied above.
assert mupdf.UCDN_EAST_ASIAN_H == 1

# Flake8 incorrectly fails next two lines because we've dynamically added
# items to self.
assert PDF_TX_FIELD_IS_MULTILINE == mupdf.PDF_TX_FIELD_IS_MULTILINE  # noqa: F821
assert UCDN_SCRIPT_ADLAM == mupdf.UCDN_SCRIPT_ADLAM  # noqa: F821

# Do not leave the helper names behind in the module namespace.
del _self, _name, _value
AnyType = typing.Any

# Names of the 14 fonts listed here as "Base14".
Base14_fontnames = (
    "Courier", "Courier-Oblique", "Courier-Bold", "Courier-BoldOblique",
    "Helvetica", "Helvetica-Oblique", "Helvetica-Bold", "Helvetica-BoldOblique",
    "Times-Roman", "Times-Italic", "Times-Bold", "Times-BoldItalic",
    "Symbol", "ZapfDingbats",
)

# Lookup table: lower-cased full name -> font name, followed by
# four-letter abbreviation -> font name.
Base14_fontdict = {}
for f in Base14_fontnames:
    Base14_fontdict[f.lower()] = f
Base14_fontdict.update(
    helv="Helvetica",
    heit="Helvetica-Oblique",
    hebo="Helvetica-Bold",
    hebi="Helvetica-BoldOblique",
    cour="Courier",
    coit="Courier-Oblique",
    cobo="Courier-Bold",
    cobi="Courier-BoldOblique",
    tiro="Times-Roman",
    tibo="Times-Bold",
    tiit="Times-Italic",
    tibi="Times-BoldItalic",
    symb="Symbol",
    zadb="ZapfDingbats",
)

EPSILON = 1e-5
FLT_EPSILON = 1e-5

# largest 32bit integers surviving C float conversion roundtrips
# used by MuPDF to define infinite rectangles
FZ_MIN_INF_RECT = -(2 ** 31)      # -0x80000000
FZ_MAX_INF_RECT = 2 ** 31 - 128   # 0x7fffff80

JM_annot_id_stem = "fitz"
JM_mupdf_warnings_store = []
JM_mupdf_show_errors = 1
JM_mupdf_show_warnings = 0
# ------------------------------------------------------------------------------
# Image recompression constants
# ------------------------------------------------------------------------------
# Each name below is a module-level alias of the identically named attribute
# of the mupdf bindings.
FZ_RECOMPRESS_NEVER = mupdf.FZ_RECOMPRESS_NEVER
FZ_RECOMPRESS_SAME = mupdf.FZ_RECOMPRESS_SAME
FZ_RECOMPRESS_LOSSLESS = mupdf.FZ_RECOMPRESS_LOSSLESS
FZ_RECOMPRESS_JPEG = mupdf.FZ_RECOMPRESS_JPEG
FZ_RECOMPRESS_J2K = mupdf.FZ_RECOMPRESS_J2K
FZ_RECOMPRESS_FAX = mupdf.FZ_RECOMPRESS_FAX
FZ_SUBSAMPLE_AVERAGE = mupdf.FZ_SUBSAMPLE_AVERAGE
FZ_SUBSAMPLE_BICUBIC = mupdf.FZ_SUBSAMPLE_BICUBIC
# ------------------------------------------------------------------------------
# Various PDF Optional Content Flags
# ------------------------------------------------------------------------------
PDF_OC_ON, PDF_OC_TOGGLE, PDF_OC_OFF = range(3)

# ------------------------------------------------------------------------------
# link kinds and link flags
# ------------------------------------------------------------------------------
(
    LINK_NONE,
    LINK_GOTO,
    LINK_URI,
    LINK_LAUNCH,
    LINK_NAMED,
    LINK_GOTOR,
) = range(6)

# link flags: one bit each
LINK_FLAG_L_VALID = 1 << 0
LINK_FLAG_T_VALID = 1 << 1
LINK_FLAG_R_VALID = 1 << 2
LINK_FLAG_B_VALID = 1 << 3
LINK_FLAG_FIT_H = 1 << 4
LINK_FLAG_FIT_V = 1 << 5
LINK_FLAG_R_IS_ZOOM = 1 << 6

SigFlag_SignaturesExist = 1 << 0
SigFlag_AppendOnly = 1 << 1

# stamp identifiers, numbered consecutively from 0
(
    STAMP_Approved,
    STAMP_AsIs,
    STAMP_Confidential,
    STAMP_Departmental,
    STAMP_Experimental,
    STAMP_Expired,
    STAMP_Final,
    STAMP_ForComment,
    STAMP_ForPublicRelease,
    STAMP_NotApproved,
    STAMP_NotForPublicRelease,
    STAMP_Sold,
    STAMP_TopSecret,
    STAMP_Draft,
) = range(14)

TEXT_ALIGN_LEFT, TEXT_ALIGN_CENTER, TEXT_ALIGN_RIGHT, TEXT_ALIGN_JUSTIFY = range(4)

# font property flags: one bit each
TEXT_FONT_SUPERSCRIPT = 1 << 0
TEXT_FONT_ITALIC = 1 << 1
TEXT_FONT_SERIFED = 1 << 2
TEXT_FONT_MONOSPACED = 1 << 3
TEXT_FONT_BOLD = 1 << 4

(
    TEXT_OUTPUT_TEXT,
    TEXT_OUTPUT_HTML,
    TEXT_OUTPUT_JSON,
    TEXT_OUTPUT_XML,
    TEXT_OUTPUT_XHTML,
) = range(5)
# Text flags: each TEXT_* name is an alias of an FZ_STEXT_* attribute of the
# mupdf bindings.
TEXT_PRESERVE_LIGATURES = mupdf.FZ_STEXT_PRESERVE_LIGATURES
TEXT_PRESERVE_WHITESPACE = mupdf.FZ_STEXT_PRESERVE_WHITESPACE
TEXT_PRESERVE_IMAGES = mupdf.FZ_STEXT_PRESERVE_IMAGES
TEXT_INHIBIT_SPACES = mupdf.FZ_STEXT_INHIBIT_SPACES
TEXT_DEHYPHENATE = mupdf.FZ_STEXT_DEHYPHENATE
TEXT_PRESERVE_SPANS = mupdf.FZ_STEXT_PRESERVE_SPANS
TEXT_MEDIABOX_CLIP = mupdf.FZ_STEXT_MEDIABOX_CLIP
TEXT_USE_CID_FOR_UNKNOWN_UNICODE = mupdf.FZ_STEXT_USE_CID_FOR_UNKNOWN_UNICODE
TEXT_COLLECT_STRUCTURE = mupdf.FZ_STEXT_COLLECT_STRUCTURE
TEXT_ACCURATE_BBOXES = mupdf.FZ_STEXT_ACCURATE_BBOXES
TEXT_COLLECT_VECTORS = mupdf.FZ_STEXT_COLLECT_VECTORS
TEXT_IGNORE_ACTUALTEXT = mupdf.FZ_STEXT_IGNORE_ACTUALTEXT
TEXT_SEGMENT = mupdf.FZ_STEXT_SEGMENT
# The following aliases are only created when the MuPDF version is 1.26 or later.
# NOTE(review): the extent of this conditional block was reconstructed from
# context (indentation was not available) - confirm against the file.
if mupdf_version_tuple >= (1, 26):
    TEXT_PARAGRAPH_BREAK = mupdf.FZ_STEXT_PARAGRAPH_BREAK
    TEXT_TABLE_HUNT = mupdf.FZ_STEXT_TABLE_HUNT
    TEXT_COLLECT_STYLES = mupdf.FZ_STEXT_COLLECT_STYLES
    TEXT_USE_GID_FOR_UNKNOWN_UNICODE = mupdf.FZ_STEXT_USE_GID_FOR_UNKNOWN_UNICODE
    TEXT_CLIP_RECT = mupdf.FZ_STEXT_CLIP_RECT
    TEXT_ACCURATE_ASCENDERS = mupdf.FZ_STEXT_ACCURATE_ASCENDERS
    TEXT_ACCURATE_SIDE_BEARINGS = mupdf.FZ_STEXT_ACCURATE_SIDE_BEARINGS
# 2025-05-07: Non-standard names preserved for backwards compatibility.
TEXT_STEXT_SEGMENT = TEXT_SEGMENT
TEXT_CID_FOR_UNKNOWN_UNICODE = TEXT_USE_CID_FOR_UNKNOWN_UNICODE
# Predefined combinations of the TEXT_* flags. Several names share the same
# combination; those are expressed as aliases of the first definition.

# ligatures + whitespace + mediabox clip + CID for unknown unicode
TEXTFLAGS_WORDS = (
    0
    | TEXT_PRESERVE_LIGATURES
    | TEXT_PRESERVE_WHITESPACE
    | TEXT_MEDIABOX_CLIP
    | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
)
TEXTFLAGS_BLOCKS = TEXTFLAGS_WORDS

# as above, plus images
TEXTFLAGS_DICT = (
    0
    | TEXT_PRESERVE_LIGATURES
    | TEXT_PRESERVE_WHITESPACE
    | TEXT_MEDIABOX_CLIP
    | TEXT_PRESERVE_IMAGES
    | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
)
TEXTFLAGS_RAWDICT = TEXTFLAGS_DICT

# whitespace + mediabox clip + dehyphenation + CID for unknown unicode
TEXTFLAGS_SEARCH = (
    0
    | TEXT_PRESERVE_WHITESPACE
    | TEXT_MEDIABOX_CLIP
    | TEXT_DEHYPHENATE
    | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
)

TEXTFLAGS_HTML = TEXTFLAGS_DICT
TEXTFLAGS_XHTML = TEXTFLAGS_DICT
TEXTFLAGS_XML = TEXTFLAGS_WORDS
TEXTFLAGS_TEXT = TEXTFLAGS_WORDS
# Simple text encoding options
TEXT_ENCODING_LATIN, TEXT_ENCODING_GREEK, TEXT_ENCODING_CYRILLIC = range(3)

TOOLS_JM_UNIQUE_ID = 0

# colorspace identifiers
CS_RGB, CS_GRAY, CS_CMYK = 1, 2, 3
# PDF Blend Modes
# The values are the blend mode names as written into a PDF. PDF names are
# case-sensitive, so every value must match the spelling of the PDF
# specification exactly.
PDF_BM_Color = "Color"
PDF_BM_ColorBurn = "ColorBurn"
PDF_BM_ColorDodge = "ColorDodge"
PDF_BM_Darken = "Darken"
PDF_BM_Difference = "Difference"
PDF_BM_Exclusion = "Exclusion"
PDF_BM_HardLight = "HardLight"
PDF_BM_Hue = "Hue"
PDF_BM_Lighten = "Lighten"
PDF_BM_Luminosity = "Luminosity"
PDF_BM_Multiply = "Multiply"
PDF_BM_Normal = "Normal"
PDF_BM_Overlay = "Overlay"
PDF_BM_Saturation = "Saturation"
PDF_BM_Screen = "Screen"
# Was "Softlight", which is not a blend mode name of the PDF specification.
PDF_BM_SoftLight = "SoftLight"
# Templates for the PDF source of link annotations, keyed by link type.
# Each value is a callable taking positional arguments and returning the
# annotation dictionary as a string. In every template the last argument is
# inserted into "/Rect[...]".
# NOTE(review): the meaning of the other arguments is not visible here; in
# "goto1" the first argument is used as an object number ("{a} 0 R") and the
# next three are formatted together after "/XYZ" - confirm against callers.
annot_skel = {
    "goto1": lambda a, b, c, d, e: f"<</A<</S/GoTo/D[{a} 0 R/XYZ {_format_g((b, c, d))}]>>/Rect[{e}]/BS<</W 0>>/Subtype/Link>>",
    "goto2": lambda a, b: f"<</A<</S/GoTo/D{a}>>/Rect[{b}]/BS<</W 0>>/Subtype/Link>>",
    "gotor1": lambda a, b, c, d, e, f, g: f"<</A<</S/GoToR/D[{a} /XYZ {_format_g((b, c, d))}]/F<</F({e})/UF({f})/Type/Filespec>>>>/Rect[{g}]/BS<</W 0>>/Subtype/Link>>",
    "gotor2": lambda a, b, c: f"<</A<</S/GoToR/D{a}/F({b})>>/Rect[{c}]/BS<</W 0>>/Subtype/Link>>",
    "launch": lambda a, b, c: f"<</A<</S/Launch/F<</F({a})/UF({b})/Type/Filespec>>>>/Rect[{c}]/BS<</W 0>>/Subtype/Link>>",
    "uri": lambda a, b: f"<</A<</S/URI/URI({a})>>/Rect[{b}]/BS<</W 0>>/Subtype/Link>>",
    "named": lambda a, b: f"<</A<</S/GoTo/D({a})/Type/Action>>/Rect[{b}]/BS<</W 0>>/Subtype/Link>>",
}
# Exception types of this module. Note that FileNotFoundError shadows the
# built-in of the same name inside this module and derives from RuntimeError.

class FileDataError(RuntimeError):
    """Raised for documents with file structure issues."""


class FileNotFoundError(RuntimeError):
    """Raised if file does not exist."""


class EmptyFileError(FileDataError):
    """Raised when creating documents from zero-length data."""
- # propagate exception class to C-level code
- #_set_FileDataError(FileDataError)
-
# Module-level Colorspace objects, one per colorspace identifier CS_*.
csRGB = Colorspace(CS_RGB)
csGRAY = Colorspace(CS_GRAY)
csCMYK = Colorspace(CS_CMYK)
- # These don't appear to be visible in classic, but are used
- # internally.
- #
# String constants used as dictionary keys, in alphabetical order of the
# constant name. Most values equal the name suffix; the exceptions are
# dictkey_asc, dictkey_cs_name, dictkey_desc, dictkey_descr and dictkey_matrix.
dictkey_align        = "align"
dictkey_asc          = "ascender"
dictkey_bbox         = "bbox"
dictkey_bidi         = "bidi"
dictkey_blocks       = "blocks"
dictkey_bpc          = "bpc"
dictkey_c            = "c"
dictkey_char_flags   = "char_flags"
dictkey_chars        = "chars"
dictkey_color        = "color"
dictkey_colorspace   = "colorspace"
dictkey_content      = "content"
dictkey_creationDate = "creationDate"
dictkey_cs_name      = "cs-name"
dictkey_da           = "da"
dictkey_dashes       = "dashes"
dictkey_desc         = "descender"
dictkey_descr        = "description"
dictkey_dir          = "dir"
dictkey_effect       = "effect"
dictkey_ext          = "ext"
dictkey_filename     = "filename"
dictkey_fill         = "fill"
dictkey_flags        = "flags"
dictkey_font         = "font"
dictkey_glyph        = "glyph"
dictkey_height       = "height"
dictkey_id           = "id"
dictkey_image        = "image"
dictkey_items        = "items"
dictkey_length       = "length"
dictkey_lines        = "lines"
dictkey_matrix       = "transform"
dictkey_modDate      = "modDate"
dictkey_name         = "name"
dictkey_number       = "number"
dictkey_origin       = "origin"
dictkey_rect         = "rect"
dictkey_size         = "size"
dictkey_smask        = "smask"
dictkey_spans        = "spans"
dictkey_stroke       = "stroke"
dictkey_style        = "style"
dictkey_subject      = "subject"
dictkey_text         = "text"
dictkey_title        = "title"
dictkey_type         = "type"
dictkey_ufilename    = "ufilename"
dictkey_width        = "width"
dictkey_wmode        = "wmode"
dictkey_xref         = "xref"
dictkey_xres         = "xres"
dictkey_yres         = "yres"
# Optional package pymupdf_fonts: if it can be imported, every font
# descriptor additionally receives its entry of 'fontbuffers' under the key
# "loader"; otherwise the descriptor table is empty.
try:
    from pymupdf_fonts import fontdescriptors, fontbuffers

    fitz_fontdescriptors = fontdescriptors.copy()
    for k in fitz_fontdescriptors:
        fitz_fontdescriptors[k]["loader"] = fontbuffers[k]
    # the imported names are not kept in this module
    del fontdescriptors, fontbuffers
except ImportError:
    fitz_fontdescriptors = {}
# Glyph list for the built-in font 'Symbol': 256 pairs, one per code 0..255.
# Runs of the filler pair (183, 0.46) are written as a repetition.
symbol_glyphs = (
    # codes 0..31: filler
    ((183, 0.46),) * 32
    # codes 32..126
    + (
        (32, 0.25), (33, 0.333), (34, 0.713), (35, 0.5), (36, 0.549),
        (37, 0.833), (38, 0.778), (39, 0.439), (40, 0.333), (41, 0.333),
        (42, 0.5), (43, 0.549), (44, 0.25), (45, 0.549), (46, 0.25),
        (47, 0.278), (48, 0.5), (49, 0.5), (50, 0.5), (51, 0.5),
        (52, 0.5), (53, 0.5), (54, 0.5), (55, 0.5), (56, 0.5),
        (57, 0.5), (58, 0.278), (59, 0.278), (60, 0.549), (61, 0.549),
        (62, 0.549), (63, 0.444), (64, 0.549), (65, 0.722), (66, 0.667),
        (67, 0.722), (68, 0.612), (69, 0.611), (70, 0.763), (71, 0.603),
        (72, 0.722), (73, 0.333), (74, 0.631), (75, 0.722), (76, 0.686),
        (77, 0.889), (78, 0.722), (79, 0.722), (80, 0.768), (81, 0.741),
        (82, 0.556), (83, 0.592), (84, 0.611), (85, 0.69), (86, 0.439),
        (87, 0.768), (88, 0.645), (89, 0.795), (90, 0.611), (91, 0.333),
        (92, 0.863), (93, 0.333), (94, 0.658), (95, 0.5), (96, 0.5),
        (97, 0.631), (98, 0.549), (99, 0.549), (100, 0.494), (101, 0.439),
        (102, 0.521), (103, 0.411), (104, 0.603), (105, 0.329), (106, 0.603),
        (107, 0.549), (108, 0.549), (109, 0.576), (110, 0.521), (111, 0.549),
        (112, 0.549), (113, 0.521), (114, 0.549), (115, 0.603), (116, 0.439),
        (117, 0.576), (118, 0.713), (119, 0.686), (120, 0.493), (121, 0.686),
        (122, 0.494), (123, 0.48), (124, 0.2), (125, 0.48), (126, 0.549),
    )
    # codes 127..159: filler
    + ((183, 0.46),) * 33
    # codes 160..239
    + (
        (160, 0.25), (161, 0.62), (162, 0.247), (163, 0.549), (164, 0.167),
        (165, 0.713), (166, 0.5), (167, 0.753), (168, 0.753), (169, 0.753),
        (170, 0.753), (171, 1.042), (172, 0.713), (173, 0.603), (174, 0.987),
        (175, 0.603), (176, 0.4), (177, 0.549), (178, 0.411), (179, 0.549),
        (180, 0.549), (181, 0.576), (182, 0.494), (183, 0.46), (184, 0.549),
        (185, 0.549), (186, 0.549), (187, 0.549), (188, 1), (189, 0.603),
        (190, 1), (191, 0.658), (192, 0.823), (193, 0.686), (194, 0.795),
        (195, 0.987), (196, 0.768), (197, 0.768), (198, 0.823), (199, 0.768),
        (200, 0.768), (201, 0.713), (202, 0.713), (203, 0.713), (204, 0.713),
        (205, 0.713), (206, 0.713), (207, 0.713), (208, 0.768), (209, 0.713),
        (210, 0.79), (211, 0.79), (212, 0.89), (213, 0.823), (214, 0.549),
        (215, 0.549), (216, 0.713), (217, 0.603), (218, 0.603), (219, 1.042),
        (220, 0.987), (221, 0.603), (222, 0.987), (223, 0.603), (224, 0.494),
        (225, 0.329), (226, 0.79), (227, 0.79), (228, 0.786), (229, 0.713),
        (230, 0.384), (231, 0.384), (232, 0.384), (233, 0.384), (234, 0.384),
        (235, 0.384), (236, 0.494), (237, 0.494), (238, 0.494), (239, 0.494),
    )
    # code 240: filler
    + ((183, 0.46),)
    # codes 241..254
    + (
        (241, 0.329), (242, 0.274), (243, 0.686), (244, 0.686), (245, 0.686),
        (246, 0.384), (247, 0.549), (248, 0.384), (249, 0.384), (250, 0.384),
        (251, 0.384), (252, 0.494), (253, 0.494), (254, 0.494),
    )
    # code 255: filler
    + ((183, 0.46),)
)
# Glyph list for the built-in font 'ZapfDingbats': 256 pairs, one per code
# 0..255. Runs of the filler pair (183, 0.788) are written as a repetition.
zapf_glyphs = (
    # codes 0..31: filler
    ((183, 0.788),) * 32
    # codes 32..126
    + (
        (32, 0.278), (33, 0.974), (34, 0.961), (35, 0.974), (36, 0.98),
        (37, 0.719), (38, 0.789), (39, 0.79), (40, 0.791), (41, 0.69),
        (42, 0.96), (43, 0.939), (44, 0.549), (45, 0.855), (46, 0.911),
        (47, 0.933), (48, 0.911), (49, 0.945), (50, 0.974), (51, 0.755),
        (52, 0.846), (53, 0.762), (54, 0.761), (55, 0.571), (56, 0.677),
        (57, 0.763), (58, 0.76), (59, 0.759), (60, 0.754), (61, 0.494),
        (62, 0.552), (63, 0.537), (64, 0.577), (65, 0.692), (66, 0.786),
        (67, 0.788), (68, 0.788), (69, 0.79), (70, 0.793), (71, 0.794),
        (72, 0.816), (73, 0.823), (74, 0.789), (75, 0.841), (76, 0.823),
        (77, 0.833), (78, 0.816), (79, 0.831), (80, 0.923), (81, 0.744),
        (82, 0.723), (83, 0.749), (84, 0.79), (85, 0.792), (86, 0.695),
        (87, 0.776), (88, 0.768), (89, 0.792), (90, 0.759), (91, 0.707),
        (92, 0.708), (93, 0.682), (94, 0.701), (95, 0.826), (96, 0.815),
        (97, 0.789), (98, 0.789), (99, 0.707), (100, 0.687), (101, 0.696),
        (102, 0.689), (103, 0.786), (104, 0.787), (105, 0.713), (106, 0.791),
        (107, 0.785), (108, 0.791), (109, 0.873), (110, 0.761), (111, 0.762),
        (112, 0.762), (113, 0.759), (114, 0.759), (115, 0.892), (116, 0.892),
        (117, 0.788), (118, 0.784), (119, 0.438), (120, 0.138), (121, 0.277),
        (122, 0.415), (123, 0.392), (124, 0.392), (125, 0.668), (126, 0.668),
    )
    # codes 127..160: filler
    + ((183, 0.788),) * 34
    # codes 161..171
    + (
        (161, 0.732), (162, 0.544), (163, 0.544), (164, 0.91), (165, 0.667),
        (166, 0.76), (167, 0.76), (168, 0.776), (169, 0.595), (170, 0.694),
        (171, 0.626),
    )
    # codes 172..211: each code paired with the width 0.788
    + tuple((code, 0.788) for code in range(172, 212))
    # codes 212..239
    + (
        (212, 0.894), (213, 0.838), (214, 1.016), (215, 0.458), (216, 0.748),
        (217, 0.924), (218, 0.748), (219, 0.918), (220, 0.927), (221, 0.928),
        (222, 0.928), (223, 0.834), (224, 0.873), (225, 0.828), (226, 0.924),
        (227, 0.924), (228, 0.917), (229, 0.93), (230, 0.931), (231, 0.463),
        (232, 0.883), (233, 0.836), (234, 0.836), (235, 0.867), (236, 0.867),
        (237, 0.696), (238, 0.696), (239, 0.874),
    )
    # code 240: filler
    + ((183, 0.788),)
    # codes 241..253
    + (
        (241, 0.874), (242, 0.76), (243, 0.946), (244, 0.771), (245, 0.865),
        (246, 0.771), (247, 0.888), (248, 0.967), (249, 0.888), (250, 0.831),
        (251, 0.873), (252, 0.927), (253, 0.97),
    )
    # codes 254..255: filler
    + ((183, 0.788),) * 2
)
- # Functions
- #
- def _rect_area(width, height, args):
- # Used by IRect.get_area() and Rect.get_area().
- unit = args[0] if args else 'px'
- u = {"px": (1, 1), "in": (1.0, 72.0), "cm": (2.54, 72.0), "mm": (25.4, 72.0)}
- f = (u[unit][0] / u[unit][1]) ** 2
- return f * width * height
- def _read_samples( pixmap, offset, n):
- # fixme: need to be able to get a sample in one call, as a Python
- # bytes or similar.
- ret = []
- if not pixmap.samples():
- # mupdf.fz_samples_get() gives a segv if pixmap->samples is null.
- return ret
- for i in range( n):
- ret.append( mupdf.fz_samples_get( pixmap, offset + i))
- return bytes( ret)
- def _INRANGE(v, low, high):
- return low <= v and v <= high
def _remove_dest_range(pdf, numbers):
    """Delete link annotations that point to one of the pages in 'numbers'.

    Pages whose own number is in 'numbers' are not inspected. On every other
    page, each /Link annotation is examined: its destination is taken from
    /Dest or, if an /A action of type /GoTo is present, from the action's /D
    entry. If the destination resolves to a page number contained in
    'numbers', the annotation is removed from the page's /Annots array.

    Args:
        pdf: PDF document object of the mupdf bindings.
        numbers: container of page numbers, tested with 'in'.
    """
    for page_no in range(mupdf.pdf_count_pages(pdf)):
        if page_no in numbers:
            continue

        page_obj = mupdf.pdf_lookup_page_obj(pdf, page_no)
        annots = mupdf.pdf_dict_get(page_obj, PDF_NAME('Annots'))
        if not annots.m_internal:
            continue

        # Walk from the last entry down to the first, so that deleting entry
        # j does not shift the entries still to be visited.
        for j in reversed(range(mupdf.pdf_array_len(annots))):
            annot = mupdf.pdf_array_get(annots, j)
            subtype = mupdf.pdf_dict_get(annot, PDF_NAME('Subtype'))
            if not mupdf.pdf_name_eq(subtype, PDF_NAME('Link')):
                continue

            action = mupdf.pdf_dict_get(annot, PDF_NAME('A'))
            dest = mupdf.pdf_dict_get(annot, PDF_NAME('Dest'))
            if action.m_internal:
                action_type = mupdf.pdf_dict_get(action, PDF_NAME('S'))
                if not mupdf.pdf_name_eq(action_type, PDF_NAME('GoTo')):
                    continue
                dest = mupdf.pdf_dict_get(action, PDF_NAME('D'))

            if mupdf.pdf_is_array(dest):
                target = mupdf.pdf_array_get(dest, 0)
                target_no = mupdf.pdf_lookup_page_number(pdf, target)
            elif mupdf.pdf_is_string(dest):
                location, _, _ = mupdf.fz_resolve_link(pdf.super(), mupdf.pdf_to_text_string(dest))
                target_no = location.page
            else:
                target_no = -1

            if target_no < 0:  # page number lookup did not work
                continue
            if target_no in numbers:
                mupdf.pdf_array_delete(annots, j)
def ASSERT_PDF(cond):
    """Check that 'cond' wraps an actual PDF object.

    Args:
        cond: a mupdf.PdfPage or mupdf.PdfDocument.

    Raises:
        AssertionError: if 'cond' is of neither type.
        Exception: with message MSG_IS_NO_PDF if cond.m_internal is falsy.
    """
    assert isinstance(cond, (mupdf.PdfPage, mupdf.PdfDocument)), f'{type(cond)=} {cond=}'
    if not cond.m_internal:
        raise Exception(MSG_IS_NO_PDF)
def EMPTY_IRECT():
    """Return a new IRect with top-left FZ_MAX_INF_RECT and bottom-right FZ_MIN_INF_RECT."""
    return IRect(FZ_MAX_INF_RECT, FZ_MAX_INF_RECT, FZ_MIN_INF_RECT, FZ_MIN_INF_RECT)


def EMPTY_QUAD():
    """Return the quad of EMPTY_RECT()."""
    return EMPTY_RECT().quad


def EMPTY_RECT():
    """Return a new Rect with top-left FZ_MAX_INF_RECT and bottom-right FZ_MIN_INF_RECT."""
    return Rect(FZ_MAX_INF_RECT, FZ_MAX_INF_RECT, FZ_MIN_INF_RECT, FZ_MIN_INF_RECT)


def ENSURE_OPERATION(pdf):
    """Raise an Exception unless JM_have_operation(pdf) is true."""
    if not JM_have_operation(pdf):
        raise Exception("No journalling operation started")


def INFINITE_IRECT():
    """Return a new IRect with top-left FZ_MIN_INF_RECT and bottom-right FZ_MAX_INF_RECT."""
    return IRect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT)


def INFINITE_QUAD():
    """Return the quad of INFINITE_RECT()."""
    return INFINITE_RECT().quad


def INFINITE_RECT():
    """Return a new Rect with top-left FZ_MIN_INF_RECT and bottom-right FZ_MAX_INF_RECT."""
    return Rect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT)
def JM_BinFromBuffer(buffer_):
    """Turn fz_buffer into a Python bytes object.

    Args:
        buffer_: a mupdf.FzBuffer (checked with an assertion).

    Returns:
        The result of mupdf.fz_buffer_extract_copy() for the buffer.
    """
    assert isinstance(buffer_, mupdf.FzBuffer)
    return mupdf.fz_buffer_extract_copy(buffer_)
def JM_EscapeStrFromStr(c):
    """Return a str with one character per byte of the UTF-8 form of 'c'.

    Args:
        c: a str, or None.

    Returns:
        '' for None. Otherwise 'c' is encoded as UTF-8 (with
        'surrogateescape', so escaped invalid bytes become the original
        bytes again) and every resulting byte value b is turned into the
        character chr(b).
    """
    # `c` is typically from SWIG which will have converted a `const char*` from
    # C into a Python `str` using `PyUnicode_DecodeUTF8(carray, static_cast<
    # Py_ssize_t >(size), "surrogateescape")`. This gives us a Python `str`
    # with some characters encoded as a \0xdcXY sequence, where `XY` are hex
    # digits for an invalid byte in the original `const char*`.
    #
    # This is actually a reasonable way of representing arbitrary
    # strings from C, but we want to mimic what PyMuPDF does. It uses
    # `PyUnicode_DecodeRawUnicodeEscape(c, (Py_ssize_t) strlen(c), "replace")`
    # which gives a string containing actual unicode characters for any invalid
    # bytes.
    #
    # We mimic this by converting the `str` to a `bytes` with 'surrogateescape'
    # to recognise \0xdcXY sequences, then map every byte to the character with
    # the same code point. Decoding as latin-1 does exactly that mapping (byte
    # value b -> chr(b)) in one call, instead of concatenating one character
    # at a time.
    #
    if c is None:
        return ''
    assert isinstance(c, str), f'{type(c)=}'
    return c.encode('utf8', 'surrogateescape').decode('latin-1')
def JM_BufferFromBytes(stream):
    """Make an fz_buffer from bytes-like data.

    Args:
        stream: a bytes or bytearray object, or any object with a
            getvalue() method (for example io.BytesIO). A str returned by
            getvalue() is encoded as utf-8.

    Returns:
        A buffer holding a copy of the data. For any other kind of 'stream'
        an empty mupdf.FzBuffer() is returned.

    Raises:
        Exception: if getvalue() returns something other than str, bytes or
            bytearray.
    """
    if isinstance(stream, (bytes, bytearray)):
        return mupdf.fz_new_buffer_from_copied_data(stream)
    if not hasattr(stream, 'getvalue'):
        return mupdf.FzBuffer()

    data = stream.getvalue()
    if isinstance(data, str):
        data = data.encode('utf-8')
    if not isinstance(data, (bytes, bytearray)):
        raise Exception(f'.getvalue() returned unexpected type: {type(data)}')
    return mupdf.fz_new_buffer_from_copied_data(data)
def JM_FLOAT_ITEM(obj, idx):
    """Return obj[idx] as a float, or None if PySequence_Check(obj) is false."""
    if PySequence_Check(obj):
        return float(obj[idx])
    return None
def JM_INT_ITEM(obj, idx):
    """Fetch a numeric item of a sequence, together with a status code.

    Returns:
        (0, obj[idx]) if 'idx' is less than len(obj) and the item is an int
        or a float; (1, None) otherwise.
    """
    failure = (1, None)
    if not idx < len(obj):
        return failure
    item = obj[idx]
    if not isinstance(item, (int, float)):
        return failure
    return 0, item
def JM_pixmap_from_page(doc, page, ctm, cs, alpha, annots, clip):
    '''
    Pixmap creation directly using a short-lived displaylist, so we can support
    separations.

    Args:
        doc: document, queried for its output intent.
        page: page to render.
        ctm: matrix, converted with JM_matrix_from_py().
        cs: colorspace to render in, unless the document's output intent
            has the same number of components and replaces it.
        alpha: passed to the pixmap creation; also selects how the new
            pixmap is cleared.
        annots: if true the whole page is run, otherwise the page contents only.
        clip: rectangle, converted with JM_rect_from_py() and intersected
            with the page bounds.

    Returns:
        The rendered pixmap.

    NOTE(review): block nesting was reconstructed from context (indentation
    was not available) - confirm against the file.
    '''
    # Local stand-ins for the spot rendering modes.
    SPOTS_NONE = 0
    SPOTS_OVERPRINT_SIM = 1
    SPOTS_FULL = 2

    FZ_ENABLE_SPOT_RENDERING = True # fixme: this is a build-time setting in MuPDF's config.h.
    if FZ_ENABLE_SPOT_RENDERING:
        spots = SPOTS_OVERPRINT_SIM
    else:
        spots = SPOTS_NONE
    seps = None
    colorspace = cs

    # Pixel area: page bounds, restricted by the clip, transformed, rounded.
    matrix = JM_matrix_from_py(ctm)
    rect = mupdf.fz_bound_page(page)
    rclip = JM_rect_from_py(clip)
    rect = mupdf.fz_intersect_rect(rect, rclip) # no-op if clip is not given
    rect = mupdf.fz_transform_rect(rect, matrix)
    bbox = mupdf.fz_round_rect(rect)
    # Pixmap of the document's /OutputIntents ("output intents")
    oi = mupdf.fz_document_output_intent(doc)
    # if present and compatible, use it instead of the parameter
    if oi.m_internal:
        if mupdf.fz_colorspace_n(oi) == mupdf.fz_colorspace_n(cs):
            colorspace = mupdf.fz_keep_colorspace(oi)
    # check if spots rendering is available and if so use separations
    if spots != SPOTS_NONE:
        seps = mupdf.fz_page_separations(page)
        if seps.m_internal:
            # The page has separations: set the same behavior for all of them.
            n = mupdf.fz_count_separations(seps)
            if spots == SPOTS_FULL:
                for i in range(n):
                    mupdf.fz_set_separation_behavior(seps, i, mupdf.FZ_SEPARATION_SPOT)
            else:
                for i in range(n):
                    mupdf.fz_set_separation_behavior(seps, i, mupdf.FZ_SEPARATION_COMPOSITE)
        elif mupdf.fz_page_uses_overprint(page):
            # This page uses overprint, so we need an empty
            # sep object to force the overprint simulation on.
            seps = mupdf.fz_new_separations(0)
        elif oi.m_internal and mupdf.fz_colorspace_n(oi) != mupdf.fz_colorspace_n(colorspace):
            # We have an output intent, and it's incompatible
            # with the colorspace our device needs. Force the
            # overprint simulation on, because this ensures that
            # we 'simulate' the output intent too.
            seps = mupdf.fz_new_separations(0)
    pix = mupdf.fz_new_pixmap_with_bbox(colorspace, bbox, seps, alpha)
    # Initialize the samples: plain clear with alpha, value 0xFF without.
    if alpha:
        mupdf.fz_clear_pixmap(pix)
    else:
        mupdf.fz_clear_pixmap_with_value(pix, 0xFF)
    # Render through a draw device, which must be closed before returning.
    dev = mupdf.fz_new_draw_device(matrix, pix)
    if annots:
        mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
    else:
        mupdf.fz_run_page_contents(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
    mupdf.fz_close_device(dev)
    return pix
def JM_StrAsChar(x):
    '''
    Return `x` unchanged.

    fixme: should encode as utf8 (`x.encode('utf8')`), but swig doesn't pass
    bytes to C as const char*.
    '''
    return x
def JM_TUPLE(o: typing.Sequence) -> tuple:
    '''
    Return the items of `o` as a tuple, each rounded to 5 decimal places.
    Items whose absolute value is below 1e-4 are replaced by 0.
    '''
    return tuple(round(v, 5) if abs(v) >= 1e-4 else 0 for v in o)
def JM_TUPLE3(o: typing.Sequence) -> tuple:
    '''
    Return the items of `o` as a tuple, each rounded to 3 decimal places.
    Items whose absolute value is below 1e-3 are replaced by 0.
    '''
    return tuple(round(v, 3) if abs(v) >= 1e-3 else 0 for v in o)
def JM_UnicodeFromStr(s):
    '''
    Return `s` as a `str`.

    `None` gives the empty string, `bytes` are decoded as utf8 and a `str`
    is returned as is. Any other type fails the assertion.
    '''
    if s is None:
        return ''
    s = s.decode('utf8') if isinstance(s, bytes) else s
    assert isinstance(s, str), f'{type(s)=} {s=}'
    return s
def JM_add_annot_id(annot, stem):
    '''
    Give an annotation or widget a /NM key that is unique on its page.

    The name has the form `<JM_annot_id_stem>-<stem><i>`, where `i` is the
    smallest non-negative integer for which the name is not already in the
    list returned by JM_get_annot_id_list() for the annotation's page.
    '''
    assert isinstance(annot, mupdf.PdfAnnot)
    page = _pdf_annot_page(annot)
    annot_obj = mupdf.pdf_annot_obj( annot)
    taken = JM_get_annot_id_list(page)
    i = 0
    # Advance the counter until the candidate name is unused.
    while f'{JM_annot_id_stem}-{stem}{i}' in taken:
        i += 1
    stem_id = f'{JM_annot_id_stem}-{stem}{i}'
    response = JM_StrAsChar(stem_id)
    mupdf.pdf_dict_puts(
            annot_obj,
            "NM",
            mupdf.pdf_new_string( response, len(response)),
            )
    page.doc().m_internal.resynth_required = 0
def JM_add_oc_object(pdf, ref, xref):
    '''
    Store an /OC entry in dictionary `ref` that refers to object `xref`.

    The referenced object must be a dictionary whose /Type is /OCG or
    /OCMD; otherwise RAISEPY() is called with MSG_BAD_OC_REF and
    PyExc_ValueError.
    '''
    indobj = mupdf.pdf_new_indirect(pdf, xref, 0)
    if not mupdf.pdf_is_dict(indobj):
        RAISEPY(MSG_BAD_OC_REF, PyExc_ValueError)
    type_ = mupdf.pdf_dict_get(indobj, PDF_NAME('Type'))
    accepted = ('OCG', 'OCMD')
    if any(mupdf.pdf_objcmp(type_, PDF_NAME(n)) == 0 for n in accepted):
        mupdf.pdf_dict_put(ref, PDF_NAME('OC'), indobj)
        return
    RAISEPY(MSG_BAD_OC_REF, PyExc_ValueError)
def JM_annot_border(annot_obj):
    '''
    Collect the border properties of an annotation object.

    Reads /Border (width at index 2, optional dash array at index 3), then
    /BS (/W, /S, /D), which overrides width and style and appends further
    dash values, and finally /BE (/I) for the cloud intensity.

    Returns a dict with keys `dictkey_width`, `dictkey_dashes` (a tuple),
    `dictkey_style` (None if absent or empty) and 'clouds'. Width and
    clouds default to -1.
    '''
    dashes = []
    style = None
    width = -1
    clouds = -1

    def collect_ints(arr):
        # Append every item of PDF array 'arr' as int to 'dashes'.
        for k in range(mupdf.pdf_array_len(arr)):
            dashes.append(mupdf.pdf_to_int(mupdf.pdf_array_get(arr, k)))

    border = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Border'))
    if mupdf.pdf_is_array(border):
        width = mupdf.pdf_to_real(mupdf.pdf_array_get(border, 2))
        if mupdf.pdf_array_len(border) == 4:
            collect_ints(mupdf.pdf_array_get(border, 3))

    bs_o = mupdf.pdf_dict_get(annot_obj, PDF_NAME('BS'))
    if bs_o.m_internal:
        width = mupdf.pdf_to_real(mupdf.pdf_dict_get(bs_o, PDF_NAME('W')))
        style = mupdf.pdf_to_name(mupdf.pdf_dict_get(bs_o, PDF_NAME('S')))
        if style == '':
            style = None
        bs_dashes = mupdf.pdf_dict_get(bs_o, PDF_NAME('D'))
        if bs_dashes.m_internal:
            collect_ints(bs_dashes)

    effect = mupdf.pdf_dict_get(annot_obj, PDF_NAME('BE'))
    if effect.m_internal:
        clouds = mupdf.pdf_to_int(mupdf.pdf_dict_get(effect, PDF_NAME('I')))

    return {
            dictkey_width: width,
            dictkey_dashes: tuple(dashes),
            dictkey_style: style,
            'clouds': clouds,
            }
def JM_annot_colors(annot_obj):
    '''
    Return the stroke and fill colors of an annotation object.

    The result is a dict with key `dictkey_stroke` (components of /C) and
    key `dictkey_fill` (components of /IC). Each value is a list of floats,
    empty if the respective entry is not an array.
    '''
    def components(arr):
        # Items of a PDF array as floats; [] if 'arr' is not an array.
        if not mupdf.pdf_is_array(arr):
            return []
        return [
                mupdf.pdf_to_real(mupdf.pdf_array_get(arr, k))
                for k in range(mupdf.pdf_array_len(arr))
                ]

    stroke = components(mupdf.pdf_dict_get(annot_obj, mupdf.PDF_ENUM_NAME_C))
    fill = components(mupdf.pdf_dict_gets(annot_obj, "IC"))
    return {dictkey_stroke: stroke, dictkey_fill: fill}
def JM_annot_set_border( border, doc, annot_obj):
    '''
    Replace the border properties of an annotation object.

    Args:
        border: dict that may contain the keys `dictkey_width`,
            `dictkey_dashes`, `dictkey_style` and 'clouds'. A missing or
            `None` entry means "keep the current value" as reported by
            JM_annot_border(); the same holds for a negative width or
            clouds value.
        doc: PDF document, used to create the dashes array.
        annot_obj: the annotation's PDF object. Its /BS, /BE and /Border
            entries are deleted; /BS is then written anew, and /BE too if
            the resulting clouds value is positive.
    '''
    assert isinstance(border, dict)
    obj = None
    dashlen = 0
    nwidth = border.get( dictkey_width)     # new width
    ndashes = border.get( dictkey_dashes)   # new dashes
    nstyle = border.get( dictkey_style)     # new style
    nclouds = border.get( 'clouds')         # new clouds value
    # A missing key or an explicit None must not reach the '<' comparisons
    # below (None < 0 raises TypeError): treat both as "not given".
    if nwidth is None:
        nwidth = -1
    if nclouds is None:
        nclouds = -1

    # get old border properties
    oborder = JM_annot_border( annot_obj)

    # delete border-related entries
    mupdf.pdf_dict_del( annot_obj, PDF_NAME('BS'))
    mupdf.pdf_dict_del( annot_obj, PDF_NAME('BE'))
    mupdf.pdf_dict_del( annot_obj, PDF_NAME('Border'))

    # populate border items: keep old values for any omitted new ones
    if nwidth < 0:
        nwidth = oborder.get( dictkey_width)    # no new width: keep current
    if ndashes is None:
        ndashes = oborder.get( dictkey_dashes)  # no new dashes: keep old
    if nstyle is None:
        nstyle = oborder.get( dictkey_style)    # no new style: keep old
    if nclouds < 0:
        nclouds = oborder.get( "clouds", -1)    # no new clouds: keep old

    if isinstance( ndashes, tuple) and len( ndashes) > 0:
        dashlen = len( ndashes)
        darr = mupdf.pdf_new_array( doc, dashlen)
        for d in ndashes:
            mupdf.pdf_array_push_int( darr, d)
        mupdf.pdf_dict_putl( annot_obj, darr, PDF_NAME('BS'), PDF_NAME('D'))

    mupdf.pdf_dict_putl(
            annot_obj,
            mupdf.pdf_new_real( nwidth),
            PDF_NAME('BS'),
            PDF_NAME('W'),
            )

    # A non-empty dash array forces border style /D.
    if dashlen == 0:
        obj = JM_get_border_style( nstyle)
    else:
        obj = PDF_NAME('D')
    mupdf.pdf_dict_putl( annot_obj, obj, PDF_NAME('BS'), PDF_NAME('S'))

    if nclouds > 0:
        mupdf.pdf_dict_put_dict( annot_obj, PDF_NAME('BE'), 2)
        obj = mupdf.pdf_dict_get( annot_obj, PDF_NAME('BE'))
        mupdf.pdf_dict_put( obj, PDF_NAME('S'), PDF_NAME('C'))
        mupdf.pdf_dict_put_int( obj, PDF_NAME('I'), nclouds)
def make_escape(ch):
    '''
    Return the text that represents code point `ch` (an int).

    * backslash (92): the literal text `\\u005c`
    * 32..127 and line feed (10): the character itself
    * surrogates 0xd800..0xdfff: the literal text `\\ufffd`
    * any other value up to 0xffff: `\\uXXXX` (4 hex digits)
    * everything above: `\\UXXXXXXXX` (8 hex digits)
    '''
    if ch == 92:
        return "\\u005c"
    if 32 <= ch <= 127 or ch == 10:
        return chr(ch)
    if 0xd800 <= ch <= 0xdfff:  # orphaned surrogate
        return "\\ufffd"
    template = "\\u%04x" if ch <= 0xffff else "\\U%08x"
    return template % ch
def JM_append_rune(buff, ch):
    """
    Append code point `ch` to fz_buffer `buff`, using the text produced by
    make_escape() (unicode escape format for non-ascii runes).
    """
    escaped = make_escape(ch)
    mupdf.fz_append_string(buff, escaped)
def JM_append_word(lines, buff, wbbox, block_n, line_n, word_n):
    '''
    Wordlist output: append one word entry to `lines`.

    The entry is the tuple
    `(x0, y0, x1, y1, text, block_n, line_n, word_n)`, where the coordinates
    come from `wbbox` and the text from JM_EscapeStrFromBuffer(buff).

    Returns the pair `(word_n + 1, <empty FzRect>)`, i.e. the next word
    counter and a fresh empty rectangle.
    '''
    text = JM_EscapeStrFromBuffer(buff)
    lines.append(
            (wbbox.x0, wbbox.y0, wbbox.x1, wbbox.y1, text, block_n, line_n, word_n)
            )
    empty_bbox = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
    return word_n + 1, empty_bbox  # word counter
def JM_add_layer_config( pdf, name, creator, ON):
    '''
    Append a new optional content configuration to /OCProperties/Configs.

    The configuration dictionary gets /Name, optionally /Creator (when
    `creator` is not None), /BaseState /OFF and an /ON array. For every
    numeric item of `ON` an indirect reference is created and pushed to
    /ON if it is contained in the /OCGs array; non-numeric items are
    skipped.
    '''
    ocp = JM_ensure_ocproperties( pdf)
    configs = mupdf.pdf_dict_get( ocp, PDF_NAME('Configs'))
    if not mupdf.pdf_is_array( configs):
        configs = mupdf.pdf_dict_put_array( ocp, PDF_NAME('Configs'), 1)

    config = mupdf.pdf_new_dict( pdf, 5)
    mupdf.pdf_dict_put_text_string( config, PDF_NAME('Name'), name)
    if creator is not None:
        mupdf.pdf_dict_put_text_string( config, PDF_NAME('Creator'), creator)
    mupdf.pdf_dict_put( config, PDF_NAME('BaseState'), PDF_NAME('OFF'))
    on_array = mupdf.pdf_dict_put_array( config, PDF_NAME('ON'), 5)

    if ON:
        ocgs = mupdf.pdf_dict_get( ocp, PDF_NAME('OCGs'))
        for pos in range( len(ON)):
            err, xref = JM_INT_ITEM(ON, pos)
            if err == 1:
                continue    # not a number: skip
            ind = mupdf.pdf_new_indirect( pdf, xref, 0)
            if mupdf.pdf_array_contains( ocgs, ind):
                mupdf.pdf_array_push( on_array, ind)

    mupdf.pdf_array_push( configs, config)
def JM_char_bbox(line, ch):
    '''
    Return the rectangle of the char quad computed by JM_char_quad().

    If the line's `wmode` is set and the rectangle is lower than the
    character's size, its top `y0` is moved so that the height equals that
    size.
    '''
    rect = mupdf.fz_rect_from_quad(JM_char_quad(line, ch))
    if line.m_internal.wmode:
        if rect.y1 < rect.y0 + ch.m_internal.size:
            rect.y0 = rect.y1 - ch.m_internal.size
    return rect
def JM_char_font_flags(font, line, ch):
    '''
    Compute the font flags of a character as an integer.

    Starts with detect_super_script(line, ch) when both `line` and `ch`
    are given, then adds TEXT_FONT_ITALIC, TEXT_FONT_SERIFED,
    TEXT_FONT_MONOSPACED and TEXT_FONT_BOLD, each multiplied by the
    respective mupdf font property.
    '''
    flags = detect_super_script(line, ch) if line and ch else 0
    flags += (
            mupdf.fz_font_is_italic(font) * TEXT_FONT_ITALIC
            + mupdf.fz_font_is_serif(font) * TEXT_FONT_SERIFED
            + mupdf.fz_font_is_monospaced(font) * TEXT_FONT_MONOSPACED
            + mupdf.fz_font_is_bold(font) * TEXT_FONT_BOLD
            )
    return flags
def JM_char_quad(line, ch):
    '''
    Return the quad of character `ch` in text line `line`, re-computed if
    the font's ascender / descender values make no sense.

    The quad is returned unchanged when `_globals.skip_quad_corrections`
    is set, when the line is in vertical write mode, or when
    ascender - descender is at least 1 and `_globals.small_glyph_heights`
    is 0. Otherwise the quad is moved to the origin, de-rotated, given new
    vertical (and, if needed, horizontal) coordinates, rotated back and
    moved back.
    '''
    if 1 and g_use_extra:
        # This reduces time taken to extract text from PyMuPDF.pdf from 20s to
        # 15s.
        return mupdf.FzQuad(extra.JM_char_quad( line.m_internal, ch.m_internal))

    assert isinstance(line, mupdf.FzStextLine)
    assert isinstance(ch, mupdf.FzStextChar)
    # NOTE(review): the two early returns below use 'ch.quad' whereas the
    # rest of the function uses 'ch.m_internal.quad' - confirm that the
    # wrapper class exposes '.quad'.
    if _globals.skip_quad_corrections: # no special handling
        return ch.quad
    if line.m_internal.wmode: # never touch vertical write mode
        return ch.quad
    font = mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font))
    asc = JM_font_ascender(font)
    dsc = JM_font_descender(font)
    fsize = ch.m_internal.size
    asc_dsc = asc - dsc + FLT_EPSILON
    if asc_dsc >= 1 and _globals.small_glyph_heights == 0: # no problem
        return mupdf.FzQuad(ch.m_internal.quad)
    # Re-compute quad with adjusted ascender / descender values:
    # Move ch->origin to (0,0) and de-rotate quad, then adjust the corners,
    # re-rotate and move back to ch->origin location.
    fsize = ch.m_internal.size
    bbox = mupdf.fz_font_bbox(font)
    fwidth = bbox.x1 - bbox.x0
    if asc < 1e-3: # probably Tesseract glyphless font
        dsc = -0.1
        asc = 0.9
        asc_dsc = 1.0

    # Normalize ascender / descender so that their difference becomes 1 ...
    if _globals.small_glyph_heights or asc_dsc < 1:
        dsc = dsc / asc_dsc
        asc = asc / asc_dsc
    # ... then scale both to the font size.
    asc_dsc = asc - dsc
    asc = asc * fsize / asc_dsc
    dsc = dsc * fsize / asc_dsc

    # Re-compute quad with the adjusted ascender / descender values:
    # Move ch->origin to (0,0) and de-rotate quad, then adjust the corners,
    # re-rotate and move back to ch->origin location.
    c = line.m_internal.dir.x # cosine
    s = line.m_internal.dir.y # sine
    trm1 = mupdf.fz_make_matrix(c, -s, s, c, 0, 0) # derotate
    trm2 = mupdf.fz_make_matrix(c, s, -s, c, 0, 0) # rotate
    if (c == -1): # left-right flip
        trm1.d = 1
        trm2.d = 1
    xlate1 = mupdf.fz_make_matrix(1, 0, 0, 1, -ch.m_internal.origin.x, -ch.m_internal.origin.y)
    xlate2 = mupdf.fz_make_matrix(1, 0, 0, 1, ch.m_internal.origin.x, ch.m_internal.origin.y)
    quad = mupdf.fz_transform_quad(mupdf.FzQuad(ch.m_internal.quad), xlate1) # move origin to (0,0)
    quad = mupdf.fz_transform_quad(quad, trm1) # de-rotate corners

    # adjust vertical coordinates
    if c == 1 and quad.ul.y > 0: # up-down flip
        quad.ul.y = asc
        quad.ur.y = asc
        quad.ll.y = dsc
        quad.lr.y = dsc
    else:
        quad.ul.y = -asc
        quad.ur.y = -asc
        quad.ll.y = -dsc
        quad.lr.y = -dsc
    # adjust horizontal coordinates that are too crazy:
    # (1) left x must be >= 0
    # (2) if bbox width is 0, lookup char advance in font.
    if quad.ll.x < 0:
        quad.ll.x = 0
        quad.ul.x = 0

    cwidth = quad.lr.x - quad.ll.x
    if cwidth < FLT_EPSILON:
        glyph = mupdf.fz_encode_character( font, ch.m_internal.c)
        if glyph:
            # Use the glyph's advance, scaled by the font size, as width.
            fwidth = mupdf.fz_advance_glyph( font, glyph, line.m_internal.wmode)
            quad.lr.x = quad.ll.x + fwidth * fsize
            quad.ur.x = quad.lr.x
    quad = mupdf.fz_transform_quad(quad, trm2) # rotate back
    quad = mupdf.fz_transform_quad(quad, xlate2) # translate back
    return quad
def JM_choice_options(annot):
    '''
    Return the list of choices of a list box or combo box.

    Each item of the widget's /Opt array yields either a pair of strings
    (when the item is an array of length 2) or a single string. Returns
    None if the widget reports no options (wrong widget type).
    '''
    annot_obj = mupdf.pdf_annot_obj( annot.this)

    opts = mupdf.pdf_choice_widget_options2( annot, 0)
    count = len( opts)
    if count == 0:
        return  # wrong widget type

    optarr = mupdf.pdf_dict_get( annot_obj, PDF_NAME('Opt'))
    choices = []
    for i in range( count):
        entry = mupdf.pdf_array_get( optarr, i)
        if mupdf.pdf_array_len( entry) == 2:
            choices.append( (
                    mupdf.pdf_to_text_string( mupdf.pdf_array_get( entry, 0)),
                    mupdf.pdf_to_text_string( mupdf.pdf_array_get( entry, 1)),
                    ))
        else:
            choices.append( mupdf.pdf_to_text_string( entry))
    return choices
def JM_clear_pixmap_rect_with_value(dest, value, b):
    '''
    Clear a pixmap rectangle - my version also supports non-alpha pixmaps.

    Args:
        dest: pixmap to modify.
        value: sample value to write.
        b: integer rectangle; it is intersected with the pixmap's bbox.

    Returns:
        0 if the intersection is empty (nothing is changed), else 1.

    Every pixel of the rectangle receives the same samples:
    * colorspace with 4 components (CMYK): 0, 0, 0, 255 - value
      and, if the pixmap has alpha, an additional 255;
    * otherwise: `value` for the first n-1 samples and 255 (alpha) or
      `value` (no alpha) for the last one.
    '''
    b = mupdf.fz_intersect_irect(b, mupdf.fz_pixmap_bbox(dest))
    width = b.x1 - b.x0
    height = b.y1 - b.y0
    if width <= 0 or height <= 0:
        return 0

    destspan = dest.stride()
    destp = destspan * (b.y0 - dest.y()) + dest.n() * (b.x0 - dest.x())
    has_alpha = dest.alpha()

    # Build the sample sequence of one pixel once.
    # CMYK needs special handling (and potentially any other subtractive colorspaces)
    if mupdf.fz_colorspace_n(dest.colorspace()) == 4:
        pixel = [0, 0, 0, 255 - value]
        if has_alpha:
            pixel.append(255)
    else:
        pixel = [value] * (dest.n() - 1)
        pixel.append(255 if has_alpha else value)

    # Exactly 'height' rows are written. The previous 'while 1' loop tested
    # its counter before decrementing it and therefore processed one row
    # too many.
    for _ in range(height):
        s = destp
        for _ in range(width):
            for sample in pixel:
                mupdf.fz_samples_set(dest, s, sample)
                s += 1
        destp += destspan
    return 1
def JM_color_FromSequence(color):
    '''
    Validate a color specification.

    Args:
        color: a single number, or a list / tuple with 0, 1, 3 or 4
            components.

    Returns:
        `(n, components)` with `n` the number of components, or `(-1, [])`
        if `color` has an unsupported type or length. Components below 0
        or above 1 are replaced by 1. `components` is always a new object
        and is a tuple for tuple input, else a list.
    '''
    if isinstance(color, (int, float)):    # maybe just a single float
        color = [color]

    if not isinstance( color, (list, tuple)):
        return -1, []

    if len(color) not in (0, 1, 3, 4):
        return -1, []

    # Build the result instead of patching a copy in place: a copy of a
    # tuple is a tuple again and does not support item assignment.
    ret = [1 if (c < 0 or c > 1) else c for c in color]
    if isinstance(color, tuple):
        ret = tuple(ret)
    return len(ret), ret
def JM_color_count( pm, clip):
    '''
    Count how often each pixel value occurs in pixmap `pm` within `clip`.

    The work is delegated to extra.ll_JM_color_count(). The condition
    `1 or g_use_extra` is always true, so the Python implementation below
    the early return is currently unreachable; it is kept as a reference.
    '''
    if 1 or g_use_extra:
        return extra.ll_JM_color_count(pm.m_internal, clip)

    # --- unreachable reference implementation ---
    # Builds a dict mapping the value returned by _read_samples() for a
    # pixel to the number of its occurrences, by counting runs of equal
    # consecutive pixels.
    rc = dict()
    cnt = 0
    irect = mupdf.fz_pixmap_bbox( pm)
    irect = mupdf.fz_intersect_irect(irect, mupdf.fz_round_rect(JM_rect_from_py(clip)))
    stride = pm.stride()
    width = irect.x1 - irect.x0
    height = irect.y1 - irect.y0
    n = pm.n()
    substride = width * n
    # Offset of the first pixel of the clip area within the samples.
    s = stride * (irect.y0 - pm.y()) + (irect.x0 - pm.x()) * n
    oldpix = _read_samples( pm, s, n)
    cnt = 0
    if mupdf.fz_is_empty_irect(irect):
        return rc
    for i in range( height):
        for j in range( 0, substride, n):
            newpix = _read_samples( pm, s + j, n)
            if newpix != oldpix:
                # A run ended: add its length to the count of its pixel.
                pixel = oldpix
                c = rc.get( pixel, None)
                if c is not None:
                    cnt += c
                rc[ pixel] = cnt
                cnt = 1
                oldpix = newpix
            else:
                cnt += 1
        s += stride
    # Store the final run.
    pixel = oldpix
    c = rc.get( pixel)
    if c is not None:
        cnt += c
    rc[ pixel] = cnt
    return rc
def JM_compress_buffer(inbuffer):
    '''
    Deflate the content of `inbuffer` into a new buffer.

    Uses mupdf.FZ_DEFLATE_BEST. Returns None if deflating yields no data or
    a length of zero, else the new buffer, resized to the compressed
    length.
    '''
    data, compressed_length = mupdf.fz_new_deflated_data_from_buffer(
            inbuffer,
            mupdf.FZ_DEFLATE_BEST,
            )
    #log( '{=data compressed_length}')
    if data and compressed_length != 0:
        buf = mupdf.FzBuffer(mupdf.fz_new_buffer_from_data(data, compressed_length))
        mupdf.fz_resize_buffer(buf, compressed_length)
        return buf
    return None
def JM_copy_rectangle(page, area):
    '''
    Return the text of all characters whose bbox overlaps `area`.

    `page` is iterated as blocks -> lines -> chars; only blocks of type
    mupdf.FZ_STEXT_BLOCK_TEXT are considered. Characters are converted
    with make_escape(). A line feed is inserted before the first selected
    character of a line if an earlier line contributed text.
    '''
    pieces = []
    pending_newline = False
    for block in page:
        if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
            continue
        for line in block:
            line_had_text = False
            for ch in line:
                if not JM_rects_overlap(area, JM_char_bbox(line, ch)):
                    continue
                line_had_text = True
                if pending_newline:
                    pieces.append("\n")
                    pending_newline = False
                pieces.append(make_escape(ch.m_internal.c))
            if line_had_text:
                pending_newline = True
    return ''.join(pieces)
def JM_convert_to_pdf(doc, fp, tp, rotate):
    '''
    Convert any MuPDF document to a PDF.

    Pages `fp` through `tp` are copied in that order; if `fp` is greater
    than `tp` the pages are taken backwards. Every new page gets the
    rotation JM_norm_rotation(rotate).

    Returns a bytes object containing the PDF, created via 'write'
    function.
    '''
    pdfout = mupdf.PdfDocument()
    backwards = fp > tp
    step = -1 if backwards else 1       # count backwards if needed
    lo, hi = (tp, fp) if backwards else (fp, tp)
    rot = JM_norm_rotation(rotate)

    pno = fp
    while _INRANGE(pno, lo, hi):    # interpret & write document pages as PDF pages
        page = mupdf.fz_load_page(doc, pno)
        mediabox = mupdf.fz_bound_page(page)
        dev, resources, contents = mupdf.pdf_page_write(pdfout, mediabox)
        mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
        mupdf.fz_close_device(dev)
        dev = None
        page_obj = mupdf.pdf_add_page(pdfout, mediabox, rot, resources, contents)
        mupdf.pdf_insert_page(pdfout, -1, page_obj)
        pno += step

    # PDF created - now write it to Python bytearray
    # prepare write options structure
    opts = mupdf.PdfWriteOptions()
    settings = (
            ('do_garbage', 4),
            ('do_compress', 1),
            ('do_compress_images', 1),
            ('do_compress_fonts', 1),
            ('do_sanitize', 1),
            ('do_incremental', 0),
            ('do_ascii', 0),
            ('do_decompress', 0),
            ('do_linear', 0),
            ('do_clean', 1),
            ('do_pretty', 0),
            )
    for option, setting in settings:
        setattr(opts, option, setting)

    res = mupdf.fz_new_buffer(8192)
    out = mupdf.FzOutput(res)
    mupdf.pdf_write_document(pdfout, out, opts)
    out.fz_close_output()
    c = mupdf.fz_buffer_extract_copy(res)
    assert isinstance(c, bytes)
    return c
- # Copied from MuPDF v1.14
- # Create widget
def JM_create_widget(doc, page, type, fieldname):
    '''
    Create a new widget annotation on `page` and register it as a form
    field.

    Args:
        doc: PDF document.
        page: PDF page receiving the widget.
        type: widget type (one of the mupdf.PDF_WIDGET_TYPE_* values).
        fieldname: text stored as the field's /T entry.

    Returns:
        The new annotation.

    If anything fails after the annotation was created, the annotation is
    deleted again, the previous /SigFlags value is restored for signature
    widgets, and the exception is re-raised.
    '''
    # Remember the current signature flags so they can be restored on error.
    old_sigflags = mupdf.pdf_to_int(mupdf.pdf_dict_getp(mupdf.pdf_trailer(doc), "Root/AcroForm/SigFlags"))
    #log( '*** JM_create_widget()')
    #log( f'{mupdf.pdf_create_annot_raw=}')
    #log( f'{page=}')
    #log( f'{mupdf.PDF_ANNOT_WIDGET=}')
    annot = mupdf.pdf_create_annot_raw(page, mupdf.PDF_ANNOT_WIDGET)
    annot_obj = mupdf.pdf_annot_obj(annot)
    try:
        JM_set_field_type(doc, annot_obj, type)
        mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('T'), fieldname)
        if type == mupdf.PDF_WIDGET_TYPE_SIGNATURE:
            # Signature fields additionally set document-level flags.
            sigflags = old_sigflags | (SigFlag_SignaturesExist | SigFlag_AppendOnly)
            mupdf.pdf_dict_putl(
                    mupdf.pdf_trailer(doc),
                    mupdf.pdf_new_int(sigflags),
                    PDF_NAME('Root'),
                    PDF_NAME('AcroForm'),
                    PDF_NAME('SigFlags'),
                    )
        # pdf_create_annot will have linked the new widget into the page's
        # annot array. We also need it linked into the document's form
        form = mupdf.pdf_dict_getp(mupdf.pdf_trailer(doc), "Root/AcroForm/Fields")
        if not form.m_internal:
            # No /Fields array yet: create and store one.
            form = mupdf.pdf_new_array(doc, 1)
            mupdf.pdf_dict_putl(
                    mupdf.pdf_trailer(doc),
                    form,
                    PDF_NAME('Root'),
                    PDF_NAME('AcroForm'),
                    PDF_NAME('Fields'),
                    )
        mupdf.pdf_array_push(form, annot_obj) # Cleanup relies on this statement being last
    except Exception:
        if g_exceptions_verbose: exception_info()
        # Undo: remove the annotation and restore the old signature flags.
        mupdf.pdf_delete_annot(page, annot)
        if type == mupdf.PDF_WIDGET_TYPE_SIGNATURE:
            mupdf.pdf_dict_putl(
                    mupdf.pdf_trailer(doc),
                    mupdf.pdf_new_int(old_sigflags),
                    PDF_NAME('Root'),
                    PDF_NAME('AcroForm'),
                    PDF_NAME('SigFlags'),
                    )
        raise
    return annot
def JM_cropbox(page_obj):
    '''
    Return a PDF page's CropBox.

    Delegates to extra.JM_cropbox() if `g_use_extra` is set. Otherwise the
    inheritable /CropBox is read; if it is infinite or empty, the MediaBox
    from JM_mediabox() is used instead. The y coordinates of the result are
    measured from the MediaBox's `y1`.
    '''
    if g_use_extra:
        return extra.JM_cropbox(page_obj)

    mediabox = JM_mediabox(page_obj)
    cropbox = mupdf.pdf_to_rect(
            mupdf.pdf_dict_get_inheritable(page_obj, PDF_NAME('CropBox'))
            )
    if mupdf.fz_is_infinite_rect(cropbox) or mupdf.fz_is_empty_rect(cropbox):
        cropbox = mediabox
    # Compute both new values before storing them: 'cropbox' may be the
    # same object as 'mediabox'.
    top = mediabox.y1
    cropbox.y0, cropbox.y1 = top - cropbox.y1, top - cropbox.y0
    return cropbox
def JM_cropbox_size(page_obj):
    '''
    Return the absolute width and height of the page's CropBox (as given by
    JM_cropbox()) as a point.
    '''
    box = JM_cropbox(page_obj)
    return mupdf.fz_make_point(abs(box.x1 - box.x0), abs(box.y1 - box.y0))
def JM_derotate_page_matrix(page):
    '''
    Return the inverse of the matrix given by JM_rotate_page_matrix(page).
    '''
    return mupdf.fz_invert_matrix(JM_rotate_page_matrix(page))
def JM_embed_file(
        pdf,
        buf,
        filename,
        ufilename,
        desc,
        compress,
        ):
    '''
    Embed a new file in a PDF (not only /EmbeddedFiles entries).

    Creates a /Filespec dictionary with /CI, /EF, /F, /UF and /Desc, adds
    a stream object for the file content (filled from `buf` via
    JM_update_stream()) and records the buffer length in /DL, /Length and
    /Params/Size.

    Returns the file specification dictionary.
    '''
    filespec = mupdf.pdf_new_dict(pdf, 6)
    mupdf.pdf_dict_put_dict(filespec, PDF_NAME('CI'), 4)
    ef = mupdf.pdf_dict_put_dict(filespec, PDF_NAME('EF'), 4)
    for key, text in (('F', filename), ('UF', ufilename), ('Desc', desc)):
        mupdf.pdf_dict_put_text_string(filespec, PDF_NAME(key), text)
    mupdf.pdf_dict_put(filespec, PDF_NAME('Type'), PDF_NAME('Filespec'))

    # Create the stream with placeholder content, then replace it.
    stream = mupdf.pdf_add_stream(
            pdf,
            mupdf.fz_new_buffer_from_copied_data(b' '),
            mupdf.PdfObj(),
            0,
            )
    mupdf.pdf_dict_put(ef, PDF_NAME('F'), stream)
    JM_update_stream(pdf, stream, buf, compress)

    size, _ = mupdf.fz_buffer_storage(buf)
    mupdf.pdf_dict_put_int(stream, PDF_NAME('DL'), size)
    mupdf.pdf_dict_put_int(stream, PDF_NAME('Length'), size)
    params = mupdf.pdf_dict_put_dict(stream, PDF_NAME('Params'), 4)
    mupdf.pdf_dict_put_int(params, PDF_NAME('Size'), size)
    return filespec
def JM_embedded_clean(pdf):
    '''
    Perform some cleaning around embedded files:
    (1) remove /Collection from the catalog if it is an empty dictionary
    (2) set /PageMode to /UseAttachments if /Names/EmbeddedFiles/Names
        exists
    '''
    root = mupdf.pdf_dict_get( mupdf.pdf_trailer( pdf), PDF_NAME('Root'))

    # remove any empty /Collection entry
    coll = mupdf.pdf_dict_get(root, PDF_NAME('Collection'))
    if coll.m_internal and mupdf.pdf_dict_len(coll) == 0:
        mupdf.pdf_dict_del(root, PDF_NAME('Collection'))

    path = (PDF_NAME('Names'), PDF_NAME('EmbeddedFiles'), PDF_NAME('Names'))
    efiles = mupdf.pdf_dict_getl(root, *path)
    if efiles.m_internal:
        mupdf.pdf_dict_put_name(root, PDF_NAME('PageMode'), "UseAttachments")
def JM_EscapeStrFromBuffer(buff):
    '''
    Return the content of `buff` as a `str`, decoded with
    PyUnicode_DecodeRawUnicodeEscape() using errors='replace'. A buffer
    without internal object gives the empty string.
    '''
    if buff.m_internal:
        raw = mupdf.fz_buffer_extract_copy(buff)
        return PyUnicode_DecodeRawUnicodeEscape(raw, errors='replace')
    return ''
def JM_ensure_identity(pdf):
    '''
    Store ID in PDF trailer.

    Does nothing if the trailer already has an /ID entry. Otherwise a
    two-item /ID array is created; both items are the same string made of
    16 random bytes.
    '''
    existing = mupdf.pdf_dict_get( mupdf.pdf_trailer(pdf), PDF_NAME('ID'))
    if existing.m_internal:
        return
    # Need to convert raw bytes into a str to send to
    # mupdf.pdf_new_string(). chr() seems to work for this.
    rnd = ''.join(chr(byte) for byte in mupdf.fz_memrnd2(16))
    id_ = mupdf.pdf_dict_put_array( mupdf.pdf_trailer( pdf), PDF_NAME('ID'), 2)
    for _ in range(2):
        mupdf.pdf_array_push( id_, mupdf.pdf_new_string( rnd, len(rnd)))
def JM_ensure_ocproperties(pdf):
    '''
    Ensure OCProperties, return /OCProperties key.

    If the catalog has no /OCProperties, a new dictionary is stored there
    with an empty /OCGs array and a /D dictionary holding empty /ON, /OFF,
    /Order and /RBGroups arrays.
    '''
    root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    ocp = mupdf.pdf_dict_get(root, PDF_NAME('OCProperties'))
    if not ocp.m_internal:
        ocp = mupdf.pdf_dict_put_dict(root, PDF_NAME('OCProperties'), 2)
        mupdf.pdf_dict_put_array(ocp, PDF_NAME('OCGs'), 0)
        default_config = mupdf.pdf_dict_put_dict(ocp, PDF_NAME('D'), 5)
        for key in ('ON', 'OFF', 'Order', 'RBGroups'):
            mupdf.pdf_dict_put_array(default_config, PDF_NAME(key), 0)
    return ocp
def JM_expand_fname(name):
    '''
    Map a font name to one of the abbreviations "Cour", "TiRo", "Symb",
    "ZaDb" or "Helv".

    The decision is made by the first two characters of `name` ("Co",
    "Ti", "Sy", "Za", each also with a lower case first letter). An empty
    name or any other prefix gives "Helv".
    '''
    if not name:
        return "Helv"
    prefix_table = (
            (("Co", "co"), "Cour"),
            (("Ti", "ti"), "TiRo"),
            (("Sy", "sy"), "Symb"),
            (("Za", "za"), "ZaDb"),
            )
    for prefixes, abbreviation in prefix_table:
        if name.startswith(prefixes):
            return abbreviation
    return "Helv"
def JM_field_type_text(wtype):
    '''
    Return the name of widget type `wtype` as a string, or "unknown" if it
    matches none of the mupdf.PDF_WIDGET_TYPE_* values checked here.
    '''
    labels = (
            ('PDF_WIDGET_TYPE_BUTTON', "Button"),
            ('PDF_WIDGET_TYPE_CHECKBOX', "CheckBox"),
            ('PDF_WIDGET_TYPE_RADIOBUTTON', "RadioButton"),
            ('PDF_WIDGET_TYPE_TEXT', "Text"),
            ('PDF_WIDGET_TYPE_LISTBOX', "ListBox"),
            ('PDF_WIDGET_TYPE_COMBOBOX', "ComboBox"),
            ('PDF_WIDGET_TYPE_SIGNATURE', "Signature"),
            )
    for constant, label in labels:
        # Constants are looked up one at a time, in the listed order.
        if wtype == getattr(mupdf, constant):
            return label
    return "unknown"
def JM_fill_pixmap_rect_with_color(dest, col, b):
    '''
    Fill a rectangle of pixmap `dest` with a color tuple.

    `b` is intersected with the pixmap's bbox. Every pixel inside receives
    the samples `col[0] ... col[n-1]`, where n is `dest.n()`.

    Returns 0 if the intersection is empty, else 1.
    '''
    assert isinstance(dest, mupdf.FzPixmap)
    b = mupdf.fz_intersect_irect(b, mupdf.fz_pixmap_bbox( dest))
    width = b.x1 - b.x0
    rows = b.y1 - b.y0
    if width <= 0 or rows <= 0:
        return 0
    stride = dest.stride()
    row_start = stride * (b.y0 - dest.y()) + dest.n() * (b.x0 - dest.x())
    for _ in range(rows):
        pos = row_start
        for _ in range(width):
            for i in range( dest.n()):
                mupdf.fz_samples_set(dest, pos, col[i])
                pos += 1
        row_start += stride
    return 1
def JM_find_annot_irt(annot):
    '''
    Return the first annotation of the same page whose /IRT key ("In
    Response To") points to `annot`, or None if there is none. Used to
    remove the response chain of a given annotation.
    '''
    assert isinstance(annot, mupdf.PdfAnnot)
    annot_obj = mupdf.pdf_annot_obj(annot)
    page = _pdf_annot_page(annot)
    # loop thru MuPDF's internal annots array
    candidate = mupdf.pdf_first_annot(page)
    while True:
        assert isinstance(candidate, mupdf.PdfAnnot)
        if not candidate.m_internal:
            return None     # end of the list: nothing found
        irt = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(candidate), 'IRT')
        if irt.m_internal and not mupdf.pdf_objcmp(irt, annot_obj):
            return candidate
        candidate = mupdf.pdf_next_annot(candidate)
def JM_font_ascender(font):
    '''
    Own version of the font ascender: the fixed value 0.8 if
    `_globals.skip_quad_corrections` is set, else the value from
    mupdf.fz_font_ascender().
    '''
    assert isinstance(font, mupdf.FzFont)
    return 0.8 if _globals.skip_quad_corrections else mupdf.fz_font_ascender(font)
def JM_font_descender(font):
    '''
    Own version of the font descender: the fixed value -0.2 if
    `_globals.skip_quad_corrections` is set, else the value from
    mupdf.fz_font_descender().
    '''
    assert isinstance(font, mupdf.FzFont)
    return -0.2 if _globals.skip_quad_corrections else mupdf.fz_font_descender(font)
def JM_is_word_delimiter(ch, delimiters):
    """Check if code point `ch` ends a word.

    Always true for values up to 32, for 160 and for 0x202a..0x202e (this
    covers any whitespace plus unicodes that switch between right-to-left
    and left-to-right languages). Otherwise true only if the character
    equals one of the items of `delimiters`.
    """
    if ch <= 32 or ch == 160 or 0x202a <= ch <= 0x202e:
        return True
    if not delimiters:  # no extra delimiters provided
        return False
    char = chr(ch)
    return any(d == char for d in delimiters)
-
def JM_is_rtl_char(ch):
    '''
    Return True if code point `ch` lies in the range 0x590..0x900
    (inclusive), else False.
    '''
    return not (ch < 0x590 or ch > 0x900)
def JM_font_name(font):
    '''
    Return the name of `font`.

    If the first '+' of the name is at index 6 and
    `_globals.subset_fontnames` is not set, everything up to and including
    that '+' is removed. Otherwise the full name is returned.
    '''
    assert isinstance(font, mupdf.FzFont)
    name = mupdf.fz_font_name(font)
    plus = name.find('+')
    # plus == -1 (no '+') is covered by 'plus != 6'.
    keep_full_name = _globals.subset_fontnames or plus != 6
    return name if keep_full_name else name[plus + 1:]
def JM_gather_fonts(pdf, dict_, fontlist, stream_xref):
    '''
    Store info of the fonts in dictionary `dict_` in Python list `fontlist`.

    For every entry that is a dictionary the tuple
    `(xref, extension, subtype, name, reference name, encoding, stream_xref)`
    is appended. The name is taken from /BaseFont, or from /Name if
    /BaseFont is missing or null. Other entries cause a warning and are
    skipped. Always returns 1.
    '''
    for i in range(mupdf.pdf_dict_len(dict_)):
        refname = mupdf.pdf_dict_get_key(dict_, i)
        fontdict = mupdf.pdf_dict_get_val(dict_, i)
        if not mupdf.pdf_is_dict(fontdict):
            mupdf.fz_warn( f"'{mupdf.pdf_to_name(refname)}' is no font dict ({mupdf.pdf_to_num(fontdict)} 0 R)")
            continue

        subtype = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_Subtype)
        basefont = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_BaseFont)
        if basefont.m_internal and not mupdf.pdf_is_null(basefont):
            name = basefont
        else:
            name = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_Name)
        encoding = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_Encoding)
        if mupdf.pdf_is_dict(encoding):
            encoding = mupdf.pdf_dict_get(encoding, mupdf.PDF_ENUM_NAME_BaseEncoding)
        xref = mupdf.pdf_to_num(fontdict)
        ext = JM_get_fontextension(pdf, xref) if xref else "n/a"
        fontlist.append( (
                xref,
                ext,
                mupdf.pdf_to_name(subtype),
                JM_EscapeStrFromStr(mupdf.pdf_to_name(name)),
                mupdf.pdf_to_name(refname),
                mupdf.pdf_to_name(encoding),
                stream_xref,
                ))
    return 1
def JM_gather_forms(doc, dict_: mupdf.PdfObj, imagelist, stream_xref: int):
    '''
    Store info of the /Form xobjects in dictionary `dict_` in Python list
    `imagelist`.

    For every entry that is a dictionary with /Subtype /Form the tuple
    `(xref, reference name, stream_xref, bbox)` is appended. The bbox is
    /BBox transformed by /Matrix (default matrix if absent), or the
    infinite rectangle if there is no /BBox. Entries that are no
    dictionaries cause a warning. Always returns 1.
    '''
    assert isinstance(doc, mupdf.PdfDocument)
    for i in range(mupdf.pdf_dict_len(dict_)):
        refname = mupdf.pdf_dict_get_key( dict_, i)
        imagedict = mupdf.pdf_dict_get_val(dict_, i)
        if not mupdf.pdf_is_dict(imagedict):
            mupdf.fz_warn( f"'{mupdf.pdf_to_name(refname)}' is no form dict ({mupdf.pdf_to_num(imagedict)} 0 R)")
            continue

        type_ = mupdf.pdf_dict_get(imagedict, PDF_NAME('Subtype'))
        if not mupdf.pdf_name_eq(type_, PDF_NAME('Form')):
            continue

        bbox_obj = mupdf.pdf_dict_get(imagedict, PDF_NAME('BBox'))
        matrix_obj = mupdf.pdf_dict_get(imagedict, PDF_NAME('Matrix'))
        mat = mupdf.pdf_to_matrix(matrix_obj) if matrix_obj.m_internal else mupdf.FzMatrix()
        if bbox_obj.m_internal:
            bbox = mupdf.fz_transform_rect( mupdf.pdf_to_rect(bbox_obj), mat)
        else:
            bbox = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
        xref = mupdf.pdf_to_num(imagedict)
        imagelist.append( (
                xref,
                mupdf.pdf_to_name( refname),
                stream_xref,
                JM_py_from_rect(bbox),
                ))
    return 1
def JM_gather_images(doc: mupdf.PdfDocument, dict_: mupdf.PdfObj, imagelist, stream_xref: int):
    '''
    Store info of the images in dictionary `dict_` in Python list
    `imagelist`.

    For every entry that is a dictionary with /Subtype /Image the tuple
    `(xref, smask number, width, height, bpc, colorspace, alternate
    colorspace, reference name, filter, stream_xref)` is appended. Entries
    that are no dictionaries cause a warning. Always returns 1.
    '''
    for i in range(mupdf.pdf_dict_len( dict_)):
        refname = mupdf.pdf_dict_get_key(dict_, i)
        imagedict = mupdf.pdf_dict_get_val(dict_, i)
        if not mupdf.pdf_is_dict(imagedict):
            mupdf.fz_warn(f"'{mupdf.pdf_to_name(refname)}' is no image dict ({mupdf.pdf_to_num(imagedict)} 0 R)")
            continue

        type_ = mupdf.pdf_dict_get(imagedict, PDF_NAME('Subtype'))
        if not mupdf.pdf_name_eq(type_, PDF_NAME('Image')):
            continue

        xref = mupdf.pdf_to_num(imagedict)
        # Object number of /SMask (or /Mask), 0 if there is none.
        smask = mupdf.pdf_dict_geta(imagedict, PDF_NAME('SMask'), PDF_NAME('Mask'))
        gen = mupdf.pdf_to_num(smask) if smask.m_internal else 0

        # Of a filter array only the first item is reported.
        filter_ = mupdf.pdf_dict_geta(imagedict, PDF_NAME('Filter'), PDF_NAME('F'))
        if mupdf.pdf_is_array(filter_):
            filter_ = mupdf.pdf_array_get(filter_, 0)

        altcs = mupdf.PdfObj(0)
        cs = mupdf.pdf_dict_geta(imagedict, PDF_NAME('ColorSpace'), PDF_NAME('CS'))
        if mupdf.pdf_is_array(cs):
            cses = cs
            cs = mupdf.pdf_array_get(cses, 0)
            is_devicen = mupdf.pdf_name_eq(cs, PDF_NAME('DeviceN'))
            if is_devicen or mupdf.pdf_name_eq(cs, PDF_NAME('Separation')):
                # The alternate colorspace is the item at index 2.
                altcs = mupdf.pdf_array_get(cses, 2)
                if mupdf.pdf_is_array(altcs):
                    altcs = mupdf.pdf_array_get(altcs, 0)

        width = mupdf.pdf_dict_geta(imagedict, PDF_NAME('Width'), PDF_NAME('W'))
        height = mupdf.pdf_dict_geta(imagedict, PDF_NAME('Height'), PDF_NAME('H'))
        bpc = mupdf.pdf_dict_geta(imagedict, PDF_NAME('BitsPerComponent'), PDF_NAME('BPC'))

        imagelist.append( (
                xref,
                gen,
                mupdf.pdf_to_int(width),
                mupdf.pdf_to_int(height),
                mupdf.pdf_to_int(bpc),
                JM_EscapeStrFromStr(mupdf.pdf_to_name(cs)),
                JM_EscapeStrFromStr(mupdf.pdf_to_name(altcs)),
                JM_EscapeStrFromStr(mupdf.pdf_to_name(refname)),
                JM_EscapeStrFromStr(mupdf.pdf_to_name(filter_)),
                stream_xref,
                ))
    return 1
def JM_get_annot_by_xref(page, xref):
    '''
    Retrieve the annotation of `page` whose object number equals `xref`.

    Raises `Exception` if the page has no such annotation.
    '''
    assert isinstance(page, mupdf.PdfPage)
    # loop thru MuPDF's internal annots array
    annot = mupdf.pdf_first_annot(page)
    while annot.m_internal:
        if xref == mupdf.pdf_to_num(mupdf.pdf_annot_obj(annot)):
            return annot
        annot = mupdf.pdf_next_annot( annot)
    raise Exception("xref %d is not an annot of this page" % xref)
def JM_get_annot_by_name(page, name):
    '''
    Retrieve the annotation of `page` whose /NM key equals `name`.

    Returns None for an empty `name`. Raises `Exception` if the page has no
    annotation with that name.
    '''
    assert isinstance(page, mupdf.PdfPage)
    if not name:
        return
    # loop thru MuPDF's internal annots array
    annot = mupdf.pdf_first_annot(page)
    while annot.m_internal:
        nm_obj = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "NM")
        response, len_ = mupdf.pdf_to_string(nm_obj)
        if name == response:
            return annot
        annot = mupdf.pdf_next_annot(annot)
    raise Exception("'%s' is not an annot of this page" % name)
def JM_get_annot_id_list(page):
    '''
    Return the /NM values (as text strings) of the entries in the page's
    /Annots array. Entries without an /NM key are skipped; the list is empty
    when the page has no /Annots.
    '''
    annots = mupdf.pdf_dict_get( page.obj(), mupdf.PDF_ENUM_NAME_Annots)
    if not annots.m_internal:
        return []
    nm_objs = (
            mupdf.pdf_dict_gets(mupdf.pdf_array_get(annots, i), "NM")
            for i in range(mupdf.pdf_array_len(annots))
            )
    return [mupdf.pdf_to_text_string(nm) for nm in nm_objs if nm.m_internal]
def JM_get_annot_xref_list( page_obj):
    '''
    Return a list of (xref, annot type, /NM text) tuples, one per entry of
    the page object's /Annots array.

    Entries without a /Subtype, or whose subtype maps to
    PDF_ANNOT_UNKNOWN, are left out. Delegates to the 'extra' module when
    g_use_extra is set.
    '''
    if g_use_extra:
        return extra.JM_get_annot_xref_list( page_obj)

    result = []
    annots = mupdf.pdf_dict_get( page_obj, PDF_NAME('Annots'))
    for idx in range( mupdf.pdf_array_len( annots)):
        annot_obj = mupdf.pdf_array_get( annots, idx)
        xref = mupdf.pdf_to_num( annot_obj)
        subtype = mupdf.pdf_dict_get( annot_obj, PDF_NAME('Subtype'))
        if subtype.m_internal:  # subtype is required
            type_ = mupdf.pdf_annot_type_from_string( mupdf.pdf_to_name( subtype))
            if type_ != mupdf.PDF_ANNOT_UNKNOWN:    # only accept valid annot types
                id_ = mupdf.pdf_dict_gets( annot_obj, "NM")
                result.append( (xref, type_, mupdf.pdf_to_text_string( id_)))
    return result
def JM_get_annot_xref_list2(page):
    '''
    Variant of JM_get_annot_xref_list() that takes a page wrapper.

    Returns an empty list when page._pdf_page(required=False) yields no
    underlying PDF page.
    '''
    pdf_page = page._pdf_page(required=False)
    if pdf_page.m_internal:
        return JM_get_annot_xref_list( pdf_page.obj())
    return list()
def JM_get_border_style(style):
    '''
    Map a Python string to the matching PDF border style name object.

    Only the first character counts, case-insensitively for the letters
    b, d, i, u and s. None or any other text yields the 'S' name.
    '''
    if style is None:
        return mupdf.PDF_ENUM_NAME_S
    # Checked in the same order as the individual prefix tests.
    for prefixes, suffix in (
            (("b", "B"), "B"),
            (("d", "D"), "D"),
            (("i", "I"), "I"),
            (("u", "U"), "U"),
            (("s", "S"), "S"),
            ):
        if style.startswith(prefixes):
            return getattr(mupdf, "PDF_ENUM_NAME_" + suffix)
    return mupdf.PDF_ENUM_NAME_S
def JM_get_font(
        fontname,
        fontfile,
        fontbuffer,
        script,
        lang,
        ordering,
        is_bold,
        is_italic,
        is_serif,
        embed,
        ):
    '''
    Return a fz_font built from the first usable source, tried in this order:
    font file, font buffer, CJK ordering (when > -1), font name (Base-14,
    then MuPDF builtin), Noto font for script/lang, and finally MuPDF's
    fallback font.

    embed: passed to fz_set_font_embedding() unless the font is flagged
        'never_embed'.
    Raises RuntimeError(MSG_FONT_FAILED) if the chosen source yields no font.
    '''
    def fertig(font):
        # Common exit: validate the font and apply the embedding flag.
        if not font.m_internal:
            raise RuntimeError(MSG_FONT_FAILED)
        # if font allows this, set embedding
        if not font.m_internal.flags.never_embed:
            mupdf.fz_set_font_embedding(font, embed)
        return font

    index = 0
    if fontfile:
        return fertig(mupdf.fz_new_font_from_file( None, fontfile, index, 0))
    if fontbuffer:
        res = JM_BufferFromBytes(fontbuffer)
        return fertig(mupdf.fz_new_font_from_buffer( None, res, index, 0))
    if ordering > -1:
        return fertig(mupdf.fz_new_cjk_font(ordering))
    if fontname:
        # Base-14 or a MuPDF builtin font
        font = mupdf.fz_new_base14_font(fontname)
        if font.m_internal:
            return fertig(font)
        return fertig(mupdf.fz_new_builtin_font(fontname, is_bold, is_italic))

    # Check for NOTO font
    data, size, index = mupdf.fz_lookup_noto_font( script, lang)
    font = None
    if data:
        font = mupdf.fz_new_font_from_memory( None, data, size, index, 0)
    # 'font' stays None when no Noto data was found; it must not be
    # dereferenced in that case, otherwise the fallback below is unreachable.
    if font is not None and font.m_internal:
        return fertig(font)
    font = mupdf.fz_load_fallback_font( script, lang, is_serif, is_bold, is_italic)
    return fertig(font)
-
def JM_get_fontbuffer(doc, xref):
    '''
    Return the contents of a font file, identified by xref.

    The font descriptor is taken from the first /DescendantFonts entry when
    present, otherwise from the font object itself. If several of /FontFile,
    /FontFile2 and /FontFile3 exist, the last one found is loaded.
    Returns None (after emitting a message where applicable) when xref < 1,
    the descriptor is missing, the /FontFile3 subtype is not a name, or no
    font file stream exists.
    '''
    if xref < 1:
        return
    font_obj = mupdf.pdf_load_object(doc, xref)
    desft = mupdf.pdf_dict_get(font_obj, PDF_NAME('DescendantFonts'))
    if desft.m_internal:
        first = mupdf.pdf_resolve_indirect(mupdf.pdf_array_get(desft, 0))
        descriptor = mupdf.pdf_dict_get(first, PDF_NAME('FontDescriptor'))
    else:
        descriptor = mupdf.pdf_dict_get(font_obj, PDF_NAME('FontDescriptor'))

    if not descriptor.m_internal:
        message("invalid font - FontDescriptor missing")
        return

    stream = None

    obj = mupdf.pdf_dict_get(descriptor, PDF_NAME('FontFile'))
    if obj.m_internal:
        stream = obj    # ext = "pfa"

    obj = mupdf.pdf_dict_get(descriptor, PDF_NAME('FontFile2'))
    if obj.m_internal:
        stream = obj    # ext = "ttf"

    obj = mupdf.pdf_dict_get(descriptor, PDF_NAME('FontFile3'))
    if obj.m_internal:
        stream = obj
        subtype = mupdf.pdf_dict_get(obj, PDF_NAME('Subtype'))
        if subtype.m_internal and not mupdf.pdf_is_name(subtype):
            message("invalid font descriptor subtype")
            return
        known = (
                mupdf.pdf_name_eq(subtype, PDF_NAME('Type1C'))              # "cff"
                or mupdf.pdf_name_eq(subtype, PDF_NAME('CIDFontType0C'))    # "cid"
                or mupdf.pdf_name_eq(subtype, PDF_NAME('OpenType'))         # "otf"
                )
        if not known:
            # Only a warning: the stream is still returned below.
            message(f'warning: unhandled font type {mupdf.pdf_to_name(subtype)!r}')

    if not stream:
        message('warning: unhandled font type')
        return

    return mupdf.pdf_load_stream(stream)
def JM_get_resource_properties(ref):
    '''
    Return the entries of /Resources/Properties (used for Marked Content) as
    a list of (name, xref) tuples.

    'ref' may be e.g. a page object or a Form XObject. An empty tuple is
    returned when the dictionary is missing or has no entries.
    '''
    properties = mupdf.pdf_dict_getl(ref, PDF_NAME('Resources'), PDF_NAME('Properties'))
    if not properties.m_internal:
        return ()
    count = mupdf.pdf_dict_len(properties)
    if count < 1:
        return ()
    items = []
    for idx in range(count):
        key_obj = mupdf.pdf_dict_get_key(properties, idx)
        val_obj = mupdf.pdf_dict_get_val(properties, idx)
        items.append((mupdf.pdf_to_name(key_obj), mupdf.pdf_to_num(val_obj)))
    return items
def JM_get_widget_by_xref( page, xref):
    '''
    Return the widget of 'page' whose PDF object number equals 'xref',
    wrapped in an Annot.

    Raises Exception if the page has no widget with that number.
    '''
    widget = mupdf.pdf_first_widget( page)
    while widget.m_internal:
        if xref == mupdf.pdf_to_num( mupdf.pdf_annot_obj( widget)):
            return Annot( widget)
        widget = mupdf.pdf_next_widget( widget)
    raise Exception( f"xref {xref} is not a widget of this page")
def JM_get_widget_properties(annot, Widget):
    '''
    Fill a Python Widget object with the values of a PDF form field.
    Called by "Page.first_widget" and "Widget.next".

    annot: wrapper whose 'this' attribute is the MuPDF widget annotation.
    Widget: object receiving the field values as attributes.
    '''
    widget_obj = mupdf.pdf_annot_obj(annot.this)
    pdf = _pdf_annot_page(annot.this).doc()

    def parent_of(node):
        return mupdf.pdf_dict_get(node, PDF_NAME('Parent'))

    def inherited_nonempty_label(node, key):
        '''
        Modified version of MuPDF's pdf_dict_get_inheritable():
        * Returns the pdf_to_text_string() value, or None if not found.
        * Continues with the parent when the current node has the key but
          its text is empty.
        '''
        slow = node
        halfbeat = 11   # Don't start moving slow pointer for a while.
        while node.m_internal:
            val = mupdf.pdf_dict_get(node, key)
            if val.m_internal:
                label = mupdf.pdf_to_text_string(val)
                if label:
                    return label
            node = parent_of(node)
            if node.m_internal == slow.m_internal:
                raise Exception("cycle in resources")
            halfbeat -= 1
            if halfbeat == 0:
                slow = parent_of(slow)
                halfbeat = 2
        return None

    def script_of(*path):
        # JavaScript source found below the annotation at 'path', or None.
        return JM_get_script(mupdf.pdf_dict_getl(widget_obj, *path))

    field_type = mupdf.pdf_widget_type(annot.this)
    Widget.field_type = field_type

    # Only signature fields have a signed state; all others get None.
    if field_type != mupdf.PDF_WIDGET_TYPE_SIGNATURE:
        Widget.is_signed = None
    elif mupdf.pdf_signature_is_signed(pdf, widget_obj):
        Widget.is_signed = True
    else:
        Widget.is_signed = False

    Widget.border_style = JM_UnicodeFromStr(mupdf.pdf_field_border_style(widget_obj))
    Widget.field_type_string = JM_UnicodeFromStr(JM_field_type_text(field_type))
    Widget.field_name = mupdf.pdf_load_field_name(widget_obj)

    # In order to address #3950, empty-string child values of /TU are
    # ignored in favour of an inherited one.
    label = inherited_nonempty_label(widget_obj, PDF_NAME('TU'))
    if label is not None:
        Widget.field_label = label

    fvalue = None
    if field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON:
        rb_group = mupdf.pdf_dict_get( widget_obj, PDF_NAME('Parent'))   # owning RB group
        if rb_group.m_internal:
            Widget.rb_parent = mupdf.pdf_to_num( rb_group)
        state = mupdf.pdf_dict_get(widget_obj, PDF_NAME('AS'))
        if state.m_internal:
            fvalue = mupdf.pdf_to_name(state)
    if not fvalue:
        fvalue = mupdf.pdf_field_value(widget_obj)
    Widget.field_value = JM_UnicodeFromStr(fvalue)

    Widget.field_display = mupdf.pdf_field_display(widget_obj)

    width = mupdf.pdf_to_real(mupdf.pdf_dict_getl(widget_obj, PDF_NAME('BS'), PDF_NAME('W')))
    Widget.border_width = 1 if width == 0 else width

    dashes = mupdf.pdf_dict_getl(widget_obj, PDF_NAME('BS'), PDF_NAME('D'))
    if mupdf.pdf_is_array(dashes):
        Widget.border_dashes = [
                mupdf.pdf_to_int(mupdf.pdf_array_get(dashes, i))
                for i in range(mupdf.pdf_array_len(dashes))
                ]

    Widget.text_maxlen = mupdf.pdf_text_widget_max_len(annot.this)
    Widget.text_format = mupdf.pdf_text_widget_format(annot.this)

    # Fill colour (/MK/BG) and border colour (/MK/BC) are arrays of reals.
    for mk_key, attr in (('BG', "fill_color"), ('BC', "border_color")):
        arr = mupdf.pdf_dict_getl(widget_obj, PDF_NAME('MK'), PDF_NAME(mk_key))
        if mupdf.pdf_is_array(arr):
            setattr(Widget, attr, [
                    mupdf.pdf_to_real(mupdf.pdf_array_get(arr, i))
                    for i in range(mupdf.pdf_array_len(arr))
                    ])

    Widget.choice_values = JM_choice_options(annot)

    da = mupdf.pdf_to_text_string(mupdf.pdf_dict_get_inheritable(widget_obj, PDF_NAME('DA')))
    Widget._text_da = JM_UnicodeFromStr(da)

    caption = mupdf.pdf_dict_getl(widget_obj, PDF_NAME('MK'), PDF_NAME('CA'))
    if caption.m_internal:
        Widget.button_caption = JM_UnicodeFromStr(mupdf.pdf_to_text_string(caption))

    Widget.field_flags = mupdf.pdf_field_flags(widget_obj)

    # call Py method to reconstruct text color, font name, size
    Widget._parse_da()

    # extract JavaScript action texts
    Widget.script = JM_get_script(mupdf.pdf_dict_get(widget_obj, PDF_NAME('A')))
    Widget.script_stroke = script_of(PDF_NAME('AA'), PDF_NAME('K'))
    Widget.script_format = script_of(PDF_NAME('AA'), PDF_NAME('F'))
    Widget.script_change = script_of(PDF_NAME('AA'), PDF_NAME('V'))
    Widget.script_calc = script_of(PDF_NAME('AA'), PDF_NAME('C'))
    Widget.script_blur = script_of(PDF_NAME('AA'), mupdf.pdf_new_name('Bl'))
    Widget.script_focus = script_of(PDF_NAME('AA'), mupdf.pdf_new_name('Fo'))
def JM_get_fontextension(doc, xref):
    '''
    Return the file extension of a font file, identified by xref.

    Result is "pfa" (/FontFile), "ttf" (/FontFile2), or for /FontFile3 one
    of "cff", "cid", "otf" depending on its /Subtype. "n/a" is returned when
    xref < 1, when there is no font descriptor (base-14 font), when no font
    file entry exists, or when the /FontFile3 subtype is invalid or unknown.
    '''
    if xref < 1:
        return "n/a"
    font_obj = mupdf.pdf_load_object(doc, xref)
    desft = mupdf.pdf_dict_get(font_obj, PDF_NAME('DescendantFonts'))
    if desft.m_internal:
        first = mupdf.pdf_resolve_indirect(mupdf.pdf_array_get(desft, 0))
        descriptor = mupdf.pdf_dict_get(first, PDF_NAME('FontDescriptor'))
    else:
        descriptor = mupdf.pdf_dict_get(font_obj, PDF_NAME('FontDescriptor'))
    if not descriptor.m_internal:
        return "n/a"    # this is a base-14 font

    if mupdf.pdf_dict_get(descriptor, PDF_NAME('FontFile')).m_internal:
        return "pfa"
    if mupdf.pdf_dict_get(descriptor, PDF_NAME('FontFile2')).m_internal:
        return "ttf"

    fontfile3 = mupdf.pdf_dict_get(descriptor, PDF_NAME('FontFile3'))
    if not fontfile3.m_internal:
        return "n/a"

    subtype = mupdf.pdf_dict_get(fontfile3, PDF_NAME('Subtype'))
    if subtype.m_internal and not mupdf.pdf_is_name(subtype):
        message("invalid font descriptor subtype")
        return "n/a"
    if mupdf.pdf_name_eq(subtype, PDF_NAME('Type1C')):
        return "cff"
    if mupdf.pdf_name_eq(subtype, PDF_NAME('CIDFontType0C')):
        return "cid"
    if mupdf.pdf_name_eq(subtype, PDF_NAME('OpenType')):
        return "otf"
    # Format the text before the call: message() is given a single string
    # everywhere else, not printf-style arguments.
    message("unhandled font type '%s'" % mupdf.pdf_to_name(subtype))
    return "n/a"
def JM_get_ocg_arrays_imp(arr):
    '''
    Return the object numbers of the entries of PDF array 'arr' as a list,
    in first-occurrence order and without duplicates.

    An empty list is returned when 'arr' is not a PDF array.
    '''
    xrefs = []
    if not mupdf.pdf_is_array( arr):
        return xrefs
    seen = set()    # constant-time duplicate check alongside the ordered list
    for i in range( mupdf.pdf_array_len( arr)):
        item = mupdf.pdf_to_num( mupdf.pdf_array_get( arr, i))
        if item not in seen:
            seen.add(item)
            xrefs.append(item)
    return xrefs
def JM_get_ocg_arrays(conf):
    '''
    Collect the OCG arrays of an optional content configuration dictionary.

    Returns a dict that may contain the keys "on", "off", "locked" (lists
    of object numbers from /ON, /OFF, /Locked), "rbgroups" (a list of such
    lists from /RBGroups) and "basestate" (the /BaseState name). Keys whose
    source is missing or empty are omitted.
    '''
    rc = dict()
    for pdf_key, rc_key in (('ON', "on"), ('OFF', "off"), ('Locked', 'locked')):
        xrefs = JM_get_ocg_arrays_imp( mupdf.pdf_dict_get( conf, PDF_NAME(pdf_key)))
        if xrefs:
            rc[rc_key] = xrefs

    groups = mupdf.pdf_dict_get( conf, PDF_NAME('RBGroups'))
    if mupdf.pdf_is_array( groups):
        rbgroups = [
                JM_get_ocg_arrays_imp( mupdf.pdf_array_get( groups, i))
                for i in range( mupdf.pdf_array_len( groups))
                ]
        if rbgroups:
            rc["rbgroups"] = rbgroups

    base = mupdf.pdf_dict_get( conf, PDF_NAME('BaseState'))
    if base.m_internal:
        rc["basestate"] = mupdf.pdf_to_name( base)
    return rc
def JM_get_page_labels(liste, nums):
    '''
    Append (page number, label definition) tuples to 'liste'.

    'nums' is a PDF array read as consecutive pairs: an integer followed by
    an object. Each object is serialised via JM_object_to_buffer() and
    stored as UTF-8 decoded text.
    '''
    for pos in range(0, mupdf.pdf_array_len(nums), 2):
        pno = mupdf.pdf_to_int(
                mupdf.pdf_resolve_indirect( mupdf.pdf_array_get(nums, pos))
                )
        rule = mupdf.pdf_resolve_indirect( mupdf.pdf_array_get(nums, pos + 1))
        raw = mupdf.fz_buffer_extract(JM_object_to_buffer(rule, 1, 0))
        assert isinstance(raw, bytes)
        liste.append( (pno, raw.decode('utf-8')))
def JM_get_script(key):
    '''
    JavaScript extractor.

    'key' is a PDF action dictionary. The source is returned only if /S has
    the value 'JavaScript' and /JS exists as a string or a stream with
    non-empty content; in every other case the result is None.
    '''
    if not key.m_internal:
        return
    action = mupdf.pdf_to_name(mupdf.pdf_dict_get(key, PDF_NAME('S')))
    if action != "JavaScript":
        return
    js = mupdf.pdf_dict_get(key, PDF_NAME('JS'))
    if not js.m_internal:
        return

    if mupdf.pdf_is_string(js):
        script = JM_UnicodeFromStr(mupdf.pdf_to_text_string(js))
    elif mupdf.pdf_is_stream(js):
        script = JM_EscapeStrFromBuffer(mupdf.pdf_load_stream(js))
    else:
        return
    # do not return an empty script
    return script if script else None
def JM_have_operation(pdf):
    '''
    Ensure valid journalling state.

    Returns 0 when the document has a journal but pdf_undoredo_step(pdf, 0)
    reports nothing, otherwise 1.
    '''
    journalling = pdf.m_internal.journal
    if journalling and not mupdf.pdf_undoredo_step(pdf, 0):
        return 0
    return 1
def JM_image_extension(type_):
    '''
    Return the file extension for a MuPDF image type, or "n/a" if the type
    is not one of the listed ones.
    '''
    # (mupdf constant name, extension), checked in this order. The constant
    # is looked up only when its turn comes.
    known = (
            ("FZ_IMAGE_FAX", "fax"),
            ("FZ_IMAGE_RAW", "raw"),
            ("FZ_IMAGE_FLATE", "flate"),
            ("FZ_IMAGE_LZW", "lzw"),
            ("FZ_IMAGE_RLD", "rld"),
            ("FZ_IMAGE_BMP", "bmp"),
            ("FZ_IMAGE_GIF", "gif"),
            ("FZ_IMAGE_JBIG2", "jb2"),
            ("FZ_IMAGE_JPEG", "jpeg"),
            ("FZ_IMAGE_JPX", "jpx"),
            ("FZ_IMAGE_JXR", "jxr"),
            ("FZ_IMAGE_PNG", "png"),
            ("FZ_IMAGE_PNM", "pnm"),
            ("FZ_IMAGE_TIFF", "tiff"),
            #("FZ_IMAGE_PSD", "psd"),
            )
    for const_name, ext in known:
        if type_ == getattr(mupdf, const_name):
            return ext
    return "n/a"
# fixme: need to avoid using a global for this.
# Result list shared between JM_image_reporter() and JM_image_filter().
g_img_info = None

def JM_image_filter(opaque, ctm, name, image):
    '''
    Record one image occurrence: append (name, quad) to the global
    g_img_info, where quad is the unit rectangle transformed by 'ctm' and
    then by the global g_img_info_matrix.

    'opaque' and 'image' are not used.
    '''
    assert isinstance(ctm, mupdf.FzMatrix)
    unit_quad = mupdf.fz_quad_from_rect(mupdf.FzRect(mupdf.FzRect.Fixed_UNIT))
    placed = mupdf.fz_transform_quad( unit_quad, ctm)
    placed = mupdf.fz_transform_quad( placed, g_img_info_matrix)
    g_img_info.append((name, JM_py_from_quad(placed)))
def JM_image_profile( imagedata, keep_image):
    '''
    Return basic properties of an image provided as bytes or bytearray.

    An fz_image is created from the data; with 'keep_image' set it is added
    to the result, and the buffer is built from a copy of the data rather
    than from shared data.
    Returns None if no data is given, if it is shorter than 8 bytes, or if
    the image format is not recognised.
    '''
    if not imagedata:
        return None # nothing given

    size = len( imagedata)
    if size < 8:
        message( "bad image data")
        return None

    type_ = mupdf.fz_recognize_image_format( imagedata)
    if type_ == mupdf.FZ_IMAGE_UNKNOWN:
        return None

    make_buffer = (
            mupdf.fz_new_buffer_from_copied_data
            if keep_image
            else mupdf.fz_new_buffer_from_shared_data
            )
    image = mupdf.fz_new_image_from_buffer( make_buffer( imagedata, size))

    ctm = mupdf.fz_image_orientation_matrix( image)
    xres, yres = mupdf.fz_image_resolution(image)
    orientation = mupdf.fz_image_orientation( image)
    cs_name = mupdf.fz_colorspace_name( image.colorspace())

    result = {
            dictkey_width: image.w(),
            dictkey_height: image.h(),
            "orientation": orientation,
            dictkey_matrix: JM_py_from_matrix(ctm),
            dictkey_xres: xres,
            dictkey_yres: yres,
            dictkey_colorspace: image.n(),
            dictkey_bpc: image.bpc(),
            dictkey_ext: JM_image_extension(type_),
            dictkey_cs_name: cs_name,
            }
    if keep_image:
        result[ dictkey_image] = image
    return result
def JM_image_reporter(page):
    '''
    Return a tuple of (name, quad) entries, one for each image reported
    while filtering the page's contents.

    Works through the module globals g_img_info_matrix (set from
    pdf_page_transform()) and g_img_info (filled by JM_image_filter() and
    reset to an empty list before returning).
    '''
    global g_img_info_matrix
    global g_img_info

    doc = page.doc()
    g_img_info_matrix = mupdf.FzMatrix()
    mediabox = mupdf.FzRect()
    mupdf.pdf_page_transform(page, mediabox, g_img_info_matrix)

    class ImageCollectingOptions(mupdf.PdfSanitizeFilterOptions2):
        # Sanitize options whose image callback forwards to JM_image_filter().
        def __init__(self):
            super().__init__()
            self.use_virtual_image_filter()
        def image_filter(self, ctx, ctm, name, image, scissor):
            JM_image_filter(None, mupdf.FzMatrix(ctm), name, image)

    filter_options = _make_PdfFilterOptions(
            instance_forms=1,
            ascii=1,
            no_update=1,
            sanitize=1,
            sopts=ImageCollectingOptions(),
            )

    g_img_info = []
    mupdf.pdf_filter_page_contents( doc, page, filter_options)
    collected = tuple(g_img_info)
    g_img_info = []
    return collected
def JM_fitz_config():
    '''
    Return a dict of boolean configuration flags.

    The "base14" and "tofu*" entries are True when the mupdf module has no
    attribute of the corresponding TOFU name; the other entries are the
    truth values of mupdf's FZ_ENABLE_* / FZ_PLOTTERS_* constants and of
    JM_MEMORY.
    '''
    def lacks(name):
        return not hasattr(mupdf, name)

    return {
            "base14": lacks('TOFU_BASE14'),
            "cbz": bool(mupdf.FZ_ENABLE_CBZ),
            "epub": bool(mupdf.FZ_ENABLE_EPUB),
            "html": bool(mupdf.FZ_ENABLE_HTML),
            "icc": bool(mupdf.FZ_ENABLE_ICC),
            "img": bool(mupdf.FZ_ENABLE_IMG),
            "jpx": bool(mupdf.FZ_ENABLE_JPX),
            "js": bool(mupdf.FZ_ENABLE_JS),
            "pdf": bool(mupdf.FZ_ENABLE_PDF),
            "plotter-cmyk": bool(mupdf.FZ_PLOTTERS_CMYK),
            "plotter-g": bool(mupdf.FZ_PLOTTERS_G),
            "plotter-n": bool(mupdf.FZ_PLOTTERS_N),
            "plotter-rgb": bool(mupdf.FZ_PLOTTERS_RGB),
            "py-memory": bool(JM_MEMORY),
            "svg": bool(mupdf.FZ_ENABLE_SVG),
            "tofu": lacks('TOFU'),
            "tofu-cjk": lacks('TOFU_CJK'),
            "tofu-cjk-ext": lacks('TOFU_CJK_EXT'),
            "tofu-cjk-lang": lacks('TOFU_CJK_LANG'),
            "tofu-emoji": lacks('TOFU_EMOJI'),
            "tofu-historic": lacks('TOFU_HISTORIC'),
            "tofu-sil": lacks('TOFU_SIL'),
            "tofu-symbol": lacks('TOFU_SYMBOL'),
            "xps": bool(mupdf.FZ_ENABLE_XPS),
            }
def JM_insert_contents(pdf, pageref, newcont, overlay):
    '''
    Insert buffer 'newcont' as a new separate /Contents stream of a page and
    return the xref of the new stream.

    If /Contents already is an array, the new stream is appended (overlay)
    or inserted at position 0. Otherwise a new array is created holding the
    old /Contents object (if there is one) and the new stream, in the order
    given by 'overlay', and stored as the page's /Contents.
    '''
    old = mupdf.pdf_dict_get(pageref, PDF_NAME('Contents'))
    new_stream = mupdf.pdf_add_stream(pdf, newcont, mupdf.PdfObj(), 0)
    xref = mupdf.pdf_to_num(new_stream)

    if mupdf.pdf_is_array(old):
        if overlay:
            mupdf.pdf_array_push(old, new_stream)       # append new object
        else:
            mupdf.pdf_array_insert(old, new_stream, 0)  # prepend new object
        return xref

    carr = mupdf.pdf_new_array(pdf, 5)
    if overlay and old.m_internal:
        mupdf.pdf_array_push(carr, old)
    mupdf.pdf_array_push(carr, new_stream)
    if not overlay and old.m_internal:
        mupdf.pdf_array_push(carr, old)
    mupdf.pdf_dict_put(pageref, PDF_NAME('Contents'), carr)
    return xref
def JM_insert_font(pdf, bfname, fontfile, fontbuffer, set_simple, idx, wmode, serif, encoding, ordering):
    '''
    Insert a font in a PDF.

    Sources are tried in this order: CJK font for 'ordering' (when > -1),
    Base-14 font named 'bfname', then 'fontfile' or else 'fontbuffer'. A
    file/buffer font is added as a simple font when 'set_simple' is set,
    otherwise as a CID font.

    Returns [xref, info] where info has the keys "name", "type", "ext",
    "simple", "ordering", "ascender" and "descender".
    '''
    ENSURE_OPERATION(pdf)

    font_data = None
    ext = None

    # check for CJK font
    if ordering > -1:
        font_data, size, index = mupdf.fz_lookup_cjk_font(ordering)
    if font_data:
        font = mupdf.fz_new_font_from_memory(None, font_data, size, index, 0)
        font_obj = mupdf.pdf_add_cjk_font(pdf, font, ordering, wmode, serif)
        ext = "n/a"
        simple = 0
    else:
        # check for PDF Base-14 font
        if bfname:
            font_data, size = mupdf.fz_lookup_base14_font(bfname)
        if font_data:
            font = mupdf.fz_new_font_from_memory(bfname, font_data, size, 0, 0)
            font_obj = mupdf.pdf_add_simple_font(pdf, font, encoding)
            ext = "n/a"
            simple = 1
        else:
            # font comes from a file or from a memory buffer
            if fontfile:
                font = mupdf.fz_new_font_from_file(None, fontfile, idx, 0)
            else:
                res = JM_BufferFromBytes(fontbuffer)
                if not res.m_internal:
                    RAISEPY(MSG_FILE_OR_BUFFER, PyExc_ValueError)
                font = mupdf.fz_new_font_from_buffer(None, res, idx, 0)

            if set_simple:
                font_obj = mupdf.pdf_add_simple_font(pdf, font, encoding)
                simple = 2
            else:
                font_obj = mupdf.pdf_add_cid_font(pdf, font)
                simple = 0

    ixref = mupdf.pdf_to_num(font_obj)
    base_font = mupdf.pdf_dict_get(font_obj, PDF_NAME('BaseFont'))
    name = JM_EscapeStrFromStr( mupdf.pdf_to_name( base_font))
    subtype = mupdf.pdf_dict_get( font_obj, PDF_NAME('Subtype'))
    subt = JM_UnicodeFromStr( mupdf.pdf_to_name( subtype))

    if not ext:
        ext = JM_UnicodeFromStr(JM_get_fontextension(pdf, ixref))

    asc = mupdf.fz_font_ascender(font)
    dsc = mupdf.fz_font_descender(font)
    info = {
            "name": name,               # base font name
            "type": subt,               # subtype
            "ext": ext,                 # file extension
            "simple": bool(simple),     # simple font?
            "ordering": ordering,       # CJK font?
            "ascender": asc,
            "descender": dsc,
            }
    return [ixref, info]
def JM_irect_from_py(r):
    '''
    Convert 'r' to a mupdf.FzIrect.

    Accepts mupdf.FzIrect (returned unchanged), IRect, Rect, mupdf.FzRect,
    or a sequence of 4 numbers whose items are clamped to
    FZ_MIN_INF_RECT..FZ_MAX_INF_RECT. Anything else, or a sequence
    containing None, gives the infinite irect.
    '''
    if isinstance(r, mupdf.FzIrect):
        return r
    if isinstance(r, IRect):
        return mupdf.FzIrect( r.x0, r.y0, r.x1, r.y1)
    if isinstance(r, Rect):
        # The FzIrect constructor uses fz_irect_from_rect().
        return mupdf.FzIrect(mupdf.FzRect(r.x0, r.y0, r.x1, r.y1))
    if isinstance(r, mupdf.FzRect):
        return mupdf.FzIrect(r)    # Uses fz_irect_from_rect().

    if not r or not PySequence_Check(r) or PySequence_Size(r) != 4:
        return mupdf.FzIrect(mupdf.fz_infinite_irect)

    coords = []
    for pos in range(4):
        value = r[pos]
        if value is None:
            return mupdf.FzIrect(mupdf.fz_infinite_irect)
        if value < FZ_MIN_INF_RECT:
            value = FZ_MIN_INF_RECT
        if value > FZ_MAX_INF_RECT:
            value = FZ_MAX_INF_RECT
        coords.append(value)
    return mupdf.fz_make_irect(coords[0], coords[1], coords[2], coords[3])
def JM_listbox_value( annot):
    '''
    Return the value (/V) of a list box.

    A single string is returned as text. Otherwise /V is read as an array
    (possibly of length 0) and a list of strings is returned; for an entry
    that is itself an array, its item at index 1 is used.
    '''
    value = mupdf.pdf_dict_get( mupdf.pdf_annot_obj( annot), PDF_NAME('V'))
    if mupdf.pdf_is_string( value):     # a single string
        return mupdf.pdf_to_text_string( value)

    def entry_text(pos):
        entry = mupdf.pdf_array_get( value, pos)
        if mupdf.pdf_is_array( entry):
            entry = mupdf.pdf_array_get( entry, 1)
        return JM_UnicodeFromStr( mupdf.pdf_to_text_string( entry))

    return [entry_text(pos) for pos in range( mupdf.pdf_array_len( value))]
def JM_make_annot_DA(annot, ncol, col, fontname, fontsize):
    '''
    Build a default appearance string and store it as the annotation's /DA.

    The colour operator depends on 'ncol': fewer than 1 component gives
    '0 g', 1 gives gray ('g'), 3 gives 'rg' and any other count above 3
    gives 'k' using the first four values of 'col'. ncol == 2 trips an
    assertion. The expanded font name and the font size follow as '... Tf'.
    '''
    # The text is assembled directly in Python, not in a fz_buffer, because
    # converting the buffer's `unsigned char*` into a `const char*` for
    # pdf_dict_put_text_string() is non-trivial.
    if ncol < 1:
        color_op = '0 g '
    elif ncol == 1:
        color_op = f'{col[0]:g} g '
    elif ncol == 2:
        assert 0
    elif ncol == 3:
        color_op = f'{col[0]:g} {col[1]:g} {col[2]:g} rg '
    else:
        color_op = f'{col[0]:g} {col[1]:g} {col[2]:g} {col[3]:g} k '
    da = color_op + f'/{JM_expand_fname(fontname)} {fontsize} Tf'
    mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), mupdf.PDF_ENUM_NAME_DA, da)
def JM_make_spanlist(line_dict, line, raw, buff, tp_rect):
    '''
    Build the list of span dictionaries of a text line, store it in
    line_dict[dictkey_spans] and return the line's rectangle.

    A new span starts whenever size, flags, char flags (MuPDF >= 1.25.2),
    colour, font or bidi of a character differ from the previous one. With
    'raw' set each span holds a list of char dicts, otherwise a text string
    collected via 'buff'. Characters outside 'tp_rect' are skipped unless
    that rectangle is infinite.

    Note: because of the '1 or' in the first test, the work is always
    delegated to extra.JM_make_spanlist(); the Python code after it is not
    reached.
    '''
    if 1 or g_use_extra:
        return extra.JM_make_spanlist(line_dict, line, raw, buff, tp_rect)
    char_list = None
    span_list = []
    mupdf.fz_clear_buffer(buff)
    span_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
    line_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)

    class char_style:
        # Style attributes whose change starts a new span.
        def __init__(self, rhs=None):
            if rhs:
                self.size = rhs.size
                self.flags = rhs.flags
                if mupdf_version_tuple >= (1, 25, 2):
                    self.char_flags = rhs.char_flags
                self.font = rhs.font
                self.argb = rhs.argb
                self.asc = rhs.asc
                self.desc = rhs.desc
                self.bidi = rhs.bidi
            else:
                self.size = -1
                self.flags = -1
                if mupdf_version_tuple >= (1, 25, 2):
                    self.char_flags = -1
                self.font = ''
                self.argb = -1
                self.asc = 0
                self.desc = 0
                self.bidi = 0
        def __str__(self):
            ret = f'{self.size} {self.flags}'
            if mupdf_version_tuple >= (1, 25, 2):
                ret += f' {self.char_flags}'
            # The colour is stored as 'argb'; there is no 'color' attribute.
            ret += f' {self.font} {self.argb} {self.asc} {self.desc}'
            return ret

    def finish_span():
        # Store the collected chars or text plus origin and bbox in 'span'.
        nonlocal char_list
        if raw:
            # put character list in the span
            span[dictkey_chars] = char_list
            char_list = None
        else:
            # put text string in the span
            span[dictkey_text] = JM_EscapeStrFromBuffer( buff)
            mupdf.fz_clear_buffer(buff)
        span[dictkey_origin] = JM_py_from_point(span_origin)
        span[dictkey_bbox] = JM_py_from_rect(span_rect)

    old_style = char_style()
    style = char_style()
    span = None
    span_origin = None

    for ch in line:
        # start-trace
        r = JM_char_bbox(line, ch)
        if (not JM_rects_overlap(tp_rect, r)
                and not mupdf.fz_is_infinite_rect(tp_rect)
                ):
            continue

        # Info from:
        # detect_super_script()
        # fz_font_is_italic()
        # fz_font_is_serif()
        # fz_font_is_monospaced()
        # fz_font_is_bold()

        flags = JM_char_font_flags(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)), line, ch)
        origin = mupdf.FzPoint(ch.m_internal.origin)
        style.size = ch.m_internal.size
        style.flags = flags
        if mupdf_version_tuple >= (1, 25, 2):
            # FZ_STEXT_SYNTHETIC is per-char, not per-span.
            style.char_flags = ch.m_internal.flags & ~mupdf.FZ_STEXT_SYNTHETIC
        style.font = JM_font_name(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
        style.argb = ch.m_internal.argb
        style.asc = JM_font_ascender(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
        style.desc = JM_font_descender(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
        style.bidi = ch.m_internal.bidi

        if (style.size != old_style.size
                or style.flags != old_style.flags
                or (mupdf_version_tuple >= (1, 25, 2)
                    and (style.char_flags != old_style.char_flags)
                    )
                or style.argb != old_style.argb
                or style.font != old_style.font
                or style.bidi != old_style.bidi
                ):
            if old_style.size >= 0:
                # not first one, output previous
                finish_span()
                line_rect = mupdf.fz_union_rect(line_rect, span_rect)
                span_list.append( span)
                span = None

            span = dict()
            asc = style.asc
            desc = style.desc
            if style.asc < 1e-3:
                asc = 0.9
                desc = -0.1

            span[dictkey_size] = style.size
            span[dictkey_flags] = style.flags
            span[dictkey_bidi] = style.bidi
            if mupdf_version_tuple >= (1, 25, 2):
                span[dictkey_char_flags] = style.char_flags
            span[dictkey_font] = JM_EscapeStrFromStr(style.font)
            span[dictkey_color] = style.argb & 0xffffff
            if mupdf_version_tuple >= (1, 25, 0):
                span['alpha'] = style.argb >> 24
            span["ascender"] = asc
            span["descender"] = desc

            # Need to be careful here - doing 'old_style=style' does a shallow
            # copy, but we need to keep old_style as a distinct instance.
            old_style = char_style(style)
            span_rect = r
            span_origin = origin

        span_rect = mupdf.fz_union_rect(span_rect, r)

        if raw: # make and append a char dict
            char_dict = dict()
            char_dict[dictkey_origin] = JM_py_from_point( ch.m_internal.origin)
            char_dict[dictkey_bbox] = JM_py_from_rect(r)
            char_dict[dictkey_c] = chr(ch.m_internal.c)
            char_dict['synthetic'] = bool(ch.m_internal.flags & mupdf.FZ_STEXT_SYNTHETIC)

            if char_list is None:
                char_list = []
            char_list.append(char_dict)
        else:   # add character byte to buffer
            JM_append_rune(buff, ch.m_internal.c)

    # all characters processed, now flush remaining span
    if span:
        finish_span()
        if not mupdf.fz_is_empty_rect(span_rect):
            span_list.append(span)
            line_rect = mupdf.fz_union_rect(line_rect, span_rect)
        span = None

    # The span list is stored whether or not the line rectangle is empty.
    line_dict[dictkey_spans] = span_list
    return line_rect
def _make_image_dict(img, img_dict):
    """Fill 'img_dict' with information extracted from image 'img'.

    Used by 'Document.extract_image' and by 'JM_make_image_block', which
    both add some more specific information.

    The image bytes are a PNG conversion when there is no compressed
    buffer, when the type is JBIG2 or unknown, or when the type is below
    FZ_IMAGE_BMP; a JPEG re-encoding for 4-component JPEGs; otherwise the
    compressed buffer itself.
    """
    img_type = img.fz_compressed_image_type()
    ext = JM_image_extension(img_type)
    color_params = mupdf.fz_default_color_params

    # compressed image buffer if present, else None
    ll_cbuf = mupdf.ll_fz_compressed_image_buffer(img.m_internal)

    needs_png = (
            not ll_cbuf
            or img_type in (mupdf.FZ_IMAGE_JBIG2, mupdf.FZ_IMAGE_UNKNOWN)
            or img_type < mupdf.FZ_IMAGE_BMP
            )
    if needs_png:
        # not an image with a compressed buffer: convert to PNG
        res = mupdf.fz_new_buffer_from_image_as_png(
                img,
                mupdf.FzColorParams(color_params),
                )
        ext = "png"
    elif ext == "jpeg" and img.n() == 4:
        # JPEG with CMYK: invert colors
        res = mupdf.fz_new_buffer_from_image_as_jpeg(
                img, mupdf.FzColorParams(color_params), 95, 1)
    else:
        # copy the compressed buffer
        res = mupdf.FzBuffer(mupdf.ll_fz_keep_buffer(ll_cbuf.buffer))

    bytes_ = JM_BinFromBuffer(res)
    img_dict.update({
            dictkey_width: img.w(),
            dictkey_height: img.h(),
            dictkey_ext: ext,
            dictkey_colorspace: img.n(),
            dictkey_xres: img.xres(),
            dictkey_yres: img.yres(),
            dictkey_bpc: img.bpc(),
            dictkey_size: len(bytes_),
            dictkey_image: bytes_,
            })
def JM_make_image_block(block, block_dict):
    '''
    Fill 'block_dict' for an image block: the image properties from
    _make_image_dict(), the image mask as PNG data under "mask" (None if
    there is no mask), and the block's transformation matrix.
    '''
    img = block.i_image()
    _make_image_dict(img, block_dict)
    mask = img.mask()
    if not mask.m_internal:
        block_dict["mask"] = None
    else:
        # if the image has a mask, store it as a PNG buffer
        png = mask.fz_new_buffer_from_image_as_png(mupdf.FzColorParams(mupdf.fz_default_color_params))
        block_dict["mask"] = png.fz_buffer_extract()
    block_dict[dictkey_matrix] = JM_py_from_matrix(block.i_transform())
def JM_make_text_block(block, block_dict, raw, buff, tp_rect):
    '''
    Fill 'block_dict' with the bbox and the line dictionaries of a text
    block.

    Currently always delegates to 'extra.JM_make_text_block'; the Python
    code below is an unreachable fallback kept behind the '1 or' toggle.
    '''
    if 1 or g_use_extra:
        return extra.JM_make_text_block(block.m_internal, block_dict, raw, buff.m_internal, tp_rect.m_internal)
    lines = []
    bbox = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
    for line in block:
        visible = mupdf.fz_intersect_rect(tp_rect, mupdf.FzRect(line.m_internal.bbox))
        # drop lines outside the textpage rectangle (unless that is infinite)
        if mupdf.fz_is_empty_rect(visible) and not mupdf.fz_is_infinite_rect(tp_rect):
            continue
        entry = dict()
        line_rect = JM_make_spanlist(entry, line, raw, buff, tp_rect)
        bbox = mupdf.fz_union_rect(bbox, line_rect)
        entry[dictkey_wmode] = line.m_internal.wmode
        entry[dictkey_dir] = JM_py_from_point(line.m_internal.dir)
        entry[dictkey_bbox] = JM_py_from_rect(line_rect)
        lines.append(entry)
    block_dict[dictkey_bbox] = JM_py_from_rect(bbox)
    block_dict[dictkey_lines] = lines
def JM_make_textpage_dict(tp, page_dict, raw):
    '''
    Store the list of block dictionaries of textpage 'tp' in 'page_dict'.

    Currently always delegates to 'extra.JM_make_textpage_dict'; the
    Python code below is an unreachable fallback kept behind the '1 or'
    toggle.
    '''
    if 1 or g_use_extra:
        return extra.JM_make_textpage_dict(tp.m_internal, page_dict, raw)
    text_buffer = mupdf.fz_new_buffer(128)
    blocks = []
    tp_rect = mupdf.FzRect(tp.m_internal.mediabox)
    for block_n, block in enumerate(tp):
        block_type = block.m_internal.type
        # image blocks must be fully contained in a finite textpage rect
        if (not mupdf.fz_contains_rect(tp_rect, mupdf.FzRect(block.m_internal.bbox))
                and not mupdf.fz_is_infinite_rect(tp_rect)
                and block_type == mupdf.FZ_STEXT_BLOCK_IMAGE
                ):
            continue
        # any block must at least intersect a finite textpage rect
        if (not mupdf.fz_is_infinite_rect(tp_rect)
                and mupdf.fz_is_empty_rect(mupdf.fz_intersect_rect(tp_rect, mupdf.FzRect(block.m_internal.bbox)))
                ):
            continue

        entry = dict()
        entry[dictkey_number] = block_n
        entry[dictkey_type] = block_type
        if block_type == mupdf.FZ_STEXT_BLOCK_IMAGE:
            entry[dictkey_bbox] = JM_py_from_rect(block.m_internal.bbox)
            JM_make_image_block(block, entry)
        else:
            JM_make_text_block(block, entry, raw, text_buffer, tp_rect)
        blocks.append(entry)
    page_dict[dictkey_blocks] = blocks
def JM_matrix_from_py(m):
    '''
    Convert a Python object to an mupdf.FzMatrix.

    Accepts an mupdf.FzMatrix (returned unchanged), a Matrix, or a
    sequence of six numbers. Anything else - including a sequence with an
    item that JM_FLOAT_ITEM cannot deliver - yields a default-constructed
    mupdf.FzMatrix().
    '''
    if isinstance(m, mupdf.FzMatrix):
        return m
    if isinstance(m, Matrix):
        return mupdf.FzMatrix(m.a, m.b, m.c, m.d, m.e, m.f)
    if not m or not PySequence_Check(m) or PySequence_Size(m) != 6:
        return mupdf.FzMatrix()
    a = []
    for i in range(6):
        item = JM_FLOAT_ITEM(m, i)
        if item is None:
            # Invalid item: fall back to the default matrix. This used to
            # return an mupdf.FzRect, i.e. an object of the wrong type.
            return mupdf.FzMatrix()
        a.append(item)
    return mupdf.FzMatrix(a[0], a[1], a[2], a[3], a[4], a[5])
def JM_mediabox(page_obj):
    '''
    Return the (inheritable) /MediaBox of a PDF page object as a
    normalized mupdf.FzRect.

    An empty or infinite MediaBox is replaced by (0, 0, 612, 792). If the
    normalized rectangle is less than 1 wide or high, the rectangle
    mupdf.FzRect.Fixed_UNIT is returned instead.
    '''
    box = mupdf.pdf_to_rect(
            mupdf.pdf_dict_get_inheritable(page_obj, PDF_NAME('MediaBox'))
            )
    if mupdf.fz_is_empty_rect(box) or mupdf.fz_is_infinite_rect(box):
        # unusable value: substitute 612 x 792
        box.x0, box.y0, box.x1, box.y1 = 0, 0, 612, 792

    # make sure that x0 <= x1 and y0 <= y1
    normalized = mupdf.FzRect(
            mupdf.fz_min(box.x0, box.x1),
            mupdf.fz_min(box.y0, box.y1),
            mupdf.fz_max(box.x0, box.x1),
            mupdf.fz_max(box.y0, box.y1),
            )
    width = normalized.x1 - normalized.x0
    height = normalized.y1 - normalized.y0
    if width < 1 or height < 1:
        return mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
    return normalized
def JM_merge_range(
        doc_des,
        doc_src,
        spage,
        epage,
        apage,
        rotate,
        links,
        annots,
        show_progress,
        graft_map,
        ):
    '''
    Copy the source pages spage..epage (both inclusive) of PDF 'doc_src'
    into PDF 'doc_des', passing apage, apage + 1, ... as the target
    location of successive pages.
    The pages are taken in descending order unless spage < epage.
    If 'show_progress' is positive, a message is issued after every
    'show_progress' copied pages.
    '''
    if g_use_extra:
        return extra.JM_merge_range(
                doc_des,
                doc_src,
                spage,
                epage,
                apage,
                rotate,
                links,
                annots,
                show_progress,
                graft_map,
                )
    total = mupdf.fz_absi(epage - spage) + 1    # number of pages to copy
    step = 1 if spage < epage else -1           # walking direction
    for counter, page in enumerate(range(spage, epage + step, step), start=1):
        afterpage = apage + counter - 1
        page_merge(doc_des, doc_src, page, afterpage, rotate, links, annots, graft_map)
        if show_progress > 0 and counter % show_progress == 0:
            message(f"Inserted {counter} of {total} pages.")
def JM_merge_resources( page, temp_res):
    '''
    Merge the /Resources object 'temp_res', created by a text pdf device,
    into the resources of 'page'.
    The device may have created several /ExtGState/Alp? and /Font/F?
    entries. They are copied under new numbers, so that entries which the
    page received in previous executions are not overwritten.
    Returns the tuple (max_alp, max_fonts): the offsets that were added to
    the numbers of the copied /Alp and /F entries.
    '''
    def highest_number(pdf_dict, prefix):
        # largest number following 'prefix' in the key names, -1 if none
        best = -1
        for idx in range(mupdf.pdf_dict_len(pdf_dict)):
            key = mupdf.pdf_to_name(mupdf.pdf_dict_get_key(pdf_dict, idx))
            if not key.startswith(prefix):
                continue
            number = mupdf.fz_atoi(key[len(prefix):])
            if number > best:
                best = number
        return best

    def copy_renumbered(source, target, prefix, offset, count):
        # copy the first 'count' entries, adding 'offset' to their numbers
        for idx in range(count):
            key = mupdf.pdf_to_name(mupdf.pdf_dict_get_key(source, idx))
            number = mupdf.fz_atoi(key[len(prefix):]) + offset
            new_key = f'{prefix}{number}'
            val = mupdf.pdf_dict_get_val(source, idx)
            mupdf.pdf_dict_puts(target, new_key, val)

    # the page's /Resources (created if missing) and its sub-dictionaries
    resources = mupdf.pdf_dict_get(page.obj(), PDF_NAME('Resources'))
    if not resources.m_internal:
        resources = mupdf.pdf_dict_put_dict(page.obj(), PDF_NAME('Resources'), 5)
    main_extg = mupdf.pdf_dict_get(resources, PDF_NAME('ExtGState'))
    main_fonts = mupdf.pdf_dict_get(resources, PDF_NAME('Font'))

    # what the text pdf device has produced
    temp_extg = mupdf.pdf_dict_get(temp_res, PDF_NAME('ExtGState'))
    temp_fonts = mupdf.pdf_dict_get(temp_res, PDF_NAME('Font'))

    max_alp = -1
    max_fonts = -1

    # /Alp objects: only if the device created any
    if mupdf.pdf_is_dict(temp_extg):
        n = mupdf.pdf_dict_len(temp_extg)
        if mupdf.pdf_is_dict(main_extg):
            max_alp = highest_number(main_extg, 'Alp')
        else:
            # the page has no /ExtGState yet
            main_extg = mupdf.pdf_dict_put_dict(resources, PDF_NAME('ExtGState'), n)
        max_alp += 1
        copy_renumbered(temp_extg, main_extg, 'Alp', max_alp, n)

    # /F objects
    if mupdf.pdf_is_dict(main_fonts):
        max_fonts = highest_number(main_fonts, "F")
    else:
        # the page has no /Font yet
        main_fonts = mupdf.pdf_dict_put_dict(resources, PDF_NAME('Font'), 2)
    max_fonts += 1
    copy_renumbered(temp_fonts, main_fonts, 'F', max_fonts, mupdf.pdf_dict_len(temp_fonts))

    return (max_alp, max_fonts)
def JM_mupdf_warning( text):
    '''
    Receive a MuPDF warning: always record it in the warnings store and
    also show it if JM_mupdf_show_warnings is set.
    '''
    JM_mupdf_warnings_store.append(text)
    if not JM_mupdf_show_warnings:
        return
    message(f'MuPDF warning: {text}')
def JM_mupdf_error( text):
    '''
    Receive a MuPDF error message: always record it in the warnings store
    and also show it if JM_mupdf_show_errors is set.
    '''
    JM_mupdf_warnings_store.append(text)
    if not JM_mupdf_show_errors:
        return
    message(f'MuPDF error: {text}\n')
def JM_new_bbox_device(rc, inc_layers):
    '''
    Create a JM_new_bbox_device_Device for the Python list 'rc' and the
    flag 'inc_layers'.
    '''
    assert isinstance(rc, list)
    device = JM_new_bbox_device_Device( rc, inc_layers)
    return device
def JM_new_buffer_from_stext_page(page):
    '''
    Return a buffer with the text of an stext page.
    Only characters overlapping the page's mediabox are taken (all
    characters if the mediabox is infinite). A line break is appended
    after every line and one more after every text block.
    '''
    assert isinstance(page, mupdf.FzStextPage)
    clip = mupdf.FzRect(page.m_internal.mediabox)
    out = mupdf.fz_new_buffer(256)
    newline = ord('\n')
    for block in page:
        if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
            continue    # images carry no text
        for line in block:
            for ch in line:
                if (JM_rects_overlap(clip, JM_char_bbox(line, ch))
                        or mupdf.fz_is_infinite_rect(clip)
                        ):
                    mupdf.fz_append_rune(out, ch.m_internal.c)
            mupdf.fz_append_byte(out, newline)
        mupdf.fz_append_byte(out, newline)
    return out
def JM_new_javascript(pdf, value):
    '''
    Create a new PDF action object of type /JavaScript.
    Arguments: the PDF document and the script source (Python string).
    Returns the action object, or None if 'value' is None or cannot be
    converted by JM_StrAsChar.
    '''
    data = None if value is None else JM_StrAsChar(value)
    if data is None:
        # no script given, or not convertible
        return

    # the script source becomes a stream object of its own ...
    script_buffer = mupdf.fz_new_buffer_from_copied_data(data.encode('utf8'))
    script_stream = mupdf.pdf_add_stream(pdf, script_buffer, mupdf.PdfObj(), 0)

    # ... which the action dictionary refers to via /JS
    action = mupdf.pdf_add_new_dict(pdf, 4)
    mupdf.pdf_dict_put(action, PDF_NAME('S'), mupdf.pdf_new_name('JavaScript'))
    mupdf.pdf_dict_put(action, PDF_NAME('JS'), script_stream)
    return action
def JM_new_output_fileptr(bio):
    '''
    Create a JM_new_output_fileptr_Output object for 'bio'.
    '''
    output = JM_new_output_fileptr_Output( bio)
    return output
def JM_norm_rotation(rotate):
    '''
    Return a normalized /Rotate value: one of 0, 90, 180, 270.

    'rotate' is first reduced to the range 0 <= rotate < 360. If the
    result is not a multiple of 90, 0 is returned.
    '''
    # Python's % yields a non-negative result for a positive modulus, so
    # one operation handles negative and arbitrarily large values. This
    # replaces repeated +/- 360 loops whose run time grew with the
    # magnitude of the (file-supplied) value.
    rotate %= 360
    if rotate % 90 != 0:
        return 0
    return rotate
def JM_object_to_buffer(what, compress, ascii):
    '''
    Print PDF object 'what' into a new buffer and return that buffer.
    'compress' and 'ascii' are passed on to mupdf.pdf_print_obj.
    '''
    target = mupdf.fz_new_buffer(512)
    writer = mupdf.FzOutput(target)
    mupdf.pdf_print_obj(writer, what, compress, ascii)
    writer.fz_close_output()
    mupdf.fz_terminate_buffer(target)
    return target
def JM_outline_xrefs(obj, xrefs):
    '''
    Collect the xref numbers of outline items. Recursive function.
    Arguments:
    'obj'   first outline item
    'xrefs' Python list receiving the numbers (empty on first call)
    Returns the list.
    '''
    node = obj
    while node.m_internal:
        number = mupdf.pdf_to_num( node)
        if number in xrefs:
            break   # circular reference
        if mupdf.pdf_dict_get( node, PDF_NAME('Type')).m_internal:
            break   # top of chain
        xrefs.append( number)

        # go down first ...
        child = mupdf.pdf_dict_get( node, PDF_NAME('First'))
        if mupdf.pdf_is_dict( child):
            xrefs = JM_outline_xrefs( child, xrefs)

        # ... then on to the next item; /Parent is looked up on that
        # next item and is used when the next item is not a dictionary
        node = mupdf.pdf_dict_get( node, PDF_NAME('Next'))
        parent = mupdf.pdf_dict_get( node, PDF_NAME('Parent'))
        if not mupdf.pdf_is_dict( node):
            node = parent
    return xrefs
def JM_page_rotation(page):
    '''
    Return the normalized (inheritable) /Rotate value of a PDF page:
    one of (0, 90, 180, 270).
    '''
    rotate_obj = mupdf.pdf_dict_get_inheritable( page.obj(), mupdf.PDF_ENUM_NAME_Rotate)
    return JM_norm_rotation(mupdf.pdf_to_int(rotate_obj))
def JM_pdf_obj_from_str(doc, src):
    '''
    Parse the string 'src' into a PDF object of document 'doc'
    (new in v1.14.0: MuPDF dropped it).
    '''
    # fixme: the string is copied twice - once into a bytes object, and
    # again inside fz_new_buffer_from_copied_data(). No other way?
    encoded = bytes(src, 'utf8')
    stream = mupdf.fz_open_buffer(mupdf.fz_new_buffer_from_copied_data(encoded))
    lexbuf = mupdf.PdfLexbuf(mupdf.PDF_LEXBUF_SMALL)
    return mupdf.pdf_parse_stm_obj(doc, stream, lexbuf)
def JM_pixmap_from_display_list(
        list_,
        ctm,
        cs,
        alpha,
        clip,
        seps,
        ):
    '''
    Render a display list to a Pixmap.
    Version of fz_new_pixmap_from_display_list (util.c) which can also
    render only the 'clip' part of the display list rectangle.
    '''
    assert isinstance(list_, mupdf.FzDisplayList)
    if seps is None:
        seps = mupdf.FzSeparations()
    assert seps is None or isinstance(seps, mupdf.FzSeparations), f'{type(seps)=}: {seps}'

    bounds = mupdf.fz_bound_display_list(list_)
    matrix = JM_matrix_from_py(ctm)
    rclip = JM_rect_from_py(clip)
    # intersecting is a no-op if no clip was given
    bounds = mupdf.fz_transform_rect(mupdf.fz_intersect_rect(bounds, rclip), matrix)
    irect = mupdf.fz_round_rect(bounds)

    assert isinstance( cs, mupdf.FzColorspace)
    pix = mupdf.fz_new_pixmap_with_bbox(cs, irect, seps, alpha)
    if alpha:
        mupdf.fz_clear_pixmap(pix)
    else:
        mupdf.fz_clear_pixmap_with_value(pix, 0xFF)

    if mupdf.fz_is_infinite_rect(rclip):
        # no clip: render everything
        dev = mupdf.fz_new_draw_device(matrix, pix)
        area = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
    else:
        dev = mupdf.fz_new_draw_device_with_bbox(matrix, pix, irect)
        area = rclip
    mupdf.fz_run_display_list(list_, dev, mupdf.FzMatrix(), area, mupdf.FzCookie())
    mupdf.fz_close_device(dev)

    # The special 'raw' Pixmap constructor avoids setting alpha to true.
    return Pixmap( 'raw', pix)
def JM_point_from_py(p):
    '''
    Convert a Python object to an mupdf.FzPoint.
    Accepts an mupdf.FzPoint (returned unchanged), a Point, or a sequence
    whose first two items are numbers. The coordinates of a sequence are
    limited to the range FZ_MIN_INF_RECT .. FZ_MAX_INF_RECT.
    Default if a coordinate cannot be obtained: point (0, 0).
    '''
    if isinstance(p, mupdf.FzPoint):
        return p
    if isinstance(p, Point):
        return mupdf.FzPoint(p.x, p.y)
    if g_use_extra:
        return extra.JM_point_from_py( p)

    x = JM_FLOAT_ITEM(p, 0)
    y = JM_FLOAT_ITEM(p, 1)
    if x is None or y is None:
        return mupdf.FzPoint(0, 0)
    # raise to the lower limit first, then cut at the upper limit
    x = min( max( x, FZ_MIN_INF_RECT), FZ_MAX_INF_RECT)
    y = min( max( y, FZ_MIN_INF_RECT), FZ_MAX_INF_RECT)
    return mupdf.FzPoint(x, y)
def JM_print_stext_page_as_text(res, page):
    '''
    Plain text output of an stext page into buffer 'res'.
    Follows fz_print_stext_page_as_text, but writes the characters of each
    line and then one line break - unless the last written character of
    the line already is one (code 10) or nothing was written for the line.
    Only characters overlapping the page's mediabox are written (all
    characters if the mediabox is infinite).
    '''
    if g_use_extra:
        return extra.JM_print_stext_page_as_text(res, page)

    assert isinstance(res, mupdf.FzBuffer)
    assert isinstance(page, mupdf.FzStextPage)
    rect = mupdf.FzRect(page.m_internal.mediabox)
    # Loop invariant: with an infinite mediabox no character is clipped.
    unclipped = mupdf.fz_is_infinite_rect(rect)

    # Note: a former preliminary pass over the page, which only computed
    # block / line / character counters that were never used, has been
    # removed. It cost a full extra traversal and raised
    # UnboundLocalError if the first line or block was empty.
    for block in page:
        if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
            continue
        for line in block:
            last_char = 0
            for ch in line:
                if unclipped or JM_rects_overlap(rect, JM_char_bbox(line, ch)):
                    last_char = ch.m_internal.c
                    JM_append_rune(res, last_char)
            # finish the line unless it is empty or already ends with \n
            if last_char != 10 and last_char > 0:
                mupdf.fz_append_string(res, "\n")
def JM_put_script(annot_obj, key1, key2, value):
    '''
    Create, replace or delete a JavaScript PDF action.
    Usable for all object types which support PDF actions, even if the
    argument name suggests annotations. Up to 2 key values can be
    specified, so JavaScript actions can be stored for '/A' and '/AA/?'
    keys.

    Arguments:
    annot_obj   PDF object owning the action
    key1        first level key, e.g. PDF_NAME('A') or PDF_NAME('AA')
    key2        second level key; None or an empty mupdf.PdfObj() if there
                is no second level
    value       script source; a false value deletes the existing entry
    '''
    key1_obj = mupdf.pdf_dict_get(annot_obj, key1)
    pdf = mupdf.pdf_get_bound_document(annot_obj)   # owning PDF

    # Determine once whether a second level key was given. Formerly only
    # the delete branch accepted key2=None, while the update branch
    # raised AttributeError on 'key2.m_internal'.
    has_key2 = key2 is not None and bool(key2.m_internal)

    # if no new script given, just delete corresponding key
    if not value:
        if not has_key2:
            mupdf.pdf_dict_del(annot_obj, key1)
        elif key1_obj.m_internal:
            mupdf.pdf_dict_del(key1_obj, key2)
        return

    # read any existing script as a Python string
    if not has_key2 or not key1_obj.m_internal:
        script = JM_get_script(key1_obj)
    else:
        script = JM_get_script(mupdf.pdf_dict_get(key1_obj, key2))

    # replace old script, if different from new one
    if value != script:
        newaction = JM_new_javascript(pdf, value)
        if not has_key2:
            mupdf.pdf_dict_put(annot_obj, key1, newaction)
        else:
            mupdf.pdf_dict_putl(annot_obj, newaction, key1, key2)
def JM_py_from_irect(r):
    '''
    Return the tuple (x0, y0, x1, y1) of the attributes of 'r'.
    '''
    return tuple(getattr(r, name) for name in ("x0", "y0", "x1", "y1"))
def JM_py_from_matrix(m):
    '''
    Return the tuple (a, b, c, d, e, f) of the attributes of 'm'.
    '''
    return tuple(getattr(m, name) for name in ("a", "b", "c", "d", "e", "f"))
def JM_py_from_point(p):
    '''
    Return the tuple (x, y) of the attributes of 'p'.
    '''
    x, y = p.x, p.y
    return (x, y)
def JM_py_from_quad(q):
    '''
    Return the corners of quad 'q' as a tuple of four (x, y) tuples in
    the order ul, ur, ll, lr.
    '''
    corners = (q.ul, q.ur, q.ll, q.lr)
    return tuple((corner.x, corner.y) for corner in corners)
def JM_py_from_rect(r):
    '''
    Return the tuple (x0, y0, x1, y1) of the attributes of 'r'.
    '''
    return tuple(getattr(r, name) for name in ("x0", "y0", "x1", "y1"))
def JM_quad_from_py(r):
    '''
    Convert a Python object to an mupdf.FzQuad.

    Accepts an mupdf.FzQuad (returned unchanged), an mupdf.FzRect, a Quad,
    a sequence of 4 numbers (taken as a rectangle) or a tuple / list of
    4 point-like items (sequences of 2 numbers) in the order ul, ur, ll,
    lr. Point coordinates are limited to the range
    FZ_MIN_INF_RECT .. FZ_MAX_INF_RECT.
    Invalid input yields a quad with all coordinates 0.
    '''
    if isinstance(r, mupdf.FzQuad):
        return r
    # cover all cases of 4-float-sequences
    if hasattr(r, "__getitem__") and len(r) == 4 and hasattr(r[0], "__float__"):
        r = mupdf.FzRect(*tuple(r))
    if isinstance( r, mupdf.FzRect):
        return mupdf.fz_quad_from_rect( r)
    if isinstance( r, Quad):
        return mupdf.fz_make_quad(
                r.ul.x, r.ul.y,
                r.ur.x, r.ur.y,
                r.ll.x, r.ll.y,
                r.lr.x, r.lr.y,
                )
    q = mupdf.fz_make_quad(0, 0, 0, 0, 0, 0, 0, 0)  # result for bad input
    if not r or not isinstance(r, (tuple, list)) or len(r) != 4:
        return q

    # NOTE(review): elsewhere in this file a None result of JM_FLOAT_ITEM
    # signals failure, so this test looks inverted (a first item that is
    # NOT a number gets the sequence treated as a rectangle). Behavior is
    # kept as is - confirm against JM_FLOAT_ITEM before changing.
    if JM_FLOAT_ITEM(r, 0) is None:
        return mupdf.fz_quad_from_rect(JM_rect_from_py(r))

    # Collect the 8 coordinates in the order ul, ur, ll, lr.
    # The former code did 'p = [0,0,0,0]' followed by 'p[i].x = ...',
    # which raises AttributeError because an int has no attribute 'x'.
    coords = []
    for obj in r:   # exactly 4 items, see the length check above
        if not PySequence_Check(obj) or PySequence_Size(obj) != 2:
            return q    # invalid: cancel the rest
        x = JM_FLOAT_ITEM(obj, 0)
        y = JM_FLOAT_ITEM(obj, 1)
        if x is None or y is None:
            return q
        x = min( max( x, FZ_MIN_INF_RECT), FZ_MAX_INF_RECT)
        y = min( max( y, FZ_MIN_INF_RECT), FZ_MAX_INF_RECT)
        coords.extend((x, y))
    return mupdf.fz_make_quad(*coords)
def JM_read_contents(pageref):
    '''
    Return a buffer with the /Contents of a PDF page object.
    If /Contents is an array, the data of those items that are streams are
    concatenated; a space (byte 32) is appended before every array item
    except the first. A page without /Contents yields a buffer created
    with mupdf.FzBuffer(0).
    '''
    assert isinstance(pageref, mupdf.PdfObj), f'{type(pageref)}'
    contents = mupdf.pdf_dict_get(pageref, mupdf.PDF_ENUM_NAME_Contents)

    if not mupdf.pdf_is_array(contents):
        if contents.m_internal:
            return mupdf.pdf_load_stream(contents)  # single object
        return mupdf.FzBuffer(0)                    # no /Contents

    merged = mupdf.FzBuffer(1024)
    for idx in range(mupdf.pdf_array_len(contents)):
        if idx > 0:
            mupdf.fz_append_byte(merged, 32)
        item = mupdf.pdf_array_get(contents, idx)
        if mupdf.pdf_is_stream(item):
            mupdf.fz_append_buffer(merged, mupdf.pdf_load_stream(item))
    return merged
def JM_rect_from_py(r):
    '''
    Convert a Python object to an mupdf.FzRect.
    Accepts an mupdf.FzRect (returned unchanged), an mupdf.FzIrect, a
    Rect, an IRect or a sequence of 4 numbers, whose values are limited to
    the range FZ_MIN_INF_RECT .. FZ_MAX_INF_RECT.
    Invalid input yields the infinite rectangle.
    '''
    if isinstance(r, mupdf.FzRect):
        return r
    if isinstance(r, mupdf.FzIrect):
        return mupdf.FzRect(r)
    if isinstance(r, (Rect, IRect)):
        return mupdf.fz_make_rect(r.x0, r.y0, r.x1, r.y1)
    if not r or not PySequence_Check(r) or PySequence_Size(r) != 4:
        return mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)

    coords = []
    for idx in range(4):
        val = JM_FLOAT_ITEM(r, idx)
        if val is None:
            # not a number: give up
            return mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
        if val < FZ_MIN_INF_RECT:
            val = FZ_MIN_INF_RECT
        if val > FZ_MAX_INF_RECT:
            val = FZ_MAX_INF_RECT
        coords.append(val)
    return mupdf.fz_make_rect(coords[0], coords[1], coords[2], coords[3])
def JM_rects_overlap(a, b):
    '''
    Return 1 if rectangles 'a' and 'b' overlap, else 0.
    Rectangles that only touch at an edge do not count as overlapping.
    '''
    apart = (
            a.x0 >= b.x1
            or a.y0 >= b.y1
            or a.x1 <= b.x0
            or a.y1 <= b.y0
            )
    return 0 if apart else 1
def JM_refresh_links( page):
    '''
    Reload the links of a PDF page from its /Annots array.
    Does nothing if no valid page is given or the page has no /Annots.
    '''
    if page is None or not page.m_internal:
        return
    annots = mupdf.pdf_dict_get( page.obj(), PDF_NAME('Annots'))
    if not annots.m_internal:
        return

    pdf = page.doc()
    pno = mupdf.pdf_lookup_page_number( pdf, page.obj())
    # pdf_page_transform fills in mediabox and transformation matrix
    mediabox = mupdf.FzRect()
    ctm = mupdf.FzMatrix()
    mupdf.pdf_page_transform( page, mediabox, ctm)
    links = mupdf.pdf_load_link_annots( pdf, page, annots, pno, ctm)
    page.m_internal.links = mupdf.ll_fz_keep_link( links.m_internal)
def JM_rotate_page_matrix(page):
    '''
    Return the matrix belonging to the rotation of a PDF page.
    A default mupdf.FzMatrix() is returned for an invalid page and for an
    unrotated page. Otherwise the matrix is built from the rotation and
    the page's cropbox size.
    '''
    if not page.m_internal:
        return mupdf.FzMatrix()     # no valid pdf page given
    rotation = JM_page_rotation(page)
    if rotation == 0:
        return mupdf.FzMatrix()     # no rotation

    size = JM_cropbox_size(page.obj())
    width, height = size.x, size.y
    if rotation == 90:
        return mupdf.fz_make_matrix(0, 1, -1, 0, height, 0)
    if rotation == 180:
        return mupdf.fz_make_matrix(-1, 0, 0, -1, width, height)
    # all remaining rotation values
    return mupdf.fz_make_matrix(0, -1, 1, 0, 0, width)
def JM_search_stext_page(page, needle):
    '''
    Search stext page 'page' for string 'needle'.

    Currently always delegates to 'extra.JM_search_stext_page'; the Python
    code below is an unreachable fallback kept behind the '1 or' toggle.
    The fallback returns the list of quads filled by 'on_highlight_char',
    or None for an empty needle.
    '''
    if 1 or g_use_extra:
        return extra.JM_search_stext_page(page.m_internal, needle)

    rect = mupdf.FzRect(page.m_internal.mediabox)
    if not needle:
        return
    quads = []

    class Hits:
        def __str__(self):
            return f'Hits(len={self.len} quads={self.quads} hfuzz={self.hfuzz} vfuzz={self.vfuzz}'

    hits = Hits()
    hits.len = 0
    hits.quads = quads
    hits.hfuzz = 0.2    # merge kerns but not large gaps
    hits.vfuzz = 0.1

    # The page text as one string; 'haystack' is the current offset in it.
    haystack_string = mupdf.fz_string_from_buffer(JM_new_buffer_from_stext_page(page))
    haystack = 0
    begin, end = find_string(haystack_string[haystack:], needle)
    if begin is None:
        # no match at all
        return quads
    begin += haystack
    end += haystack

    inside = 0  # 1 while the current offset is within a match
    i = 0       # number of characters visited
    for block in page:
        if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
            continue
        for line in block:
            for ch in line:
                i += 1
                if not mupdf.fz_is_infinite_rect(rect):
                    r = JM_char_bbox(line, ch)
                    if not JM_rects_overlap(rect, r):
                        # skip this character (offset is not advanced)
                        continue
                while 1:
                    # (re-)try to match at the current offset
                    if not inside:
                        if haystack >= begin:
                            inside = 1
                    if inside:
                        if haystack < end:
                            on_highlight_char(hits, line, ch)
                            break
                        else:
                            # current match exhausted: look for the next
                            inside = 0
                            begin, end = find_string(haystack_string[haystack:], needle)
                            if begin is None:
                                # no more matches
                                return quads
                            else:
                                begin += haystack
                                end += haystack
                                continue
                    break
                haystack += 1
            # each line is followed by a line break in the page text ...
            assert haystack_string[haystack] == '\n', \
                    f'{haystack=} {haystack_string[haystack]=}'
            haystack += 1
        # ... and each block by one more
        assert haystack_string[haystack] == '\n', \
                f'{haystack=} {haystack_string[haystack]=}'
        haystack += 1
    return quads
def JM_scan_resources(pdf, rsrc, liste, what, stream_xref, tracer):
    '''
    Step through a /Resources object and gather information about fonts
    (what == 1), images (what == 2) or Form XObjects (what == 3) in
    'liste'. Recurses into the /Resources of the XObjects found.
    'tracer' is a list of stream xrefs already visited; it is used,
    together with object marking, to detect circular references.
    '''
    circular_msg = 'Circular dependencies! Consider page cleaning.'
    if mupdf.pdf_mark_obj(rsrc):
        # object was marked already
        mupdf.fz_warn(circular_msg)
        return
    try:
        xobj = mupdf.pdf_dict_get(rsrc, mupdf.PDF_ENUM_NAME_XObject)

        if what == 1:       # fonts
            font = mupdf.pdf_dict_get(rsrc, mupdf.PDF_ENUM_NAME_Font)
            JM_gather_fonts(pdf, font, liste, stream_xref)
        elif what == 2:     # images
            JM_gather_images(pdf, xobj, liste, stream_xref)
        elif what == 3:     # form xobjects
            JM_gather_forms(pdf, xobj, liste, stream_xref)
        else:               # should never happen
            return

        # XObjects with resources of their own must be scanned, too
        for idx in range(mupdf.pdf_dict_len(xobj)):
            child = mupdf.pdf_dict_get_val(xobj, idx)
            sxref = mupdf.pdf_to_num(child) if mupdf.pdf_is_stream(child) else 0
            subrsrc = mupdf.pdf_dict_get(child, mupdf.PDF_ENUM_NAME_Resources)
            if not subrsrc.m_internal:
                continue
            if sxref in tracer:
                mupdf.fz_warn(circular_msg)
                return
            tracer.append(sxref)
            JM_scan_resources( pdf, subrsrc, liste, what, sxref, tracer)
    finally:
        # always remove the mark set above
        mupdf.pdf_unmark_obj(rsrc)
def JM_set_choice_options(annot, liste):
    '''
    Set the /Opt array (ListBox / ComboBox values) of a widget.
    'liste' is a tuple or list; each item is either a string or a
    sequence of two non-empty items, which is stored as a sub-array.
    Nothing happens if 'liste' is empty.
    '''
    if not liste:
        return
    assert isinstance( liste, (tuple, list))
    count = len( liste)
    if count == 0:
        return

    annot_obj = mupdf.pdf_annot_obj( annot)
    pdf = mupdf.pdf_get_bound_document( annot_obj)
    options = mupdf.pdf_new_array( pdf, count)
    for item in liste:
        if isinstance(item, str):
            # simple option
            mupdf.pdf_array_push_text_string( options, item)
            continue
        # option given as a pair
        assert isinstance( item, (tuple, list)) and len( item) == 2, 'bad choice field list'
        first, second = item
        assert first and second, 'bad choice field list'
        pair = mupdf.pdf_array_push_array( options, 2)
        mupdf.pdf_array_push_text_string( pair, first)
        mupdf.pdf_array_push_text_string( pair, second)
    mupdf.pdf_dict_put( annot_obj, PDF_NAME('Opt'), options)
def JM_set_field_type(doc, obj, type):
    '''
    Set the field type of PDF form field object 'obj': its /FT name and,
    where required, bits of its /Ff field flags.
    'type' is one of the mupdf.PDF_WIDGET_TYPE_* values; for any other
    value the object is left unchanged.
    '''
    pushbutton = mupdf.PDF_BTN_FIELD_IS_PUSHBUTTON
    radio = mupdf.PDF_BTN_FIELD_IS_RADIO
    combo = mupdf.PDF_CH_FIELD_IS_COMBO
    # (widget type, /FT name, flag bits to set, flag bits to clear)
    table = (
            (mupdf.PDF_WIDGET_TYPE_BUTTON, 'Btn', pushbutton, 0),
            (mupdf.PDF_WIDGET_TYPE_RADIOBUTTON, 'Btn', radio, pushbutton),
            (mupdf.PDF_WIDGET_TYPE_CHECKBOX, 'Btn', 0, pushbutton | radio),
            (mupdf.PDF_WIDGET_TYPE_TEXT, 'Tx', 0, 0),
            (mupdf.PDF_WIDGET_TYPE_LISTBOX, 'Ch', 0, combo),
            (mupdf.PDF_WIDGET_TYPE_COMBOBOX, 'Ch', combo, 0),
            (mupdf.PDF_WIDGET_TYPE_SIGNATURE, 'Sig', 0, 0),
            )
    typename = None
    setbits = 0
    clearbits = 0
    for widget_type, ft_name, to_set, to_clear in table:
        if type == widget_type:
            typename = PDF_NAME(ft_name)
            setbits = to_set
            clearbits = to_clear
            break

    if typename is not None and typename.m_internal:
        mupdf.pdf_dict_put(obj, PDF_NAME('FT'), typename)

    if setbits != 0 or clearbits != 0:
        # modify the current field flags
        flags = mupdf.pdf_dict_get_int(obj, PDF_NAME('Ff'))
        flags = (flags & ~clearbits) | setbits
        mupdf.pdf_dict_put_int(obj, PDF_NAME('Ff'), flags)
def JM_set_object_value(obj, key, value):
    '''
    Set a PDF dict key to some value.

    Arguments:
    obj     PDF object (dictionary) to work on
    key     key name, or a path of key names separated by '/'
    value   the new value in PDF source syntax (string)

    Works by storing a unique marker string under 'key', printing the
    object to a string, replacing the marker by 'value' and parsing the
    result. Note that 'obj' itself is left with the marker; the parsed
    object is returned.

    Raises Exception if a key would have to be created below an indirect
    reference, or if the marker cannot be stored / read back.
    '''
    eyecatcher = "fitz: replace me!"
    pdf = mupdf.pdf_get_bound_document(obj)
    # split PDF key at path seps and take last key part
    parts = key.split('/')
    skey = parts.pop()  # 'parts' now holds the higher path levels

    testkey = mupdf.pdf_dict_getp(obj, key)    # check if key already exists
    if not testkey.m_internal:
        # No, it will be created here. But we cannot allow this happening
        # if indirect objects are referenced. So we check all higher
        # level sub-paths for indirect references.
        while parts:
            t = '/'.join(parts)     # next high level
            if mupdf.pdf_is_indirect(mupdf.pdf_dict_getp(obj, JM_StrAsChar(t))):
                # The message is now formatted with '%'. Formerly the
                # argument was passed to Exception() as a second value,
                # leaving the '%s' placeholder in the message.
                raise Exception("path to '%s' has indirects" % JM_StrAsChar(skey))
            parts.pop()             # del last sub-key

    # Insert our eyecatcher. Will create all sub-paths in the chain, or
    # respectively remove old value of key-path.
    mupdf.pdf_dict_putp(obj, key, mupdf.pdf_new_text_string(eyecatcher))
    testkey = mupdf.pdf_dict_getp(obj, key)
    if not mupdf.pdf_is_string(testkey):
        raise Exception("cannot insert value for '%s'" % key)
    temp = mupdf.pdf_to_text_string(testkey)
    if temp != eyecatcher:
        raise Exception("cannot insert value for '%s'" % key)

    # read the result as a string
    res = JM_object_to_buffer(obj, 1, 0)
    objstr = JM_EscapeStrFromBuffer(res)

    # replace 'eyecatcher' by desired 'value'
    nullval = "/%s(%s)" % ( skey, eyecatcher)
    newval = "/%s %s" % (skey, value)
    newstr = objstr.replace(nullval, newval, 1)

    # make PDF object from resulting string
    return JM_pdf_obj_from_str(pdf, newstr)
def JM_set_ocg_arrays(conf, basestate, on, off, rbgroups, locked):
    '''
    Update entries of the optional content configuration dictionary
    'conf'.
    'basestate' is stored as name /BaseState if given. For each of 'on',
    'off', 'locked' (lists of xrefs) and 'rbgroups' (list of xref lists)
    which is not None, the old array /ON, /OFF, /Locked or /RBGroups is
    deleted and, unless the new list is empty, created again from it.
    '''
    if basestate:
        mupdf.pdf_dict_put_name( conf, PDF_NAME('BaseState'), basestate)

    # the three plain arrays of xrefs
    for key, xrefs in (('ON', on), ('OFF', off), ('Locked', locked)):
        if xrefs is None:
            continue
        mupdf.pdf_dict_del( conf, PDF_NAME(key))
        if xrefs:
            arr = mupdf.pdf_dict_put_array( conf, PDF_NAME(key), 1)
            JM_set_ocg_arrays_imp( arr, xrefs)

    # the radio button groups: an array of arrays of xrefs
    if rbgroups is None:
        return
    mupdf.pdf_dict_del( conf, PDF_NAME('RBGroups'))
    if rbgroups:
        arr = mupdf.pdf_dict_put_array( conf, PDF_NAME('RBGroups'), 1)
        for idx in range(len(rbgroups)):
            group = rbgroups[idx]
            sub_arr = mupdf.pdf_array_push_array( arr, 1)
            JM_set_ocg_arrays_imp( sub_arr, group)
def JM_set_ocg_arrays_imp(arr, list_):
    '''
    Append an indirect reference to PDF array 'arr' for every xref in
    'list_'.
    '''
    pdf = mupdf.pdf_get_bound_document(arr)
    for xref in list_:
        mupdf.pdf_array_push(arr, mupdf.pdf_new_indirect(pdf, xref, 0))
def JM_set_resource_property(ref, name, xref):
    '''
    Insert an entry into Resources/Properties (used for Marked Content).
    Arguments:
    ref     the object owning the resources, e.g. page object, Form XObject
    name    marked content name
    xref    xref of the referenced object (stored as indirect reference)
    Missing /Resources and /Properties dictionaries are created.
    '''
    pdf = mupdf.pdf_get_bound_document(ref)
    ind = mupdf.pdf_new_indirect(pdf, xref, 0)
    if not ind.m_internal:
        RAISEPY(MSG_BAD_XREF, PyExc_ValueError)

    def sub_dict(parent, key):
        # dictionary 'key' of 'parent', created if not present
        found = mupdf.pdf_dict_get(parent, PDF_NAME(key))
        if not found.m_internal:
            found = mupdf.pdf_dict_put_dict(parent, PDF_NAME(key), 1)
        return found

    resources = sub_dict(ref, 'Resources')
    properties = sub_dict(resources, 'Properties')
    mupdf.pdf_dict_put(properties, mupdf.pdf_new_name(name), ind)
- def JM_set_widget_properties(annot, Widget):
- '''
- Update the PDF form field with the properties from a Python Widget object.
- Called by "Page.add_widget" and "Annot.update_widget".
- '''
- if isinstance( annot, Annot):
- annot = annot.this
- assert isinstance( annot, mupdf.PdfAnnot), f'{type(annot)=} {type=}'
- page = _pdf_annot_page(annot)
- assert page.m_internal, 'Annot is not bound to a page'
- annot_obj = mupdf.pdf_annot_obj(annot)
- pdf = page.doc()
- def GETATTR(name):
- return getattr(Widget, name, None)
- value = GETATTR("field_type")
- field_type = value
- # rectangle --------------------------------------------------------------
- value = GETATTR("rect")
- rect = JM_rect_from_py(value)
- rot_mat = JM_rotate_page_matrix(page)
- rect = mupdf.fz_transform_rect(rect, rot_mat)
- mupdf.pdf_set_annot_rect(annot, rect)
- # fill color -------------------------------------------------------------
- value = GETATTR("fill_color")
- if value and PySequence_Check(value):
- n = len(value)
- fill_col = mupdf.pdf_new_array(pdf, n)
- col = 0
- for i in range(n):
- col = value[i]
- mupdf.pdf_array_push_real(fill_col, col)
- mupdf.pdf_field_set_fill_color(annot_obj, fill_col)
- # dashes -----------------------------------------------------------------
- value = GETATTR("border_dashes")
- if value and PySequence_Check(value):
- n = len(value)
- dashes = mupdf.pdf_new_array(pdf, n)
- for i in range(n):
- mupdf.pdf_array_push_int(dashes, value[i])
- mupdf.pdf_dict_putl(annot_obj, dashes, PDF_NAME('BS'), PDF_NAME('D'))
- # border color -----------------------------------------------------------
- value = GETATTR("border_color")
- if value and PySequence_Check(value):
- n = len(value)
- border_col = mupdf.pdf_new_array(pdf, n)
- col = 0
- for i in range(n):
- col = value[i]
- mupdf.pdf_array_push_real(border_col, col)
- mupdf.pdf_dict_putl(annot_obj, border_col, PDF_NAME('MK'), PDF_NAME('BC'))
- # entry ignored - may be used later
- #
- #int text_format = (int) PyInt_AsLong(GETATTR("text_format"));
- #
- # field label -----------------------------------------------------------
- value = GETATTR("field_label")
- if value is not None:
- label = JM_StrAsChar(value)
- mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('TU'), label)
- # field name -------------------------------------------------------------
- value = GETATTR("field_name")
- if value is not None:
- name = JM_StrAsChar(value)
- old_name = mupdf.pdf_load_field_name(annot_obj)
- if name != old_name:
- mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('T'), name)
- # max text len -----------------------------------------------------------
- if field_type == mupdf.PDF_WIDGET_TYPE_TEXT:
- value = GETATTR("text_maxlen")
- text_maxlen = value
- if text_maxlen:
- mupdf.pdf_dict_put_int(annot_obj, PDF_NAME('MaxLen'), text_maxlen)
- value = GETATTR("field_display")
- d = value
- mupdf.pdf_field_set_display(annot_obj, d)
- # choice values ----------------------------------------------------------
- if field_type in (mupdf.PDF_WIDGET_TYPE_LISTBOX, mupdf.PDF_WIDGET_TYPE_COMBOBOX):
- value = GETATTR("choice_values")
- JM_set_choice_options(annot, value)
- # border style -----------------------------------------------------------
- value = GETATTR("border_style")
- val = JM_get_border_style(value)
- mupdf.pdf_dict_putl(annot_obj, val, PDF_NAME('BS'), PDF_NAME('S'))
- # border width -----------------------------------------------------------
- value = GETATTR("border_width")
- border_width = value
- mupdf.pdf_dict_putl(
- annot_obj,
- mupdf.pdf_new_real(border_width),
- PDF_NAME('BS'),
- PDF_NAME('W'),
- )
- # /DA string -------------------------------------------------------------
- value = GETATTR("_text_da")
- da = JM_StrAsChar(value)
- mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('DA'), da)
- mupdf.pdf_dict_del(annot_obj, PDF_NAME('DS')) # not supported by MuPDF
- mupdf.pdf_dict_del(annot_obj, PDF_NAME('RC')) # not supported by MuPDF
- # field flags ------------------------------------------------------------
- field_flags = GETATTR("field_flags")
- if field_flags is not None:
- if field_type == mupdf.PDF_WIDGET_TYPE_COMBOBOX:
- field_flags |= mupdf.PDF_CH_FIELD_IS_COMBO
- elif field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON:
- field_flags |= mupdf.PDF_BTN_FIELD_IS_RADIO
- elif field_type == mupdf.PDF_WIDGET_TYPE_BUTTON:
- field_flags |= mupdf.PDF_BTN_FIELD_IS_PUSHBUTTON
- mupdf.pdf_dict_put_int( annot_obj, PDF_NAME('Ff'), field_flags)
- # button caption ---------------------------------------------------------
- value = GETATTR("button_caption")
- ca = JM_StrAsChar(value)
- if ca:
- mupdf.pdf_field_set_button_caption(annot_obj, ca)
- # script (/A) -------------------------------------------------------
- value = GETATTR("script")
- JM_put_script(annot_obj, PDF_NAME('A'), mupdf.PdfObj(), value)
- # script (/AA/K) -------------------------------------------------------
- value = GETATTR("script_stroke")
- JM_put_script(annot_obj, PDF_NAME('AA'), PDF_NAME('K'), value)
- # script (/AA/F) -------------------------------------------------------
- value = GETATTR("script_format")
- JM_put_script(annot_obj, PDF_NAME('AA'), PDF_NAME('F'), value)
- # script (/AA/V) -------------------------------------------------------
- value = GETATTR("script_change")
- JM_put_script(annot_obj, PDF_NAME('AA'), PDF_NAME('V'), value)
- # script (/AA/C) -------------------------------------------------------
- value = GETATTR("script_calc")
- JM_put_script(annot_obj, PDF_NAME('AA'), PDF_NAME('C'), value)
- # script (/AA/Bl) -------------------------------------------------------
- value = GETATTR("script_blur")
- JM_put_script(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Bl'), value)
- # script (/AA/Fo) codespell:ignore --------------------------------------
- value = GETATTR("script_focus")
- JM_put_script(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Fo'), value)
- # field value ------------------------------------------------------------
- value = GETATTR("field_value") # field value
- text = JM_StrAsChar(value) # convert to text (may fail!)
- if field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON:
- if not value:
- mupdf.pdf_set_field_value(pdf, annot_obj, "Off", 1)
- mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), "Off")
- else:
- # TODO check if another button in the group is ON and if so set it Off
- onstate = mupdf.pdf_button_field_on_state(annot_obj)
- if onstate.m_internal:
- on = mupdf.pdf_to_name(onstate)
- mupdf.pdf_set_field_value(pdf, annot_obj, on, 1)
- mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), on)
- elif text:
- mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), text)
- elif field_type == mupdf.PDF_WIDGET_TYPE_CHECKBOX:
- onstate = mupdf.pdf_button_field_on_state(annot_obj)
- on = onstate.pdf_to_name()
- if value in (True, on) or text == 'Yes':
- mupdf.pdf_set_field_value(pdf, annot_obj, on, 1)
- mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), on)
- mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('V'), on)
- else:
- mupdf.pdf_dict_put_name( annot_obj, PDF_NAME('AS'), 'Off')
- mupdf.pdf_dict_put_name( annot_obj, PDF_NAME('V'), 'Off')
- else:
- if text:
- mupdf.pdf_set_field_value(pdf, annot_obj, text, 1)
- if field_type in (mupdf.PDF_WIDGET_TYPE_COMBOBOX, mupdf.PDF_WIDGET_TYPE_LISTBOX):
- mupdf.pdf_dict_del(annot_obj, PDF_NAME('I'))
- mupdf.pdf_dirty_annot(annot)
- mupdf.pdf_set_annot_hot(annot, 1)
- mupdf.pdf_set_annot_active(annot, 1)
- mupdf.pdf_update_annot(annot)
def JM_show_string_cs(
        text,
        user_font,
        trm,
        s,
        wmode,
        bidi_level,
        markup_dir,
        language,
        ):
    '''
    Add the glyphs for string `s` to `text`, one decoded rune at a time.

    Each rune is first encoded with `mupdf.fz_encode_character_sc()` on
    `user_font`; when that yields glyph id 0, the glyph and font are taken
    from `mupdf.fz_encode_character_with_fallback()` instead.

    After every glyph, `trm` is pre-translated by the glyph advance:
    along x when `wmode` is 0, otherwise along negative y.

    Returns the matrix `trm` as it stands after the last glyph.
    '''
    pos = 0
    total = len(s)
    while pos < total:
        # fz_chartorune() reports how much of `s` it consumed plus the rune.
        consumed, ucs = mupdf.fz_chartorune(s[pos:])
        pos += consumed
        font = user_font
        gid = mupdf.fz_encode_character_sc(user_font, ucs)
        if gid == 0:
            gid, font = mupdf.fz_encode_character_with_fallback(user_font, ucs, 0, language)
        mupdf.fz_show_glyph(text, font, trm, gid, ucs, wmode, bidi_level, markup_dir, language)
        adv = mupdf.fz_advance_glyph(font, gid, wmode)
        dx, dy = (adv, 0) if wmode == 0 else (0, -adv)
        trm = mupdf.fz_pre_translate(trm, dx, dy)
    return trm
def JM_UnicodeFromBuffer(buff):
    '''
    Return the content of MuPDF buffer `buff` as a str.

    The bytes copied out of the buffer are decoded with `errors='replace'`
    and the result is cut off at the first NUL character, if there is one.
    '''
    raw = mupdf.fz_buffer_extract_copy(buff)
    decoded = raw.decode(errors='replace')
    # Keep only what precedes the first chr(0); no NUL means keep everything.
    head, _, _ = decoded.partition(chr(0))
    return head
def message_warning(text):
    '''
    Pass `text`, prefixed with 'warning: ', to `message()`.
    '''
    warning = f'warning: {text}'
    message(warning)
def JM_update_stream(doc, obj, buffer_, compress):
    '''
    Update stream object `obj` of `doc` with the content of `buffer_`.

    If `compress` is true, the buffer is longer than 30 bytes and
    `JM_compress_buffer()` produces a strictly shorter buffer, the
    compressed data is stored and /Filter is set to /FlateDecode.
    In every other case the buffer is stored as it is.
    '''
    def store_as_is():
        mupdf.pdf_update_stream(doc, obj, buffer_, 0)

    if not compress:
        return store_as_is()

    raw_size, _ = mupdf.fz_buffer_storage(buffer_)
    if raw_size <= 30:
        # ignore small stuff
        return store_as_is()

    deflated = JM_compress_buffer(buffer_)
    assert isinstance(deflated, mupdf.FzBuffer)
    if not deflated.m_internal:
        return store_as_is()

    deflated_size, _ = mupdf.fz_buffer_storage(deflated)
    if deflated_size >= raw_size:
        # compression did not pay off
        return store_as_is()

    mupdf.pdf_dict_put(
            obj,
            mupdf.PDF_ENUM_NAME_Filter,
            mupdf.PDF_ENUM_NAME_FlateDecode,
            )
    mupdf.pdf_update_stream(doc, obj, deflated, 1)
def JM_xobject_from_page(pdfout, fsrcpage, xref, gmap):
    '''
    Make an XObject from a PDF page.

    For a positive `xref` an indirect reference to that object in `pdfout`
    is returned and the source page is not touched.

    Otherwise a new XObject is created in `pdfout` from the source page's
    inheritable /MediaBox, its contents (as returned by
    `JM_read_contents()`) and a grafted copy of its inheritable /Resources.
    '''
    assert isinstance(gmap, mupdf.PdfGraftMap), f'{type(gmap)=}'
    if xref > 0:
        return mupdf.pdf_new_indirect(pdfout, xref, 0)

    page_obj = _as_pdf_page(fsrcpage.this).obj()
    mediabox = mupdf.pdf_to_rect(mupdf.pdf_dict_get_inheritable(page_obj, PDF_NAME('MediaBox')))

    # Copy the resources object of the source page into the target PDF.
    src_resources = mupdf.pdf_dict_get_inheritable(page_obj, PDF_NAME('Resources'))
    if gmap.m_internal:
        # use graftmap when possible
        resources = mupdf.pdf_graft_mapped_object(gmap, src_resources)
    else:
        resources = mupdf.pdf_graft_object(pdfout, src_resources)

    # get the source page's contents
    contents = JM_read_contents(page_obj)

    # create XObject representing the source page
    xobj = mupdf.pdf_new_xobject(pdfout, mediabox, mupdf.FzMatrix(), mupdf.PdfObj(0), contents)
    # store the page contents
    JM_update_stream(pdfout, xobj, contents, 1)
    # store the page resources
    mupdf.pdf_dict_put(xobj, PDF_NAME('Resources'), resources)
    return xobj
def PySequence_Check(s):
    '''
    Return True if `s` is a tuple or a list (or an instance of a subclass).
    '''
    accepted = (tuple, list)
    return isinstance(s, accepted)
def PySequence_Size(s):
    '''
    Return the number of items in `s`, i.e. `len(s)`.
    '''
    size = len(s)
    return size
# constants: error messages. These are also in extra.i.
#
MSG_BAD_ANNOT_TYPE     = "bad annot type"
MSG_BAD_APN            = "bad or missing annot AP/N"
MSG_BAD_ARG_INK_ANNOT  = "arg must be seq of seq of float pairs"
MSG_BAD_ARG_POINTS     = "bad seq of points"
MSG_BAD_BUFFER         = "bad type: 'buffer'"
MSG_BAD_COLOR_SEQ      = "bad color sequence"
MSG_BAD_DOCUMENT       = "cannot open broken document"
MSG_BAD_FILETYPE       = "bad filetype"
MSG_BAD_LOCATION       = "bad location"
MSG_BAD_OC_CONFIG      = "bad config number"
MSG_BAD_OC_LAYER       = "bad layer number"
MSG_BAD_OC_REF         = "bad 'oc' reference"
MSG_BAD_PAGEID         = "bad page id"
MSG_BAD_PAGENO         = "bad page number(s)"
MSG_BAD_PDFROOT        = "PDF has no root"
MSG_BAD_RECT           = "rect is infinite or empty"
MSG_BAD_TEXT           = "bad type: 'text'"
MSG_BAD_XREF           = "bad xref"
MSG_COLOR_COUNT_FAILED = "color count failed"
MSG_FILE_OR_BUFFER     = "need font file or buffer"
MSG_FONT_FAILED        = "cannot create font"
MSG_IS_NO_ANNOT        = "is no annotation"
MSG_IS_NO_IMAGE        = "is no image"
MSG_IS_NO_PDF          = "is no PDF"
MSG_IS_NO_DICT         = "object is no PDF dict"
MSG_PIX_NOALPHA        = "source pixmap has no alpha"
MSG_PIXEL_OUTSIDE      = "pixel(s) outside image"

# Exception identifiers; in this module they are plain strings.
JM_Exc_FileDataError   = 'FileDataError'
PyExc_ValueError       = 'ValueError'
def RAISEPY(msg, exc):
    '''
    Raise a plain `Exception` whose message is `msg`.

    `exc` is accepted but not used by this implementation.
    '''
    #JM_Exc_CurrentException=exc
    #fz_throw(context, FZ_ERROR_GENERIC, msg)
    error = Exception(msg)
    raise error
def PyUnicode_DecodeRawUnicodeEscape(s, errors='strict'):
    '''
    Decode `s` with the 'raw_unicode_escape' codec and return the str.

    Args:
        s: str, bytes, bytearray or memoryview. Any falsy value (None, '',
            b'', ...) gives ''. A str is first encoded as UTF-8.
        errors: error handler passed to both the encode and decode step.
    Returns:
        The decoded str.
    Raises:
        TypeError: `s` is truthy but none of the supported types.
            Previously this case failed with an UnboundLocalError.
    '''
    if not s:
        return ""
    if isinstance(s, str):
        raw = s.encode("utf8", errors=errors)
    elif isinstance(s, (bytes, bytearray, memoryview)):
        raw = bytes(s)
    else:
        raise TypeError(
                f"expected str or bytes-like object, got {type(s).__name__}"
                )
    return raw.decode('raw_unicode_escape', errors=errors)
def CheckColor(c: OptSeq):
    """Validate a color specification.

    A falsy `c` is accepted without any check. Otherwise `c` must be a list
    or tuple with 1, 3 or 4 components, none below 0 and none above 1.

    Raises:
        ValueError: if `c` violates one of these conditions.
    """
    if not c:
        return
    acceptable = (
        type(c) in (list, tuple)
        and len(c) in (1, 3, 4)
        and not min(c) < 0
        and not max(c) > 1
    )
    if not acceptable:
        raise ValueError("need 1, 3 or 4 color components in range 0 to 1")
def CheckFont(page: Page, fontname: str) -> tuple:
    """Return the first entry of `page.get_fonts()` whose item 4 equals
    `fontname`.

    Returns None if no entry matches.
    """
    candidates = (entry for entry in page.get_fonts() if entry[4] == fontname)
    return next(candidates, None)
def CheckFontInfo(doc: Document, xref: int) -> list:
    """Return the first entry of `doc.FontInfos` whose item 0 equals `xref`.

    Returns None if no entry matches.
    """
    candidates = (info for info in doc.FontInfos if xref == info[0])
    return next(candidates, None)
def CheckMarkerArg(quads: typing.Any) -> tuple:
    """Normalize the argument of a text marker annotation.

    * Something accepted by `CheckRect()` is returned as a 1-tuple holding
      the quad of the corresponding `Rect`.
    * Something accepted by `CheckQuad()` is returned as a 1-tuple holding
      the argument itself.
    * Anything else is iterated over; every item must pass `CheckRect()` or
      `CheckQuad()`, and the argument is then returned unchanged.

    Raises:
        ValueError: if an item passes neither check.
    """
    if CheckRect(quads):
        return (Rect(quads).quad,)
    if CheckQuad(quads):
        return (quads,)
    if not all(CheckRect(q) or CheckQuad(q) for q in quads):
        raise ValueError("bad quads entry")
    return quads
def CheckMorph(o: typing.Any) -> bool:
    """Validate a 'morph' argument.

    Returns False for a falsy `o`. Otherwise `o` must be a list or tuple of
    two items: the first of length 2, the second of length 6 with items 4
    and 5 both equal to 0. Returns True if all of this holds.

    Raises:
        ValueError: if `o` is truthy but malformed.
    """
    if not bool(o):
        return False
    is_pair = type(o) in (list, tuple) and len(o) == 2
    if not is_pair:
        raise ValueError("morph must be a sequence of length 2")
    if len(o[0]) != 2 or len(o[1]) != 6:
        raise ValueError("invalid morph param 0")
    six = o[1]
    if not (six[4] == six[5] == 0):
        raise ValueError("invalid morph param 1")
    return True
def CheckParent(o: typing.Any):
    """Orphan check for `o`; currently a no-op.

    The function returns immediately, so the check below never runs and
    None is returned for every argument.
    """
    return
    # Unreachable while the `return` above is in place: would reject objects
    # that have no `parent` attribute or whose `parent` is None.
    if getattr(o, "parent", None) is None:
        raise ValueError(f"orphaned object {type(o)=}: parent is None")
def CheckQuad(q: typing.Any) -> bool:
    """Check whether an object is a convex quad-like.

    Returns False if `Quad(q)` raises an exception, otherwise the
    `is_convex` property of the resulting quad.
    """
    try:
        candidate = Quad(q)
    except Exception:
        if g_exceptions_verbose > 1:
            exception_info()
        return False
    else:
        return candidate.is_convex
def CheckRect(r: typing.Any) -> bool:
    """Check whether an object is a non-degenerate rect-like.

    Returns False if `Rect(r)` raises an exception, or if the resulting
    rectangle is empty or infinite; True otherwise.
    """
    try:
        candidate = Rect(r)
    except Exception:
        if g_exceptions_verbose > 1:
            exception_info()
        return False
    else:
        return not (candidate.is_empty or candidate.is_infinite)
def ColorCode(c: typing.Union[list, tuple, float, None], f: str) -> str:
    """Return the PDF color operator string for color `c`.

    Args:
        c: color as a sequence of 1, 3 or 4 components, or a single number.
            A falsy value gives "".
        f: "c" selects the upper-case operator (G, RG, K); any other value
            selects the lower-case one (g, rg, k).
    Returns:
        The formatted components followed by the operator and a space.
    Raises:
        ValueError: from `CheckColor()` if `c` is malformed.
    """
    if not c:
        return ""
    if hasattr(c, "__float__"):
        # single number: treat as one-component color
        c = (c,)
    CheckColor(c)
    upper = f == "c"
    count = len(c)
    if count == 1:
        operands = _format_g(c[0])
        operator = "G " if upper else "g "
    elif count == 3:
        operands = _format_g(tuple(c))
        operator = "RG " if upper else "rg "
    else:
        operands = _format_g(tuple(c))
        operator = "K " if upper else "k "
    return operands + " " + operator
def Page__add_text_marker(self, quads, annot_type):
    '''
    Create an annotation of type `annot_type` covering the given `quads`.

    While the annotation is built, a non-zero page rotation is temporarily
    replaced by 0 in the page's /Rotate entry and written back afterwards,
    on success as well as on failure.

    Returns the new `Annot`, or None if an exception occurred while the
    annotation was being created.
    '''
    pdfpage = self._pdf_page()
    rotation = JM_page_rotation(pdfpage)
    rotated = rotation != 0

    def set_rotate(value):
        mupdf.pdf_dict_put_int(pdfpage.obj(), PDF_NAME('Rotate'), value)

    def final():
        # write back the original rotation
        if rotated:
            set_rotate(rotation)

    try:
        if rotated:
            set_rotate(0)
        annot = mupdf.pdf_create_annot(pdfpage, annot_type)
        for item in quads:
            mupdf.pdf_add_annot_quad_point(annot, JM_quad_from_py(item))
        mupdf.pdf_update_annot(annot)
        JM_add_annot_id(annot, "A")
        final()
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        final()
        return
    return Annot(annot)
def PDF_NAME(x):
    '''
    Return attribute `PDF_ENUM_NAME_<x>` of module `mupdf`; `x` must be a str.
    '''
    assert isinstance(x, str)
    attr_name = f'PDF_ENUM_NAME_{x}'
    # Note that we return a (swig proxy for) pdf_obj*, not a mupdf.PdfObj. In
    # the C++ API, the constructor PdfObj::PdfObj(pdf_obj*) is marked as
    # explicit, but this seems to be ignored by SWIG. If SWIG started to
    # generate code that respected `explicit`, we would need to do `return
    # mupdf.PdfObj(ret)`.
    #
    # [Compare with extra.i, where we define our own PDF_NAME2() macro that
    # returns a mupdf::PdfObj.]
    return getattr(mupdf, attr_name)
def UpdateFontInfo(doc: Document, info: typing.Sequence):
    """Store `info` in `doc.FontInfos`.

    The first existing entry whose item 0 equals `info[0]` is replaced;
    if there is none, `info` is appended.
    """
    xref = info[0]
    for idx, known in enumerate(doc.FontInfos):
        if known[0] == xref:
            doc.FontInfos[idx] = info
            return
    doc.FontInfos.append(info)
def args_match(args, *types):
    '''
    Returns true if <args> matches <types>.

    Each item in <types> is a type or tuple of types. Any of these types will
    match an item in <args>. `None` will match anything in <args>. `type(None)`
    will match an arg whose value is `None`.

    A tuple that contains `None` marks an optional argument: it may be
    missing from <args>. If it is present it must be an instance of one of
    the tuple's other entries; a tuple holding nothing but `None` accepts
    any value. (The `None` marker used to be handed to `isinstance()`,
    which raised TypeError whenever none of the real types matched.)
    '''
    j = 0
    for type_ in types:
        optional = isinstance(type_, tuple) and None in type_
        if j >= len(args):
            if optional:
                # arg is missing but has default value.
                continue
            return False
        if optional:
            # `None` is only the "may be omitted" marker, not a real type.
            real_types = tuple(t for t in type_ if t is not None)
            type_ = real_types if real_types else None
        if type_ is not None and not isinstance(args[j], type_):
            return False
        j += 1
    return j == len(args)
def calc_image_matrix(width, height, tr, rotate, keep):
    '''
    Compute the image insertion matrix.

    Args:
        width, height: image dimensions; only consulted when `keep` is true.
        tr: target rectangle, converted with `JM_rect_from_py()`.
        rotate: rotation angle handed to `mupdf.fz_rotate()`.
        keep: if true, the width / height ratio of the image is taken into
            account when sizing it inside the target rectangle.
    Returns:
        The matrix built by concatenating, in this order: a shift by
        (-0.5, -0.5), the rotation, a scale by the computed (w, h), and a
        translation to the centre of the target rectangle.
    '''
    trect = JM_rect_from_py(tr)
    rot = mupdf.fz_rotate(rotate)
    trw = trect.x1 - trect.x0
    trh = trect.y1 - trect.y0

    # width / height factors relative to the larger image side
    if keep:
        large = max(width, height)
        fw, fh = width / large, height / large
    else:
        fw = fh = 1
    small = min(fw, fh)

    # any rotation other than 0 or 180 exchanges the two factors
    if rotate not in (0, 180):
        fw, fh = fh, fw

    if fw < 1:
        if trw / fw > trh / fh:
            w, h = trh * small, trh
        else:
            w, h = trw, trw / small
    elif fw != fh:
        if trw / fw > trh / fh:
            w, h = trh / small, trh
        else:
            w, h = trw, trw * small
    else:
        w, h = trw, trh

    # centre of the target rectangle
    centre = mupdf.fz_make_point(
            (trect.x0 + trect.x1) / 2,
            (trect.y0 + trect.y1) / 2,
            )

    mat = mupdf.fz_make_matrix(1, 0, 0, 1, -0.5, -0.5)
    for step in (
            rot,
            mupdf.fz_scale(w, h),
            mupdf.fz_translate(centre.x, centre.y),
            ):
        mat = mupdf.fz_concat(mat, step)
    return mat
def detect_super_script(line, ch):
    '''
    Tell whether character `ch` counts as superscript within `line`.

    Only lines with wmode 0 and direction (1, 0) are examined; for any other
    line 0 is returned. For such a line the result is True if the y origin
    of `ch` is smaller than the y origin of the line's first character minus
    10% of the size of `ch`, else False.
    '''
    ln = line.m_internal
    examined = ln.wmode == 0 and ln.dir.x == 1 and ln.dir.y == 0
    if not examined:
        return 0
    c = ch.m_internal
    return c.origin.y < ln.first_char.origin.y - c.size * 0.1
def dir_str(x):
    '''
    Return a multi-line description of `x`: a header line with `x`, its
    type and the number of names in `dir(x)`, then one line per name.
    '''
    names = dir(x)
    header = f'{x} {type(x)} ({len(names)}):\n'
    listing = ''.join(f' {name}\n' for name in names)
    return header + listing
def getTJstr(text: str, glyphs: typing.Union[list, tuple, None], simple: bool, ordering: int) -> str:
    """Return a PDF string enclosed in [] brackets, suitable for the PDF TJ
    operator.

    Notes:
        Every character becomes either 2 or 4 hex digits. Text that already
        starts with "[<" and ends with ">]" is returned unchanged; empty
        text gives "[<>]".
    Args:
        text: the string to convert.
        glyphs: table indexed by character code; item 0 of each entry is
            used as the glyph number.
        simple: if true, emit 2 hex digits per character: the character
            code if `glyphs` is None, else the glyph number. Characters
            with a code of 256 or more become "b7".
        ordering: only used if `simple` is false (4 hex digits per
            character): negative means use glyph numbers, otherwise use
            the character codes (CJK fonts).
    """
    if text.startswith("[<") and text.endswith(">]"):  # already done
        return text
    if not bool(text):
        return "[<>]"

    if simple:
        if glyphs is None:
            # not Symbol, not ZapfDingbats: use char code
            digits = ("%02x" % ord(c) if ord(c) < 256 else "b7" for c in text)
        else:
            # Symbol or ZapfDingbats: use glyphs
            digits = ("%02x" % glyphs[ord(c)][0] if ord(c) < 256 else "b7" for c in text)
    elif ordering < 0:
        # not a CJK font: use the glyphs
        digits = ("%04x" % glyphs[ord(c)][0] for c in text)
    else:
        # CJK: use the char codes
        digits = ("%04x" % ord(c) for c in text)
    return "[<" + "".join(digits) + ">]"
def get_pdf_str(s: str) -> str:
    """Return a PDF string depending on its coding.

    Notes:
        The result is bracketed with "()" for literal strings or "<>" for
        hex strings.
        * An empty string gives "()".
        * If any character has a code above 255, the result is
          "<feff[hexstring]>" where [hexstring] is the UTF-16BE encoding
          of the whole string.
        * Otherwise printable ASCII is copied, with "(", ")" and the
          backslash preceded by a backslash; codes above 127 are written
          as a backslash plus three octal digits; backspace, tab, line
          feed, form feed and carriage return become \\b, \\t, \\n, \\f
          and \\r; every remaining code is replaced by \\267.
    """
    if not bool(s):
        return "()"

    # escape sequences for the supported control characters
    control_escapes = {
        8: "\\b",  # backspace
        9: "\\t",  # tab
        10: "\\n",  # line feed
        12: "\\f",  # form feed
        13: "\\r",  # carriage return
    }

    parts = []
    for ch in s:
        code = ord(ch)
        if code > 255:
            # beyond 8-bit code range: the whole string goes out as
            # UTF-16BE with BOM, in hex notation
            encoded = bytearray([254, 255]) + bytearray(s, "UTF-16BE")
            return "<" + encoded.hex() + ">"
        if 31 < code < 127:
            # ASCII range; these three need to be escaped
            parts.append("\\" + ch if ch in ("(", ")", "\\") else ch)
        elif code > 127:
            # beyond ASCII
            parts.append("\\%03o" % code)
        else:
            # unsupported codes are replaced by 0xB7
            parts.append(control_escapes.get(code, "\\267"))
    return "(" + "".join(parts) + ")"
def get_tessdata(tessdata=None):
    """Detect Tesseract language support folder.

    This function is used to enable OCR via Tesseract even if the language
    support folder is not specified directly or in environment variable
    TESSDATA_PREFIX.

    * If <tessdata> is set we return it directly.

    * Otherwise we return `os.environ['TESSDATA_PREFIX']` if set.

    * Otherwise we search for a Tesseract installation and return its language
      support folder.

    * Otherwise we raise an exception.

    Raises:
        RuntimeError: no folder was specified and none could be located.
    """
    if tessdata:
        return tessdata
    tessdata = os.getenv("TESSDATA_PREFIX")
    if tessdata:  # use environment variable if set
        return tessdata

    # Try to locate the tesseract-ocr installation.

    import subprocess

    cp = subprocess.run('tesseract --list-langs', shell=1, capture_output=1, check=0, text=True)
    if cp.returncode == 0:
        m = re.search('List of available languages in "(.+)"', cp.stdout)
        if m:
            tessdata = m.group(1)
            return tessdata

    # Windows systems:
    if sys.platform == "win32":
        cp = subprocess.run("where tesseract", shell=1, capture_output=1, check=0, text=True)
        response = cp.stdout.strip()
        if cp.returncode or not response:
            raise RuntimeError("No tessdata specified and Tesseract is not installed")
        dirname = os.path.dirname(response)  # path of tesseract.exe
        tessdata = os.path.join(dirname, "tessdata")  # language support
        if os.path.exists(tessdata):  # all ok?
            return tessdata
        else:  # should not happen!
            # f-string so that the folder name actually appears in the message
            raise RuntimeError(f"No tessdata specified and Tesseract installation has no {tessdata} folder")

    # Unix-like systems:
    attempts = list()
    for path in 'tesseract-ocr', 'tesseract':
        cp = subprocess.run(f'whereis {path}', shell=1, capture_output=1, check=0, text=True)
        if cp.returncode == 0:
            response = cp.stdout.strip().split()
            if len(response) == 2:
                # search tessdata in folder structure
                dirname = response[1]  # contains tesseract-ocr installation folder
                pattern = f"{dirname}/*/tessdata"
                attempts.append(pattern)
                tessdatas = glob.glob(pattern)
                tessdatas.sort()
                if tessdatas:
                    return tessdatas[-1]
    if attempts:
        # one attempted pattern per line
        text = 'No tessdata specified and no match for:\n'
        text += '\n'.join(f'    {attempt}' for attempt in attempts)
        raise RuntimeError(text)
    else:
        raise RuntimeError('No tessdata specified and Tesseract is not installed')
def css_for_pymupdf_font(
    fontcode: str, *, CSS: OptStr = None, archive: AnyType = None, name: OptStr = None
) -> str:
    """Create @font-face items for the given fontcode of pymupdf-fonts.

    Every key of `fitz_fontdescriptors` that starts with 'fontcode' gets one
    CSS @font-face definition. All definitions share one font-family name
    ('name', or 'fontcode' if no name is given), and the font buffer of each
    key is added to 'archive' under that key.

    Note:
        The font naming convention in package pymupdf-fonts is "fontcode<sf>",
        where the suffix "sf" is either empty or one of "it", "bo" or "bi",
        for the regular, italic, bold or bold-italic variant of a font.
        For example, font code "notos" refers to fonts
        "notos" - "Noto Sans Regular"
        "notosit" - "Noto Sans Italic"
        "notosbo" - "Noto Sans Bold"
        "notosbi" - "Noto Sans Bold Italic"

    Args:
        fontcode: (str) prefix selecting the font variants to include.
            It must match at least 1 and at most 4 font keys.
        CSS: (str) CSS source to append the @font-face definitions to.
            None is treated like the empty string.
        archive: (Archive, mandatory) where to place the font buffers.
        name: (str) use this as font-family name instead of 'fontcode'.
    Returns:
        The CSS source with one @font-face statement appended per matching
        font key.
    Raises:
        ValueError: 'archive' is not an Archive, no font key matches, or
            more than 4 font keys match.
    """
    # @font-face template string
    CSSFONT = "\n@font-face {font-family: %s; src: url(%s);%s%s}\n"

    if type(archive) is not Archive:
        raise ValueError("'archive' must be an Archive")
    if CSS is None:
        CSS = ""

    # select font codes starting with the passed-in string
    font_keys = [key for key in fitz_fontdescriptors.keys() if key.startswith(fontcode)]
    if font_keys == []:
        raise ValueError(f"No font code '{fontcode}' found in pymupdf-fonts.")
    if len(font_keys) > 4:
        raise ValueError("fontcode too short")
    if name is None:  # use this name for font-family
        name = fontcode

    for fkey in font_keys:
        descriptor = fitz_fontdescriptors[fkey]
        is_bold = descriptor["bold"]
        is_italic = descriptor["italic"]
        archive.add(descriptor["loader"](), fkey)  # load the buffer, update the archive
        weight_css = "font-weight: bold;" if is_bold else ""
        style_css = "font-style: italic;" if is_italic else ""
        CSS += CSSFONT % (name, fkey, weight_css, style_css)
    return CSS
def get_text_length(text: str, fontname: str ="helv", fontsize: float =11, encoding: int =0) -> float:
    """Calculate length of a string for a built-in font.

    Args:
        text: the string to measure.
        fontname: name of the font; compared in lower case.
        fontsize: font size points.
        encoding: encoding to use, 0=Latin (default), 1=Greek, 2=Cyrillic.
    Returns:
        (float) length of text.
    Raises:
        ValueError: the font is neither in `Base14_fontdict` nor one of the
            listed CJK names.
    """
    cjk_names = (
        "china-t",
        "china-s",
        "china-ts",
        "china-ss",
        "japan",
        "japan-s",
        "korea",
        "korea-s",
    )
    fontname = fontname.lower()
    basename = Base14_fontdict.get(fontname, None)

    # Symbol and ZapfDingbats are measured via their own glyph tables.
    glyphs = None
    if basename == "Symbol":
        glyphs = symbol_glyphs
    if basename == "ZapfDingbats":
        glyphs = zapf_glyphs
    if glyphs is not None:
        # codes of 256 and above take the width of entry 183
        widths = [glyphs[ord(c)][1] if ord(c) < 256 else glyphs[183][1] for c in text]
        return sum(widths) * fontsize

    if fontname in Base14_fontdict.keys():
        return util_measure_string(
            text, Base14_fontdict[fontname], fontsize, encoding
        )

    if fontname in cjk_names:
        # every character is counted as one fontsize wide
        return len(text) * fontsize

    raise ValueError("Font '%s' is unsupported" % fontname)
def image_profile(img: ByteString) -> dict:
    """ Return basic properties of an image.

    Args:
        img: bytes, bytearray, io.BytesIO object or an opened image file
            (any object with a "read" attribute).
    Returns:
        Whatever `TOOLS.image_profile()` returns for the image data.
        NOTE(review): the previous docstring describes this as a dictionary
        with keys width, height, colorspace.n, bpc, type, ext and size;
        that is not visible from this function alone.
    Raises:
        ValueError: `img` is none of the supported types.
    """
    if type(img) is io.BytesIO:
        return TOOLS.image_profile(img.getvalue())
    if hasattr(img, "read"):
        return TOOLS.image_profile(img.read())
    if type(img) in (bytes, bytearray):
        return TOOLS.image_profile(img)
    raise ValueError("bad argument 'img'")
def jm_append_merge(dev):
    '''
    Append current path to list or merge into last path of the list.

    (1) Append if first path, different item lists or not a 'stroke' version
        of previous path
    (2) If new path has the same items, merge its content into previous path
        and change path["type"] to "fs".
    (3) If `dev.method` is set, skip the previous and pass the path
        dictionary to method `dev.method` of `dev.out`.
    '''
    assert isinstance(dev.out, list)

    if callable(dev.method) or dev.method:  # function or method
        # callback.
        if dev.method is None:
            # fixme, this surely cannot happen?
            assert 0
            #resp = PyObject_CallFunctionObjArgs(out, dev.pathdict, NULL)
        else:
            callback = getattr(dev.out, dev.method)
            resp = callback(dev.pathdict)
        if not resp:
            message("calling cdrawings callback function/method failed!")
        dev.pathdict = None
        return

    def append():
        # store a copy of the current path, then empty the current path
        dev.out.append(dev.pathdict.copy())
        dev.pathdict.clear()

    assert isinstance(dev.out, list)
    count = len(dev.out)  # len of output list so far
    if count == 0:  # always append first path
        return append()

    if dev.pathdict[dictkey_type] != 's':  # if not stroke, then append
        return append()

    prev = dev.out[count - 1]  # get prev path
    if prev[dictkey_type] != 'f':  # if previous not fill, append
        return append()

    # last check: there must be the same list of items for "f" and "s".
    if prev[dictkey_items] != dev.pathdict[dictkey_items]:
        return append()

    # merge with no override: only keys missing in `prev` are copied
    try:
        for key, value in dev.pathdict.items():
            if key not in prev:
                prev[key] = value
        merged = True
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        merged = False

    if merged:
        prev[dictkey_type] = 'fs'
        dev.pathdict.clear()
    else:
        message("could not merge stroke and fill path")
        append()
def jm_bbox_add_rect(dev, ctx, rect, code):
    '''
    Append an entry for `rect` to `dev.result`.

    The entry is `(code, rect)` with the rectangle converted by
    `JM_py_from_rect()`; if `dev.layers` is true, `dev.layer_name` is added
    as a third item.
    '''
    bbox = JM_py_from_rect(rect)
    if dev.layers:
        entry = (code, bbox, dev.layer_name)
    else:
        entry = (code, bbox)
    dev.result.append(entry)
def jm_bbox_fill_image(dev, ctx, image, ctm, alpha, color_params):
    '''
    Record the unit rectangle transformed by `ctm` under code "fill-image".
    '''
    unit = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
    transformed = mupdf.ll_fz_transform_rect(unit.internal(), ctm)
    jm_bbox_add_rect(dev, ctx, transformed, "fill-image")
def jm_bbox_fill_image_mask(dev, ctx, image, ctm, colorspace, color, alpha, color_params):
    '''
    Record the unit rectangle transformed by `ctm` under code "fill-imgmask".

    Exceptions are re-raised, after being reported via `exception_info()`
    if `g_exceptions_verbose` is set.
    '''
    try:
        bbox = mupdf.ll_fz_transform_rect(mupdf.fz_unit_rect, ctm)
        jm_bbox_add_rect(dev, ctx, bbox, "fill-imgmask")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def jm_bbox_fill_path(dev, ctx, path, even_odd, ctm, colorspace, color, alpha, color_params):
    '''
    Record the bounds of `path` under `ctm` (no stroke state) with code
    "fill-path".

    Exceptions are re-raised, after being reported via `exception_info()`
    if `g_exceptions_verbose` is set.
    '''
    # normalised to a bool as before; the value is not used afterwards
    even_odd = True if even_odd else False
    try:
        bbox = mupdf.ll_fz_bound_path(path, None, ctm)
        jm_bbox_add_rect(dev, ctx, bbox, "fill-path")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def jm_bbox_fill_shade(dev, ctx, shade, ctm, alpha, color_params):
    '''
    Record the bounds of `shade` under `ctm` with code "fill-shade".

    Exceptions are re-raised, after being reported via `exception_info()`
    if `g_exceptions_verbose` is set.
    '''
    try:
        bbox = mupdf.ll_fz_bound_shade(shade, ctm)
        jm_bbox_add_rect(dev, ctx, bbox, "fill-shade")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def jm_bbox_stroke_text(dev, ctx, text, stroke, ctm, *args):
    '''
    Record the bounds of `text` for `stroke` and `ctm` with code
    "stroke-text". Extra positional arguments are ignored.

    Exceptions are re-raised, after being reported via `exception_info()`
    if `g_exceptions_verbose` is set.
    '''
    try:
        bbox = mupdf.ll_fz_bound_text(text, stroke, ctm)
        jm_bbox_add_rect(dev, ctx, bbox, "stroke-text")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def jm_bbox_fill_text(dev, ctx, text, ctm, *args):
    '''
    Record the bounds of `text` under `ctm` (no stroke state) with code
    "fill-text". Extra positional arguments are ignored.

    Exceptions are re-raised, after being reported via `exception_info()`
    if `g_exceptions_verbose` is set.
    '''
    try:
        bbox = mupdf.ll_fz_bound_text(text, None, ctm)
        jm_bbox_add_rect(dev, ctx, bbox, "fill-text")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def jm_bbox_ignore_text(dev, ctx, text, ctm):
    '''
    Record the bounds of `text` under `ctm` (no stroke state) with code
    "ignore-text".
    '''
    bbox = mupdf.ll_fz_bound_text(text, None, ctm)
    jm_bbox_add_rect(dev, ctx, bbox, "ignore-text")
def jm_bbox_stroke_path(dev, ctx, path, stroke, ctm, colorspace, color, alpha, color_params):
    '''
    Record the bounds of `path` for `stroke` and `ctm` with code
    "stroke-path".

    Exceptions are re-raised, after being reported via `exception_info()`
    if `g_exceptions_verbose` is set.
    '''
    try:
        bbox = mupdf.ll_fz_bound_path(path, stroke, ctm)
        jm_bbox_add_rect(dev, ctx, bbox, "stroke-path")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def jm_checkquad(dev):
    '''
    Check whether the last 4 lines represent a quad.

    Because of how we count, the lines are a polyline already, i.e. last point
    of a line equals 1st point of next line.
    So we check for a polygon (last line's end point equals start point).

    Returns 0 if this is not the case. Otherwise the 4 line items are
    replaced by a single ("qu", quad) item, `dev.linecount` is reset to 0
    and 1 is returned.
    '''
    items = dev.pathdict[dictkey_items]
    count = len(items)
    first = count - 4  # index of the first of the 4 lines

    # collect the start points of the 4 lines as 8 floats
    f = []
    for offset in range(4):
        line = items[first + offset]
        start = JM_point_from_py(line[1])
        f.extend((start.x, start.y))
        lp = JM_point_from_py(line[2])  # after the loop: end of the last line

    if lp.x != f[0] or lp.y != f[1]:
        # not a polygon!
        #dev.linecount -= 1
        return 0

    # we have detected a quad
    dev.linecount = 0  # reset this
    # a quad item is ("qu", (ul, ur, ll, lr)), where the tuple items
    # are pairs of floats representing a quad corner each.

    # relationship of float array to quad points:
    # (0, 1) = ul, (2, 3) = ll, (6, 7) = ur, (4, 5) = lr
    quad = mupdf.fz_make_quad(f[0], f[1], f[6], f[7], f[2], f[3], f[4], f[5])
    items[first] = ('qu', JM_py_from_quad(quad))  # replace item -4 by the quad
    del items[first + 1 : count]  # delete remaining 3 items
    return 1
def jm_checkrect(dev):
    '''
    Check whether the last 3 path items represent a rectangle.

    `dev.linecount` is reset to 0 in any case.
    Returns 1 if we have modified the path, otherwise 0. On success the 3
    line items are replaced by one ('re', rect, orientation) item.
    '''
    dev.linecount = 0  # reset line count
    items = dev.pathdict[dictkey_items]
    count = len(items)

    first_line = items[count - 3]
    ll = JM_point_from_py(first_line[1])
    lr = JM_point_from_py(first_line[2])
    # no need to extract the middle line!
    third_line = items[count - 1]
    ur = JM_point_from_py(third_line[1])
    ul = JM_point_from_py(third_line[2])

    # Assumption:
    # When decomposing rects, MuPDF always starts with a horizontal line,
    # followed by a vertical line, followed by a horizontal line.
    # First line: (ll, lr), third line: (ul, ur).
    # If 1st line is below 3rd line, we record anti-clockwise (+1), else
    # clockwise (-1) orientation.
    if ll.y != lr.y or ll.x != ul.x or ur.y != ul.y or ur.x != lr.x:
        return 0  # not a rectangle

    # we have a rect, replace last 3 "l" items by one "re" item.
    if ul.y < lr.y:
        r = mupdf.fz_make_rect(ul.x, ul.y, lr.x, lr.y)
        orientation = 1
    else:
        r = mupdf.fz_make_rect(ll.x, ll.y, ur.x, ur.y)
        orientation = -1

    items[count - 3] = ('re', JM_py_from_rect(r), orientation)  # replace item -3 by rect
    del items[count - 2 : count]  # delete remaining 2 items
    return 1
def jm_trace_text(dev, text, type_, ctm, colorspace, color, alpha, seqno):
    '''
    Call `jm_trace_text_span()` for every span of `text`.

    Spans are visited by starting at `text.head` and following `.next`
    until a falsy value is reached; all other arguments are passed through
    unchanged.
    '''
    span = text.head
    while span:
        jm_trace_text_span(dev, span, type_, ctm, colorspace, color, alpha, seqno)
        span = span.next
def jm_trace_text_span(dev, span, type_, ctm, colorspace, color, alpha, seqno):
    '''
    Build the trace dictionary for one text span and append it to `dev.out`.

    Python counterpart of the C function
    jm_trace_text_span(fz_context *ctx, PyObject *out, fz_text_span *span, int type, fz_matrix ctm, fz_colorspace *colorspace, const float *color, float alpha, size_t seqno)

    Args:
        dev: device object; its `linewidth`, `layer_name` and `out` are used.
        span: low-level mupdf.fz_text_span (asserted).
        type_: stored unchanged under key 'type'.
        ctm: low-level mupdf.fz_matrix (asserted).
        colorspace: colorspace of <color>; if falsy, the color is (0, 0, 0).
        color: color components, converted to RGB when a colorspace is given.
        alpha: stored unchanged under key 'opacity'.
        seqno: stored unchanged under key 'seqno'.
    '''
    out_font = None
    assert isinstance( span, mupdf.fz_text_span)
    span = mupdf.FzTextSpan( span)
    assert isinstance( ctm, mupdf.fz_matrix)
    ctm = mupdf.FzMatrix( ctm)
    fontname = JM_font_name( span.font())
    #float rgb[3];
    #PyObject *chars = PyTuple_New(span->len);

    mat = mupdf.fz_concat(span.trm(), ctm)  # text transformation matrix
    # NOTE: `dir` shadows the Python builtin inside this function.
    dir = mupdf.fz_transform_vector(mupdf.fz_make_point(1, 0), mat) # writing direction
    fsize = math.sqrt(dir.x * dir.x + dir.y * dir.y)    # font size
    dir = mupdf.fz_normalize_vector(dir)

    space_adv = 0
    asc = JM_font_ascender( span.font())
    dsc = JM_font_descender( span.font())
    if asc < 1e-3:  # probably Tesseract font
        dsc = -0.1
        asc = 0.9

    # compute effective ascender / descender
    ascsize = asc * fsize / (asc - dsc)
    dscsize = dsc * fsize / (asc - dsc)
    fflags = 0  # font flags
    mono = mupdf.fz_font_is_monospaced( span.font())
    fflags += mono * TEXT_FONT_MONOSPACED
    fflags += mupdf.fz_font_is_italic( span.font()) * TEXT_FONT_ITALIC
    fflags += mupdf.fz_font_is_serif( span.font()) * TEXT_FONT_SERIFED
    fflags += mupdf.fz_font_is_bold( span.font()) * TEXT_FONT_BOLD

    last_adv = 0

    # walk through characters of span
    span_bbox = mupdf.FzRect()
    # rotation matrix built from the normalized writing direction
    rot = mupdf.fz_make_matrix(dir.x, dir.y, -dir.y, dir.x, 0, 0)
    if dir.x == -1: # left-right flip
        rot.d = 1

    chars = []
    for i in range( span.m_internal.len):
        adv = 0
        if span.items(i).gid >= 0:
            adv = mupdf.fz_advance_glyph( span.font(), span.items(i).gid, span.m_internal.wmode)
        adv *= fsize
        last_adv = adv
        if span.items(i).ucs == 32:
            # remember the advance of a space character found in the span
            space_adv = adv
        char_orig = mupdf.fz_make_point(span.items(i).x, span.items(i).y)
        char_orig = mupdf.fz_transform_point(char_orig, ctm)
        # m1: move char origin to (0, 0), apply `rot`, move back
        m1 = mupdf.fz_make_matrix(1, 0, 0, 1, -char_orig.x, -char_orig.y)
        m1 = mupdf.fz_concat(m1, rot)
        m1 = mupdf.fz_concat(m1, mupdf.FzMatrix(1, 0, 0, 1, char_orig.x, char_orig.y))
        x0 = char_orig.x
        x1 = x0 + adv
        if (
                (mat.d > 0 and (dir.x == 1 or dir.x == -1))
                or
                (mat.b != 0 and mat.b == -mat.c)
                ):  # up-down flip
            y0 = char_orig.y + dscsize
            y1 = char_orig.y + ascsize
        else:
            y0 = char_orig.y - ascsize
            y1 = char_orig.y - dscsize
        char_bbox = mupdf.fz_make_rect(x0, y0, x1, y1)
        char_bbox = mupdf.fz_transform_rect(char_bbox, m1)
        # per character: (unicode, glyph id, origin, bbox)
        chars.append(
                (
                    span.items(i).ucs,
                    span.items(i).gid,
                    (
                        char_orig.x,
                        char_orig.y,
                    ),
                    (
                        char_bbox.x0,
                        char_bbox.y0,
                        char_bbox.x1,
                        char_bbox.y1,
                    ),
                )
                )
        # the span bbox is the union of all character bboxes
        if i > 0:
            span_bbox = mupdf.fz_union_rect(span_bbox, char_bbox)
        else:
            span_bbox = char_bbox
    chars = tuple(chars)

    # no space character in the span: derive a space width from the font
    if not space_adv:
        if not (fflags & TEXT_FONT_MONOSPACED):
            c, out_font = mupdf.fz_encode_character_with_fallback( span.font(), 32, 0, 0)
            space_adv = mupdf.fz_advance_glyph(
                    span.font(),
                    c,
                    span.m_internal.wmode,
                    )
            space_adv *= fsize
            if not space_adv:
                space_adv = last_adv
        else:
            space_adv = last_adv    # for mono, any char width suffices

    # make the span dictionary
    span_dict = dict()
    span_dict[ 'dir'] = JM_py_from_point(dir)
    span_dict[ 'font'] = JM_EscapeStrFromStr(fontname)
    span_dict[ 'wmode'] = span.m_internal.wmode
    span_dict[ 'flags'] =fflags
    span_dict[ "bidi_lvl"] =span.m_internal.bidi_level
    span_dict[ "bidi_dir"] = span.m_internal.markup_dir
    span_dict[ 'ascender'] = asc
    span_dict[ 'descender'] = dsc
    span_dict[ 'colorspace'] = 3

    if colorspace:
        rgb = mupdf.fz_convert_color(
                mupdf.FzColorspace( mupdf.ll_fz_keep_colorspace( colorspace)),
                color,
                mupdf.fz_device_rgb(),
                mupdf.FzColorspace(),
                mupdf.FzColorParams(),
                )
        rgb = rgb[:3]   # mupdf.fz_convert_color() always returns 4 items.
    else:
        rgb = (0, 0, 0)

    if dev.linewidth > 0:   # width of character border
        linewidth = dev.linewidth
    else:
        linewidth = fsize * 0.05    # default: 5% of font size
    #log(f'{dev.linewidth=:.4f} {fsize=:.4f} {linewidth=:.4f}')

    span_dict[ 'color'] = rgb
    span_dict[ 'size'] = fsize
    span_dict[ "opacity"] = alpha
    span_dict[ "linewidth"] = linewidth
    span_dict[ "spacewidth"] = space_adv
    span_dict[ 'type'] = type_
    span_dict[ 'bbox'] = JM_py_from_rect(span_bbox)
    span_dict[ 'layer'] = dev.layer_name
    span_dict[ "seqno"] = seqno
    span_dict[ 'chars'] = chars
    #log(f'{span_dict=}')
    dev.out.append( span_dict)
def jm_lineart_color(colorspace, color):
    '''
    Convert <color>, given in <colorspace>, to RGB.

    Returns an empty tuple when no colorspace is supplied, otherwise the
    first three components delivered by mupdf.ll_fz_convert_color().
    '''
    if not colorspace:
        return ()
    try:
        # Both wrapper objects must stay bound to Python names while the
        # low-level call runs. Passing e.g. `mupdf.FzColorParams().internal()`
        # inline seems to end up with a corrupted `params`.
        target_cs = mupdf.FzColorspace( mupdf.FzColorspace.Fixed_RGB)
        params = mupdf.FzColorParams()
        converted = mupdf.ll_fz_convert_color(
                colorspace,
                color,
                target_cs.m_internal,
                None,
                params.internal(),
                )
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
    return converted[:3]
def jm_lineart_drop_device(dev, ctx):
    '''
    Reset the output containers of the device.

    `dev.out` is replaced by an empty list only if it currently is a list;
    `dev.scissors` is always replaced by an empty list.
    '''
    if isinstance(dev.out, list):
        dev.out = []
    dev.scissors = []
-
-
def jm_lineart_fill_path( dev, ctx, path, even_odd, ctm, colorspace, color, alpha, color_params):
    '''
    Device callback for a filled path.

    Walks <path> via jm_lineart_path(). If that produced any items, the path
    dictionary is completed with the fill properties, handed to
    jm_append_merge(), and the device sequence number is incremented.
    '''
    fill_rule = bool(even_odd)
    try:
        assert isinstance( ctm, mupdf.fz_matrix)
        dev.ctm = mupdf.FzMatrix( ctm)
        dev.path_type = trace_device_FILL_PATH
        jm_lineart_path( dev, ctx, path)
        record = dev.pathdict
        if record is None:
            # the path contained no items: nothing to report
            return
        record[ dictkey_type] = "f"
        record[ "even_odd"] = fill_rule
        record[ "fill_opacity"] = alpha
        record[ "fill"] = jm_lineart_color( colorspace, color)
        record[ dictkey_rect] = JM_py_from_rect(dev.pathrect)
        record[ "seqno"] = dev.seqno
        record[ 'layer'] = dev.layer_name
        if dev.clips:
            # nesting level is only reported when clips are being traced
            record[ 'level'] = dev.depth
        jm_append_merge(dev)
        dev.seqno += 1
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
- # There are 3 text trace types:
- # 0 - fill text (PDF Tr 0)
- # 1 - stroke text (PDF Tr 1)
- # 3 - ignore text (PDF Tr 3)
def jm_lineart_fill_text( dev, ctx, text, ctm, colorspace, color, alpha, color_params):
    '''
    Device callback for filled text: trace <text> with type 0, then increment
    the device sequence number.
    '''
    if 0:
        # disabled debugging output
        log(f'{type(ctx)=} {ctx=}')
        log(f'{type(dev)=} {dev=}')
        log(f'{type(text)=} {text=}')
        log(f'{type(ctm)=} {ctm=}')
        log(f'{type(colorspace)=} {colorspace=}')
        log(f'{type(color)=} {color=}')
        log(f'{type(alpha)=} {alpha=}')
        log(f'{type(color_params)=} {color_params=}')
    jm_trace_text(dev, text, 0, ctm, colorspace, color, alpha, dev.seqno)
    dev.seqno += 1
def jm_lineart_ignore_text(dev, text, ctm):
    '''
    Trace ignored text: <text> is traced with type 3, no colorspace, no color
    and opacity 1; then the device sequence number is incremented.
    '''
    #log(f'{getattr(dev, "pathdict", None)=}')
    jm_trace_text(dev, text, 3, ctm, None, None, 1, dev.seqno)
    dev.seqno += 1
class Walker(mupdf.FzPathWalker2):
    '''
    Path walker whose callbacks record the segments of a path in
    `dev.pathdict[dictkey_items]` and keep the device's path state
    (`lastpoint`, `firstpoint`, `havemove`, `linecount`, `pathrect`) current.
    All coordinates are transformed with `dev.ctm` before being stored.
    '''

    def __init__(self, dev):
        super().__init__()
        # enable the Python overrides for all four path callbacks
        self.use_virtual_moveto()
        self.use_virtual_lineto()
        self.use_virtual_curveto()
        self.use_virtual_closepath()
        self.dev = dev

    def closepath(self, ctx):    # trace_close().
        '''
        Close the current sub-path.

        After exactly 3 consecutive lines, jm_checkrect() gets a chance to
        handle the items; if it returns true nothing else is done.
        Otherwise, if a 'moveto' is pending, a line back to the first point is
        appended when needed and "closePath" is set to False; without a
        pending 'moveto', "closePath" is set to True.
        '''
        #log(f'Walker(): {self.dev.pathdict=}')
        try:
            if self.dev.linecount == 3:
                if jm_checkrect(self.dev):
                    #log(f'end1: {self.dev.pathdict=}')
                    return
            self.dev.linecount = 0   # reset # of consec. lines

            if self.dev.havemove:
                if self.dev.lastpoint != self.dev.firstpoint:
                    # explicit line from the current point back to the start
                    item = ("l", JM_py_from_point(self.dev.lastpoint),
                            JM_py_from_point(self.dev.firstpoint))
                    self.dev.pathdict[dictkey_items].append(item)
                    self.dev.lastpoint = self.dev.firstpoint
                self.dev.pathdict["closePath"] = False

            else:
                #log('setting self.dev.pathdict[ "closePath"] to true')
                self.dev.pathdict[ "closePath"] = True
                #log(f'end2: {self.dev.pathdict=}')

            self.dev.havemove = 0

        except Exception:
            if g_exceptions_verbose:    exception_info()
            raise

    def curveto(self, ctx, x1, y1, x2, y2, x3, y3):   # trace_curveto().
        '''
        Append a "c" item (start point plus the three transformed points)
        and make the third point the new current point. All three points are
        included in `dev.pathrect`.
        '''
        #log(f'Walker(): {self.dev.pathdict=}')
        try:
            self.dev.linecount = 0  # reset # of consec. lines
            p1 = mupdf.fz_make_point(x1, y1)
            p2 = mupdf.fz_make_point(x2, y2)
            p3 = mupdf.fz_make_point(x3, y3)
            p1 = mupdf.fz_transform_point(p1, self.dev.ctm)
            p2 = mupdf.fz_transform_point(p2, self.dev.ctm)
            p3 = mupdf.fz_transform_point(p3, self.dev.ctm)
            self.dev.pathrect = mupdf.fz_include_point_in_rect(self.dev.pathrect, p1)
            self.dev.pathrect = mupdf.fz_include_point_in_rect(self.dev.pathrect, p2)
            self.dev.pathrect = mupdf.fz_include_point_in_rect(self.dev.pathrect, p3)

            list_ = (
                    "c",
                    JM_py_from_point(self.dev.lastpoint),
                    JM_py_from_point(p1),
                    JM_py_from_point(p2),
                    JM_py_from_point(p3),
                    )
            self.dev.lastpoint = p3
            self.dev.pathdict[ dictkey_items].append( list_)
        except Exception:
            if g_exceptions_verbose:    exception_info()
            raise

    def lineto(self, ctx, x, y):   # trace_lineto().
        '''
        Append an "l" item from the current point to the transformed (x, y),
        which becomes the new current point and is included in
        `dev.pathrect`. After the 4th consecutive line of a path that is not
        a fill path, jm_checkquad() is invoked.
        '''
        #log(f'Walker(): {self.dev.pathdict=}')
        try:
            p1 = mupdf.fz_transform_point( mupdf.fz_make_point(x, y), self.dev.ctm)
            self.dev.pathrect = mupdf.fz_include_point_in_rect( self.dev.pathrect, p1)
            list_ = (
                    'l',
                    JM_py_from_point( self.dev.lastpoint),
                    JM_py_from_point(p1),
                    )
            self.dev.lastpoint = p1
            items = self.dev.pathdict[ dictkey_items]
            items.append( list_)
            self.dev.linecount += 1 # counts consecutive lines
            if self.dev.linecount == 4 and self.dev.path_type != trace_device_FILL_PATH:
                # shrink to "re" or "qu" item
                jm_checkquad(self.dev)
        except Exception:
            if g_exceptions_verbose:    exception_info()
            raise

    def moveto(self, ctx, x, y):   # trace_moveto().
        '''
        Start a new sub-path at the transformed (x, y): it becomes both the
        current and the first point, 'havemove' is set and the line counter
        is reset. If `dev.pathrect` is still infinite, it is initialised to
        this single point.
        '''
        if 0 and isinstance(self.dev.pathdict, dict):
            # disabled debugging output
            log(f'self.dev.pathdict:')
            for n, v in self.dev.pathdict.items():
                log( ' {type(n)=} {len(n)=} {n!r} {n}: {v!r}: {v}')

        #log(f'Walker(): {type(self.dev.pathdict)=} {self.dev.pathdict=}')
        try:
            #log( '{=dev.ctm type(dev.ctm)}')
            self.dev.lastpoint = mupdf.fz_transform_point(
                    mupdf.fz_make_point(x, y),
                    self.dev.ctm,
                    )
            if mupdf.fz_is_infinite_rect( self.dev.pathrect):
                self.dev.pathrect = mupdf.fz_make_rect(
                        self.dev.lastpoint.x,
                        self.dev.lastpoint.y,
                        self.dev.lastpoint.x,
                        self.dev.lastpoint.y,
                        )
            self.dev.firstpoint = self.dev.lastpoint
            self.dev.havemove = 1
            self.dev.linecount = 0  # reset # of consec. lines
        except Exception:
            if g_exceptions_verbose:    exception_info()
            raise
def jm_lineart_path(dev, ctx, path):
    '''
    Create the "items" list of the path dictionary
    * create a new path dictionary with an empty items list
    * reset the path rectangle (to infinite) and the end point of the path
    * reset count of consecutive lines
    * invoke fz_walk_path(), which creates the single items
    * if no items detected, set the path dictionary to None
    '''
    #log(f'{getattr(dev, "pathdict", None)=}')
    try:
        dev.pathrect = mupdf.FzRect( mupdf.FzRect.Fixed_INFINITE)
        dev.linecount = 0
        dev.lastpoint = mupdf.FzPoint( 0, 0)
        dev.pathdict = dict()
        dev.pathdict[ dictkey_items] = []

        # First time we create a Walker instance is slow, e.g. 0.3s, then later
        # times run in around 0.01ms. If Walker is defined locally instead of
        # globally, each time takes 0.3s.
        #
        walker = Walker(dev)
        # Unlike fz_run_page(), fz_path_walker callbacks are not passed
        # a pointer to the struct, instead they get an arbitrary
        # void*. The underlying C++ Director callbacks use this void* to
        # identify the fz_path_walker instance so in turn we need to pass
        # arg=walker.m_internal.
        mupdf.fz_walk_path( mupdf.FzPath(mupdf.ll_fz_keep_path(path)), walker, walker.m_internal)
        # Check if any items were added ...
        if not dev.pathdict[ dictkey_items]:
            dev.pathdict = None
    except Exception:
        if g_exceptions_verbose:    exception_info()
        raise
def jm_lineart_stroke_path( dev, ctx, path, stroke, ctm, colorspace, color, alpha, color_params):
    '''
    Device callback for a stroked path.

    Walks <path> via jm_lineart_path(). If that produced any items, the path
    dictionary is completed with the stroke properties, handed to
    jm_append_merge(), and the device sequence number is incremented.

    The 'dashes' entry is always a Python string of the form
    "[ d1 d2 ... ] phase" ("[] 0" if there is no dash pattern). Previously a
    mupdf buffer object was stored when a dash pattern was present, which was
    inconsistent with the string stored in the no-dash case.
    '''
    #log(f'{dev.pathdict=} {dev.clips=}')
    try:
        assert isinstance( ctm, mupdf.fz_matrix)
        # Scale factor applied to width and dashes; only derived from the
        # matrix if |a| == |d| (a != 0) or |b| == |c| (b != 0), otherwise 1.
        dev.pathfactor = 1
        if ctm.a != 0 and abs(ctm.a) == abs(ctm.d):
            dev.pathfactor = abs(ctm.a)
        elif ctm.b != 0 and abs(ctm.b) == abs(ctm.c):
            dev.pathfactor = abs(ctm.b)
        dev.ctm = mupdf.FzMatrix( ctm)  # fz_concat(ctm, dev_ptm);
        dev.path_type = trace_device_STROKE_PATH

        jm_lineart_path( dev, ctx, path)
        if dev.pathdict is None:
            # the path contained no items: nothing to report
            return
        dev.pathdict[ dictkey_type] = 's'
        dev.pathdict[ 'stroke_opacity'] = alpha
        dev.pathdict[ 'color'] = jm_lineart_color( colorspace, color)
        dev.pathdict[ dictkey_width] = dev.pathfactor * stroke.linewidth
        dev.pathdict[ 'lineCap'] = (
                stroke.start_cap,
                stroke.dash_cap,
                stroke.end_cap,
                )
        # NOTE(review): the join style is scaled by pathfactor just like the
        # width; kept unchanged - confirm that this is intended.
        dev.pathdict[ 'lineJoin'] = dev.pathfactor * stroke.linejoin
        if 'closePath' not in dev.pathdict:
            #log('setting dev.pathdict["closePath"] to false')
            dev.pathdict['closePath'] = False

        # output the "dashes" string
        if stroke.dash_len:
            parts = ['[ ']  # left bracket
            for i in range( stroke.dash_len):
                # We use mupdf python's SWIG-generated floats_getitem() fn to
                # access float *stroke.dash_list[].
                value = mupdf.floats_getitem( stroke.dash_list, i)   # stroke.dash_list[i].
                parts.append( f'{_format_g(dev.pathfactor * value)} ')
            parts.append( f'] {_format_g(dev.pathfactor * stroke.dash_phase)}')
            dev.pathdict[ 'dashes'] = ''.join( parts)
        else:
            dev.pathdict[ 'dashes'] = '[] 0'
        dev.pathdict[ dictkey_rect] = JM_py_from_rect(dev.pathrect)
        dev.pathdict['layer'] = dev.layer_name
        dev.pathdict[ 'seqno'] = dev.seqno
        if dev.clips:
            # nesting level is only reported when clips are being traced
            dev.pathdict[ 'level'] = dev.depth
        jm_append_merge(dev)
        dev.seqno += 1

    except Exception:
        if g_exceptions_verbose:    exception_info()
        raise
def jm_lineart_clip_path(dev, ctx, path, even_odd, ctm, scissor):
    '''
    Device callback for a clip path.

    Does nothing unless clips are being traced. Otherwise <path> is walked
    and, if it has items, reported as a 'clip' entry together with its
    scissor rectangle; the nesting depth is then increased by one.
    '''
    if not dev.clips:
        return
    dev.ctm = mupdf.FzMatrix(ctm)
    dev.path_type = trace_device_CLIP_PATH
    jm_lineart_path(dev, ctx, path)
    if dev.pathdict is None:
        return
    dev.pathdict[ dictkey_type] = 'clip'
    dev.pathdict[ 'even_odd'] = True if even_odd else False
    # keep a value set while walking the path, default to False otherwise
    dev.pathdict.setdefault('closePath', False)

    clip_rect = compute_scissor(dev)
    dev.pathdict['scissor'] = JM_py_from_rect(clip_rect)
    dev.pathdict['level'] = dev.depth
    dev.pathdict['layer'] = dev.layer_name
    jm_append_merge(dev)
    dev.depth += 1
def jm_lineart_clip_stroke_path(dev, ctx, path, stroke, ctm, scissor):
    '''
    Device callback for a clip defined by a stroked path.

    Does nothing unless clips are being traced. Otherwise <path> is walked
    and, if it has items, reported as a 'clip' entry ('even_odd' is None)
    together with its scissor rectangle; the nesting depth is then increased
    by one.
    '''
    if not dev.clips:
        return
    dev.ctm = mupdf.FzMatrix(ctm)    # fz_concat(ctm, trace_device_ptm);
    dev.path_type = trace_device_CLIP_STROKE_PATH
    jm_lineart_path(dev, ctx, path)
    if dev.pathdict is None:
        return
    # Use the shared key constant, exactly as jm_lineart_clip_path() does.
    # The previous string literal 'dictkey_type' created a stray key of that
    # name and left the entry without its real type key.
    dev.pathdict[ dictkey_type] = 'clip'
    dev.pathdict['even_odd'] = None
    if 'closePath' not in dev.pathdict:
        #log(f'setting dev.pathdict["closePath"] to False')
        dev.pathdict['closePath'] = False
    dev.pathdict['scissor'] = JM_py_from_rect(compute_scissor(dev))
    dev.pathdict['level'] = dev.depth
    dev.pathdict['layer'] = dev.layer_name
    jm_append_merge(dev)
    dev.depth += 1
def jm_lineart_clip_stroke_text(dev, ctx, text, stroke, ctm, scissor):
    '''
    Device callback for a stroked-text clip. When clips are traced, push a
    scissor via compute_scissor() and increase the nesting depth.
    '''
    if not dev.clips:
        return
    compute_scissor(dev)
    dev.depth += 1
def jm_lineart_clip_text(dev, ctx, text, ctm, scissor):
    '''
    Device callback for a text clip. When clips are traced, push a scissor
    via compute_scissor() and increase the nesting depth.
    '''
    if not dev.clips:
        return
    compute_scissor(dev)
    dev.depth += 1
def jm_lineart_clip_image_mask( dev, ctx, image, ctm, scissor):
    '''
    Device callback for an image-mask clip. When clips are traced, push a
    scissor via compute_scissor() and increase the nesting depth.
    '''
    if not dev.clips:
        return
    compute_scissor(dev)
    dev.depth += 1
-
def jm_lineart_pop_clip(dev, ctx):
    '''
    Device callback ending the innermost clip.

    Removes the last scissor and decreases the nesting depth. Nothing happens
    if clips are not traced or if there is no scissor (None or empty list).
    '''
    if dev.clips and dev.scissors:
        dev.scissors.pop()
        dev.depth -= 1
def jm_lineart_begin_layer(dev, ctx, name):
    '''
    Device callback starting a layer: remember its name on the device.
    A falsy <name> is stored as the empty string.
    '''
    dev.layer_name = name or ""
def jm_lineart_end_layer(dev, ctx):
    '''Device callback ending a layer: reset the device's layer name to "".'''
    dev.layer_name = ""
def jm_lineart_begin_group(dev, ctx, bbox, cs, isolated, knockout, blendmode, alpha):
    '''
    Device callback starting a group.

    Only active when clips are traced: a "group" entry describing the group
    is passed to jm_append_merge() and the nesting depth is increased.
    <cs> is not used.
    '''
    #log(f'{dev.pathdict=} {dev.clips=}')
    if not dev.clips:
        return
    dev.pathdict = { # Py_BuildValue("{s:s,s:N,s:N,s:N,s:s,s:f,s:i,s:N}",
            "type": "group",
            "rect": JM_py_from_rect(bbox),
            "isolated": bool(isolated),
            "knockout": bool(knockout),
            "blendmode": mupdf.fz_blendmode_name(blendmode),
            "opacity": alpha,
            "level": dev.depth,
            "layer": dev.layer_name
            }
    jm_append_merge(dev)
    dev.depth += 1
def jm_lineart_end_group(dev, ctx):
    '''
    Device callback ending a group: decrease the nesting depth, but only
    when clips are traced.
    '''
    #log(f'{dev.pathdict=} {dev.clips=}')
    if not dev.clips:
        return
    dev.depth -= 1
def jm_lineart_stroke_text(dev, ctx, text, stroke, ctm, colorspace, color, alpha, color_params):
    '''
    Device callback for stroked text: trace <text> with type 1, then
    increment the device sequence number. <stroke> is not used here.
    '''
    jm_trace_text(dev, text, 1, ctm, colorspace, color, alpha, dev.seqno)
    dev.seqno += 1
def jm_dev_linewidth( dev, ctx, path, stroke, matrix, colorspace, color, alpha, color_params):
    '''
    Stroke-path callback that only remembers the stroke's line width on the
    device and increments the sequence number.
    '''
    dev.linewidth = stroke.linewidth
    jm_increase_seqno( dev, ctx)
def jm_increase_seqno( dev, ctx, *vargs):
    '''
    Generic device callback: increment the device sequence number.
    Any further positional arguments are accepted and ignored, so the
    function can be bound to callbacks with different signatures.
    '''
    try:
        dev.seqno += 1
    except Exception:
        if g_exceptions_verbose:    exception_info()
        raise
def planish_line(p1: point_like, p2: point_like) -> Matrix:
    """Compute matrix which maps line from p1 to p2 to the x-axis, such that it
    maintains its length and p1 * matrix = Point(0, 0).

    Args:
        p1, p2: point_like
    Returns:
        Matrix which maps p1 to Point(0, 0) and p2 to a point on the x axis at
        the same distance to Point(0,0). Will always combine a rotation and a
        translation.
    """
    # normalize both arguments to Point objects before delegating
    p1 = Point(p1)
    p2 = Point(p2)
    return Matrix(util_hor_matrix(p1, p2))
class JM_image_reporter_Filter(mupdf.PdfFilterOptions2):
    '''
    Filter options object whose image filter callback forwards every image
    to JM_image_filter().
    '''
    def __init__(self):
        super().__init__()
        # enable the Python override of image_filter()
        self.use_virtual_image_filter()

    def image_filter( self, ctx, ctm, name, image):
        '''Forward the image (with <ctm> wrapped as FzMatrix) to JM_image_filter().'''
        assert isinstance(ctm, mupdf.fz_matrix)
        JM_image_filter(self, mupdf.FzMatrix(ctm), name, image)
        if mupdf_cppyy:
            # cppyy doesn't appear to treat returned None as nullptr,
            # resulting in obscure 'python exception' exception.
            return 0
class JM_new_bbox_device_Device(mupdf.FzDevice2):
    '''
    Device that routes path, text, shade and image callbacks to the
    jm_bbox_*() functions and tracks the current layer name.
    '''
    def __init__(self, result, layers):
        super().__init__()
        self.result = result
        self.layers = layers
        self.layer_name = ""

        # enable the Python overrides bound at class level below
        self.use_virtual_fill_path()
        self.use_virtual_stroke_path()
        self.use_virtual_fill_text()
        self.use_virtual_stroke_text()
        self.use_virtual_ignore_text()
        self.use_virtual_fill_shade()
        self.use_virtual_fill_image()
        self.use_virtual_fill_image_mask()

        self.use_virtual_begin_layer()
        self.use_virtual_end_layer()

    # layer tracking is shared with the lineart device
    begin_layer = jm_lineart_begin_layer
    end_layer = jm_lineart_end_layer

    fill_path = jm_bbox_fill_path
    stroke_path = jm_bbox_stroke_path
    fill_text = jm_bbox_fill_text
    stroke_text = jm_bbox_stroke_text
    ignore_text = jm_bbox_ignore_text
    fill_shade = jm_bbox_fill_shade
    fill_image = jm_bbox_fill_image
    fill_image_mask = jm_bbox_fill_image_mask
-
class JM_new_output_fileptr_Output(mupdf.FzOutput2):
    '''
    Output object that forwards write, seek, tell and truncate requests to
    the file-like object <bio>.
    '''
    def __init__(self, bio):
        super().__init__()
        self.bio = bio
        # enable the Python overrides defined below
        self.use_virtual_write()
        self.use_virtual_seek()
        self.use_virtual_tell()
        self.use_virtual_truncate()

    def seek( self, ctx, offset, whence):
        '''Forward to bio.seek() and return its result.'''
        return self.bio.seek( offset, whence)

    def tell( self, ctx):
        '''Return bio.tell().'''
        ret = self.bio.tell()
        return ret

    def truncate( self, ctx):
        '''Forward to bio.truncate() (no size argument) and return its result.'''
        return self.bio.truncate()

    def write(self, ctx, data_raw, data_length):
        '''Convert the raw data to Python bytes and pass them to bio.write().'''
        data = mupdf.raw_to_python_bytes(data_raw, data_length)
        return self.bio.write(data)
def compute_scissor(dev):
    '''
    Determine the scissor of the clip being started and push it onto
    `dev.scissors`.

    A scissor of a deeper clip level is a sub rectangle of the preceding one:
    if a previous scissor exists, the new one is its intersection with
    `dev.pathrect`, otherwise it is `dev.pathrect` itself.
    The new scissor is returned.
    '''
    if dev.scissors is None:
        dev.scissors = []
    if len(dev.scissors) > 0:
        enclosing = JM_rect_from_py(dev.scissors[-1])
        scissor = mupdf.fz_intersect_rect(enclosing, dev.pathrect)
    else:
        scissor = dev.pathrect
    dev.scissors.append(JM_py_from_rect(scissor))
    return scissor
class JM_new_lineart_device_Device(mupdf.FzDevice2):
    '''
    LINEART device for Python method Page.get_cdrawings()

    Path and clip callbacks are routed to the jm_lineart_*() functions.
    Text, shade and image callbacks only increment the sequence number.
    '''
    #log(f'JM_new_lineart_device_Device()')
    def __init__(self, out, clips, method):
        '''
        Args:
            out: object receiving the results; stored as `self.out`.
            clips: stored as `self.clips`; the callbacks only report clips and
                groups when this is true.
            method: stored as `self.method`.
        '''
        #log(f'JM_new_lineart_device_Device.__init__()')
        super().__init__()
        # fixme: this results in "Unexpected call of unimplemented virtual_fnptrs fn FzDevice2::drop_device().".
        #self.use_virtual_drop_device()
        self.use_virtual_fill_path()
        self.use_virtual_stroke_path()
        self.use_virtual_clip_path()
        self.use_virtual_clip_image_mask()
        self.use_virtual_clip_stroke_path()
        self.use_virtual_clip_stroke_text()
        self.use_virtual_clip_text()

        # These three were previously referenced without being called, so
        # the text callbacks bound below (which increment seqno) were never
        # enabled.
        self.use_virtual_fill_text()
        self.use_virtual_stroke_text()
        self.use_virtual_ignore_text()

        self.use_virtual_fill_shade()
        self.use_virtual_fill_image()
        self.use_virtual_fill_image_mask()

        self.use_virtual_pop_clip()

        self.use_virtual_begin_group()
        self.use_virtual_end_group()

        self.use_virtual_begin_layer()
        self.use_virtual_end_layer()

        self.out = out
        self.seqno = 0
        self.depth = 0
        self.clips = clips
        self.method = method

        self.scissors = None
        self.layer_name = ""  # optional content name

        self.linewidth = 0
        self.ptm = mupdf.FzMatrix()
        self.ctm = mupdf.FzMatrix()
        self.rot = mupdf.FzMatrix()
        self.lastpoint = mupdf.FzPoint()
        self.firstpoint = mupdf.FzPoint()
        self.havemove = 0
        self.pathrect = mupdf.FzRect()
        self.pathfactor = 0
        self.linecount = 0
        self.path_type = 0

    #drop_device = jm_lineart_drop_device

    fill_path = jm_lineart_fill_path
    stroke_path = jm_lineart_stroke_path
    clip_image_mask = jm_lineart_clip_image_mask
    clip_path = jm_lineart_clip_path
    clip_stroke_path = jm_lineart_clip_stroke_path
    clip_text = jm_lineart_clip_text
    clip_stroke_text = jm_lineart_clip_stroke_text

    # non-path content only advances the sequence number
    fill_text = jm_increase_seqno
    stroke_text = jm_increase_seqno
    ignore_text = jm_increase_seqno

    fill_shade = jm_increase_seqno
    fill_image = jm_increase_seqno
    fill_image_mask = jm_increase_seqno

    pop_clip = jm_lineart_pop_clip

    begin_group = jm_lineart_begin_group
    end_group = jm_lineart_end_group

    begin_layer = jm_lineart_begin_layer
    end_layer = jm_lineart_end_layer
-
class JM_new_texttrace_device(mupdf.FzDevice2):
    '''
    Trace TEXT device for Python method Page.get_texttrace()

    Text callbacks are routed to the jm_lineart_*_text() functions, the
    stroke-path callback records the line width, and all other enabled
    callbacks only increment the sequence number.
    '''
    def __init__(self, out):
        super().__init__()
        # enable the Python overrides bound at class level below
        self.use_virtual_fill_path()
        self.use_virtual_stroke_path()
        self.use_virtual_fill_text()
        self.use_virtual_stroke_text()
        self.use_virtual_ignore_text()
        self.use_virtual_fill_shade()
        self.use_virtual_fill_image()
        self.use_virtual_fill_image_mask()

        self.use_virtual_begin_layer()
        self.use_virtual_end_layer()

        # list receiving one dictionary per traced text span
        self.out = out

        self.seqno = 0
        self.depth = 0
        self.clips = 0
        self.method = None

        self.seqno = 0  # (repeated assignment, same value as above)
        self.pathdict = dict()
        self.scissors = list()
        self.linewidth = 0
        self.ptm = mupdf.FzMatrix()
        self.ctm = mupdf.FzMatrix()
        self.rot = mupdf.FzMatrix()
        self.lastpoint = mupdf.FzPoint()
        self.pathrect = mupdf.FzRect()
        self.pathfactor = 0
        self.linecount = 0
        self.path_type = 0
        self.layer_name = ""

    fill_path = jm_increase_seqno
    stroke_path = jm_dev_linewidth
    fill_text = jm_lineart_fill_text
    stroke_text = jm_lineart_stroke_text
    ignore_text = jm_lineart_ignore_text
    fill_shade = jm_increase_seqno
    fill_image = jm_increase_seqno
    fill_image_mask = jm_increase_seqno

    begin_layer = jm_lineart_begin_layer
    end_layer = jm_lineart_end_layer
def ConversionHeader(i: str, filename: OptStr ="unknown"):
    """Return the document header text for an output format.

    Args:
        i: format name, case-insensitive; one of "html", "json", "xml",
            "xhtml". Any other value selects the (empty) plain text header.
        filename: name inserted into the "xml" and "json" headers.
    Returns:
        The header string of the selected format.
    """
    kind = i.lower()
    import textwrap
    html_header = textwrap.dedent("""
        <!DOCTYPE html>
        <html>
        <head>
        <style>
        body{background-color:gray}
        div{position:relative;background-color:white;margin:1em auto}
        p{position:absolute;margin:0}
        img{position:absolute}
        </style>
        </head>
        <body>
        """)
    xml_header = textwrap.dedent("""
        <?xml version="1.0"?>
        <document name="%s">
        """
        % filename
        )
    xhtml_header = textwrap.dedent("""
        <?xml version="1.0"?>
        <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
        <html xmlns="http://www.w3.org/1999/xhtml">
        <head>
        <style>
        body{background-color:gray}
        div{background-color:white;margin:1em;padding:1em}
        p{white-space:pre-wrap}
        </style>
        </head>
        <body>
        """)
    json_header = '{"document": "%s", "pages": [\n' % filename
    headers = {
        "html": html_header,
        "json": json_header,
        "xml": xml_header,
        "xhtml": xhtml_header,
    }
    # unknown formats (including plain text) have no header
    return headers.get(kind, "")
def ConversionTrailer(i: str):
    """Return the document trailer text for an output format.

    Args:
        i: format name, case-insensitive; one of "html", "json", "xml",
            "xhtml". Any other value selects the (empty) plain text trailer.
    """
    html_trailer = "</body>\n</html>\n"
    trailers = {
        "html": html_trailer,
        "json": "]\n}",
        "xml": "</document>\n",
        "xhtml": html_trailer,  # same closing tags as html
    }
    return trailers.get(i.lower(), "")
def annot_preprocess(page: "Page") -> int:
    """Prepare for annotation insertion on the page.

    Returns:
        Old page rotation value. Temporarily sets rotation to 0 when required.
    Raises:
        ValueError: if the page's document is not a PDF.
    """
    CheckParent(page)
    if not page.parent.is_pdf:
        raise ValueError("is no PDF")
    old_rotation = page.rotation
    if old_rotation != 0:
        # this function does not restore the rotation; the old value is
        # returned to the caller
        page.set_rotation(0)
    return old_rotation
def annot_postprocess(page: "Page", annot: "Annot") -> None:
    """Clean up after annotation insertion.

    Set ownership flag and store annotation in page annotation dictionary.
    """
    #annot.parent = weakref.proxy(page)
    assert isinstance( page, Page)
    assert isinstance( annot, Annot)
    annot.parent = page
    # the page keeps a reference to the annotation, keyed by its id()
    page._annot_refs[id(annot)] = annot
    annot.thisown = True
def canon(c):
    '''
    Return the canonical form of character code <c> (an int).

    No-break space, line / paragraph separator, CR, LF and TAB become a
    space; ASCII upper case letters become lower case; every other code is
    returned unchanged.
    '''
    assert isinstance(c, int)
    # TODO: proper unicode case folding
    # TODO: character equivalence (a matches ä, etc)
    space_like = (0xA0, 0x2028, 0x2029, ord('\r'), ord('\n'), ord('\t'))
    if c in space_like:
        return ord(' ')
    if ord('A') <= c <= ord('Z'):
        return c + (ord('a') - ord('A'))
    return c
def chartocanon(s):
    '''
    Decode the first character of string <s> with mupdf.fz_chartorune() and
    canonicalize it with canon().

    Returns (n, c): the first value returned by fz_chartorune() and the
    canonicalized character code.
    '''
    assert isinstance(s, str)
    n, c = mupdf.fz_chartorune(s)
    c = canon(c)
    return n, c
def dest_is_valid(o, page_count, page_object_nums, names_list):
    '''
    Check the destination of PDF object <o>.

    Returns 0 if
    * <o> has an /A action of type /GoTo whose /D entry is not found by
      string_in_names_list(), or
    * <o> has a /Dest array whose first entry is rejected by
      dest_is_valid_page().
    If /Dest is a string, the result of string_in_names_list() is returned.
    In all other cases the result is 1.
    '''
    p = mupdf.pdf_dict_get( o, PDF_NAME('A'))
    if (
            mupdf.pdf_name_eq(
                mupdf.pdf_dict_get( p, PDF_NAME('S')),
                PDF_NAME('GoTo')
                )
            and not string_in_names_list(
                mupdf.pdf_dict_get( p, PDF_NAME('D')),
                names_list
                )
            ):
        return 0

    p = mupdf.pdf_dict_get( o, PDF_NAME('Dest'))
    if not p.m_internal:
        # no /Dest entry: nothing more to check
        pass
    elif mupdf.pdf_is_string( p):
        return string_in_names_list( p, names_list)
    elif not dest_is_valid_page(
            mupdf.pdf_array_get( p, 0),
            page_object_nums,
            page_count,
            ):
        return 0
    return 1
def dest_is_valid_page(obj, page_object_nums, pagecount):
    '''
    Return 1 if the object number of <obj> is non-zero and equals one of the
    first <pagecount> entries of <page_object_nums>, otherwise 0.
    '''
    num = mupdf.pdf_to_num(obj)
    if num == 0:
        return 0
    is_page = any(page_object_nums[i] == num for i in range(pagecount))
    return 1 if is_page else 0
def find_string(s, needle):
    '''
    Find the first position in <s> at which match_string() matches <needle>.

    Returns (start, end): the start index and the value returned by
    match_string() shifted by that index, or (None, None) if no position
    matches.
    '''
    assert isinstance(s, str)
    for start in range(len(s)):
        end = match_string(s[start:], needle)
        if end is None:
            continue
        end += start
        return start, end
    return None, None
def get_pdf_now() -> str:
    '''
    "Now" timestamp in PDF Format

    Returns the local time as "D:YYYYMMDDhhmmss", followed by the UTC offset
    as "+HH'mm'" (east of UTC) or "-HH'mm'" (west of UTC). No offset is
    appended if local time equals UTC.

    Fixes over the previous version:
    * hours and minutes are computed from the absolute offset; flooring a
      negative offset before taking abs() turned e.g. UTC+5:30 into +06'30'.
    * the DST offset (time.altzone) is only used while DST is in effect,
      otherwise time.timezone is used.
    '''
    import time
    now = time.localtime()
    # offsets of the time module are seconds WEST of UTC
    if time.daylight and now.tm_isdst > 0:
        offset = time.altzone
    else:
        offset = time.timezone
    tstamp = time.strftime("D:%Y%m%d%H%M%S", now)
    if offset == 0:
        return tstamp
    hours, minutes = divmod(abs(offset) // 60, 60)
    sign = "-" if offset > 0 else "+"
    return f"{tstamp}{sign}{hours:02d}'{minutes:02d}'"
class ElementPosition(object):
    """Convert a dictionary with element position information to an object."""
    def __init__(self):
        # intentionally empty: no attributes are set here
        pass
def make_story_elpos():
    '''Return a new, empty ElementPosition object.'''
    return ElementPosition()
-
def get_highlight_selection(page, start: point_like =None, stop: point_like =None, clip: rect_like =None) -> list:
    """Return rectangles of text lines between two points.

    Notes:
        The default of 'start' is top-left of 'clip'. The default of 'stop'
        is bottom-right of 'clip'.

    Args:
        start: start point_like
        stop: end point_like, must be 'below' start
        clip: consider this rect_like only, default is page rectangle
    Returns:
        List of line bbox intersections with the area established by the
        parameters.
    """
    # validate and normalize arguments
    if clip is None:
        clip = page.rect
    clip = Rect(clip)
    if start is None:
        start = clip.tl
    if stop is None:
        stop = clip.br
    # 'start' / 'stop' are documented as point_like: convert them so that
    # plain sequences work as well (.x / .y are needed below).
    start = Point(start)
    stop = Point(stop)
    clip.y0 = start.y
    clip.y1 = stop.y
    if clip.is_empty or clip.is_infinite:
        return []

    # extract text of page, clip only, no images, expand ligatures
    blocks = page.get_text(
        "dict", flags=0, clip=clip,
    )["blocks"]

    lines = []  # will return this list of rectangles
    for b in blocks:
        bbox = Rect(b["bbox"])
        if bbox.is_infinite or bbox.is_empty:
            continue
        for line in b["lines"]:
            bbox = Rect(line["bbox"])
            if bbox.is_infinite or bbox.is_empty:
                continue
            lines.append(bbox)

    if lines == []:  # did not select anything
        return lines

    lines.sort(key=lambda bbox: bbox.y1)  # sort by vertical positions

    # cut off prefix from first line if start point is close to its top
    bboxf = lines.pop(0)
    if bboxf.y0 - start.y <= 0.1 * bboxf.height:  # close enough?
        r = Rect(start.x, bboxf.y0, bboxf.br)  # intersection rectangle
        if not (r.is_empty or r.is_infinite):
            lines.insert(0, r)  # insert again if not empty
    else:
        lines.insert(0, bboxf)  # insert again

    if lines == []:  # the list might have been emptied
        return lines

    # cut off suffix from last line if stop point is close to its bottom
    bboxl = lines.pop()
    if stop.y - bboxl.y1 <= 0.1 * bboxl.height:  # close enough?
        r = Rect(bboxl.tl, stop.x, bboxl.y1)  # intersection rectangle
        if not (r.is_empty or r.is_infinite):
            lines.append(r)  # append if not empty
    else:
        lines.append(bboxl)  # append again

    return lines
def glyph_name_to_unicode(name: str) -> int:
    """Convenience function accessing unicodedata.

    Returns the code point of the character called <name>, or 65533 (the
    replacement character) if the lookup fails for any reason.
    """
    import unicodedata
    try:
        return ord(unicodedata.lookup(name))
    except Exception:
        return 65533
def hdist(dir, a, b):
    '''
    Return the absolute value of the dot product of vector a->b with <dir>,
    i.e. the distance between a and b measured along <dir> if <dir> is a unit
    vector (assumption - not checked here).
    '''
    dx = b.x - a.x
    dy = b.y - a.y
    return mupdf.fz_abs(dx * dir.x + dy * dir.y)
def make_table(rect: rect_like =(0, 0, 1, 1), cols: int =1, rows: int =1) -> list:
    """Return a list of (rows x cols) equal sized rectangles.

    Notes:
        A utility to fill a given area with table cells of equal size.
    Args:
        rect: rect_like to use as the table area
        rows: number of rows
        cols: number of columns
    Returns:
        A list with <rows> items, where each item is a list of <cols>
        PyMuPDF Rect objects of equal sizes.
    Raises:
        ValueError: if rect is empty or infinite.
    """
    rect = Rect(rect)  # ensure this is a Rect
    if rect.is_empty or rect.is_infinite:
        raise ValueError("rect must be finite and not empty")
    origin = rect.tl

    cell_height = rect.height / rows
    cell_width = rect.width / cols
    shift_right = (cell_width, 0, cell_width, 0)  # offset to the right neighbour
    shift_down = (0, cell_height, 0, cell_height)  # offset to the lower neighbour

    # top row: start with the top-left cell and keep shifting it right
    cell = Rect(origin, origin.x + cell_width, origin.y + cell_height)
    top_row = [cell]
    for _ in range(1, cols):
        cell += shift_right
        top_row.append(cell)

    # every further row is the row above it, shifted down by one cell
    table = [top_row]
    for row_no in range(1, rows):
        above = table[row_no - 1]
        table.append([c + shift_down for c in above])
    return table
def util_ensure_widget_calc(annot):
    '''
    Ensure that widgets with /AA/C JavaScript are in array AcroForm/CO

    If AcroForm/CO is not an array it is created. The annotation's xref is
    appended as an indirect reference unless the array already contains it.
    '''
    annot_obj = mupdf.pdf_annot_obj(annot.this)
    pdf = mupdf.pdf_get_bound_document(annot_obj)
    co_name = mupdf.pdf_new_name("CO")  # = PDF_NAME(CO)
    acroform = mupdf.pdf_dict_getl(
            mupdf.pdf_trailer(pdf),
            PDF_NAME('Root'),
            PDF_NAME('AcroForm'),
            )

    co_array = mupdf.pdf_dict_get(acroform, co_name)  # = AcroForm/CO
    if not mupdf.pdf_is_array(co_array):
        co_array = mupdf.pdf_dict_put_array(acroform, co_name, 2)
    count = mupdf.pdf_array_len(co_array)
    xref = mupdf.pdf_to_num(annot_obj)
    already_listed = any(
            xref == mupdf.pdf_to_num(mupdf.pdf_array_get(co_array, idx))
            for idx in range(count)
            )
    if not already_listed:
        mupdf.pdf_array_push(co_array, mupdf.pdf_new_indirect(pdf, xref, 0))
def util_make_rect( *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
    '''
    Helper for initialising rectangle classes.

    2022-09-02: This is quite different from PyMuPDF's util_make_rect(), which
    uses `goto` in ways that don't easily translate to Python.

    Returns (x0, y0, x1, y1) derived from <args>, then override with p0, p1,
    x0, y0, x1, y1 if they are not None.

    Accepts following forms for <args>:
        () returns all zeros.
        (top-left, bottom-right)
        (top-left, x1, y1)
        (x0, y0, bottom-right)
        (x0, y0, x1, y1)
        (rect)

    Where top-left and bottom-right are (x, y) or something with .x, .y
    members; rect is something with .x0, .y0, .x1, and .y1 members.

    A single sequence argument with 2 or 3 items is expanded in the same way
    as the corresponding multi-argument forms.

    Raises Exception if <args> has an unrecognised form.

    2023-11-18: we now override with p0, p1, x0, y0, x1, y1 if not None.
    '''
    def get_xy( arg):
        # Return (x, y) of a point-like argument, or (None, None) if <arg> is
        # not recognised as a point.
        if isinstance( arg, (list, tuple)) and len( arg) == 2:
            return arg[0], arg[1]
        if isinstance( arg, (Point, mupdf.FzPoint, mupdf.fz_point)):
            return arg.x, arg.y
        return None, None
    def make_tuple( a):
        # Return <a> as a sequence of numbers: tuples unchanged, points as
        # (x, y), rectangles as (x0, y0, x1, y1), lists unchanged, anything
        # else as 1-tuple.
        if isinstance( a, tuple):
            return a
        if isinstance( a, Point):
            return a.x, a.y
        elif isinstance( a, (Rect, IRect, mupdf.FzRect, mupdf.fz_rect)):
            return a.x0, a.y0, a.x1, a.y1
        if not isinstance( a, (list, tuple)):
            a = a,
        return a
    def handle_args():
        # Derive the four coordinates from the positional arguments only.
        if len(args) == 0:
            return 0, 0, 0, 0
        elif len(args) == 1:
            arg = args[0]
            if isinstance( arg, (list, tuple)) and len( arg) == 2:
                # (top-left, bottom-right) given as one sequence
                p1, p2 = arg
                ret = *p1, *p2
                assert len(ret) == 4
                return ret
            if isinstance( arg, (list, tuple)) and len( arg) == 3:
                # one point plus two numbers given as one sequence
                a, b, c = arg
                a = make_tuple(a)
                b = make_tuple(b)
                c = make_tuple(c)
                ret = *a, *b, *c
                assert len(ret) == 4
                return ret
            ret = make_tuple( arg)
            assert len(ret) == 4, f'{arg=} {ret=}'
            return ret
        elif len(args) == 2:
            ret = get_xy( args[0]) + get_xy( args[1])
            assert len(ret) == 4
            return ret
        elif len(args) == 3:
            # either (top-left, x1, y1) or (x0, y0, bottom-right)
            x0, y0 = get_xy( args[0])
            if (x0, y0) != (None, None):
                return x0, y0, args[1], args[2]
            x1, y1 = get_xy( args[2])
            if (x1, y1) != (None, None):
                return args[0], args[1], x1, y1
        elif len(args) == 4:
            return args[0], args[1], args[2], args[3]
        raise Exception( f'Unrecognised args: {args}')
    ret_x0, ret_y0, ret_x1, ret_y1 = handle_args()
    # keyword arguments override what was derived from <args>; points first,
    # then single coordinates
    if p0 is not None:  ret_x0, ret_y0 = get_xy(p0)
    if p1 is not None:  ret_x1, ret_y1 = get_xy(p1)
    if x0 is not None:  ret_x0 = x0
    if y0 is not None:  ret_y0 = y0
    if x1 is not None:  ret_x1 = x1
    if y1 is not None:  ret_y1 = y1
    return ret_x0, ret_y0, ret_x1, ret_y1
def util_make_irect( *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
    '''
    Integer variant of util_make_rect().

    All arguments are forwarded to util_make_rect(); of the four returned
    values the first two are rounded down and the last two rounded up, and
    the result is returned as a tuple of ints.
    '''
    left, top, right, bottom = util_make_rect( *args, p0=p0, p1=p1, x0=x0, y0=y0, x1=x1, y1=y1)
    return (
            int(math.floor(left)),
            int(math.floor(top)),
            int(math.ceil(right)),
            int(math.ceil(bottom)),
            )
def util_round_rect( rect):
    '''
    Pass <rect> (converted by JM_rect_from_py) through mupdf.fz_round_rect()
    and return the result converted by JM_py_from_irect().
    '''
    fz_rect = JM_rect_from_py(rect)
    rounded = mupdf.fz_round_rect(fz_rect)
    return JM_py_from_irect(rounded)
def util_transform_rect( rect, matrix):
    '''
    Apply <matrix> to <rect>.

    Delegates to extra.util_transform_rect() when g_use_extra is true;
    otherwise converts both arguments and uses mupdf.fz_transform_rect(),
    returning the result converted by JM_py_from_rect().
    '''
    if not g_use_extra:
        fz_rect = JM_rect_from_py(rect)
        fz_matrix = JM_matrix_from_py(matrix)
        return JM_py_from_rect(mupdf.fz_transform_rect(fz_rect, fz_matrix))
    return extra.util_transform_rect( rect, matrix)
def util_intersect_rect( r1, r2):
    '''
    Return mupdf.fz_intersect_rect() of the two rectangles, converted back
    with JM_py_from_rect().
    '''
    first = JM_rect_from_py(r1)
    second = JM_rect_from_py(r2)
    return JM_py_from_rect(mupdf.fz_intersect_rect(first, second))
def util_is_point_in_rect( p, r):
    '''
    Return the result of mupdf.fz_is_point_inside_rect() for point <p> and
    rectangle <r>.
    '''
    point = JM_point_from_py(p)
    rect = JM_rect_from_py(r)
    return mupdf.fz_is_point_inside_rect(point, rect)
def util_include_point_in_rect( r, p):
    '''
    Return mupdf.fz_include_point_in_rect() for rectangle <r> and point <p>,
    converted back with JM_py_from_rect().
    '''
    rect = JM_rect_from_py(r)
    point = JM_point_from_py(p)
    return JM_py_from_rect(mupdf.fz_include_point_in_rect(rect, point))
def util_point_in_quad( P, Q):
    '''
    Return the result of mupdf.fz_is_point_inside_quad() for point <P> and
    quad <Q>.
    '''
    return mupdf.fz_is_point_inside_quad(
            JM_point_from_py(P),
            JM_quad_from_py(Q),
            )
def util_transform_point( point, matrix):
    '''
    Return mupdf.fz_transform_point() of <point> by <matrix>, converted back
    with JM_py_from_point().
    '''
    fz_point = JM_point_from_py(point)
    fz_matrix = JM_matrix_from_py(matrix)
    return JM_py_from_point(mupdf.fz_transform_point(fz_point, fz_matrix))
def util_union_rect( r1, r2):
    '''
    Return mupdf.fz_union_rect() of the two rectangles, converted back with
    JM_py_from_rect().
    '''
    first = JM_rect_from_py(r1)
    second = JM_rect_from_py(r2)
    return JM_py_from_rect(mupdf.fz_union_rect(first, second))
def util_concat_matrix( m1, m2):
    '''
    Return mupdf.fz_concat() of the two matrices, converted back with
    JM_py_from_matrix().
    '''
    left = JM_matrix_from_py(m1)
    right = JM_matrix_from_py(m2)
    return JM_py_from_matrix(mupdf.fz_concat(left, right))
def util_invert_matrix(matrix):
    '''
    Invert <matrix>; the inversion is computed in Python.

    Returns:
        (0, (a, b, c, d, e, f)) with the components of the inverse, or
        (1, ()) if the determinant lies within +/- sys.float_info.epsilon
        (inversion not possible).

    The previously present, permanently disabled (`if 0:`) alternative that
    called mupdf.fz_invert_matrix() has been removed as unreachable code.
    '''
    src = JM_matrix_from_py(matrix)
    a = src.a
    det = a * src.d - src.b * src.c
    eps = sys.float_info.epsilon
    if not (det < -eps or det > eps):
        # Determinant too close to zero (a NaN determinant also ends here).
        return 1, ()
    # Results are staged in an FzMatrix, exactly as before, so values read
    # back from it are whatever that type stores.
    dst = mupdf.FzMatrix()
    rdet = 1 / det
    dst.a = src.d * rdet
    dst.b = -src.b * rdet
    dst.c = -src.c * rdet
    dst.d = a * rdet
    # dst.e is computed into a temporary and assigned after dst.f.
    a = -src.e * dst.a - src.f * dst.c
    dst.f = -src.e * dst.b - src.f * dst.d
    dst.e = a
    return 0, (dst.a, dst.b, dst.c, dst.d, dst.e, dst.f)
def util_measure_string( text, fontname, fontsize, encoding):
    '''
    Return the width of <text> when set in Base-14 font <fontname> at size
    <fontsize>.

    Each character is mapped to a single-byte code according to <encoding>
    (mupdf.PDF_SIMPLE_ENCODING_GREEK, mupdf.PDF_SIMPLE_ENCODING_CYRILLIC,
    anything else uses the windows-1252 mapping); characters without a
    mapping are measured as code 0xB7. The glyph advances are summed and
    multiplied by <fontsize>.

    Fix: the previous loop decoded with mupdf.fz_chartorune() and advanced
    its *string* index by the returned count, which is assumed to be a UTF-8
    byte count (NOTE(review): confirm against the binding), so characters
    following a non-ASCII character were skipped. We now iterate over the
    code points of <text> directly.
    '''
    if isinstance(text, (bytes, bytearray)):
        # Byte input is interpreted as UTF-8, which is what the old decoder read.
        text = bytes(text).decode('utf-8', errors='replace')
    font = mupdf.fz_new_base14_font(fontname)
    if encoding == mupdf.PDF_SIMPLE_ENCODING_GREEK:
        from_unicode = mupdf.fz_iso8859_7_from_unicode
    elif encoding == mupdf.PDF_SIMPLE_ENCODING_CYRILLIC:
        from_unicode = mupdf.fz_windows_1251_from_unicode
    else:
        from_unicode = mupdf.fz_windows_1252_from_unicode
    w = 0
    for ch in text:
        c = from_unicode(ord(ch))
        if c < 0:
            c = 0xB7    # substitute for characters outside the encoding
        g = mupdf.fz_encode_character(font, c)
        w += mupdf.fz_advance_glyph(font, g, 0)
    return w * fontsize
def util_sine_between(C, P, Q):
    '''
    For points C, P, Q compute the sine between lines CP and QP.

    C is moved by the translation that takes P to the origin, concatenated
    with a matrix built from the normalized vector Q - P; the y component of
    the normalized result is returned.
    '''
    pt_c = JM_point_from_py(C)
    pt_p = JM_point_from_py(P)
    pt_q = JM_point_from_py(Q)
    direction = mupdf.fz_normalize_vector(
            mupdf.fz_make_point(pt_q.x - pt_p.x, pt_q.y - pt_p.y)
            )
    shift = mupdf.fz_make_matrix(1, 0, 0, 1, -pt_p.x, -pt_p.y)
    turn = mupdf.fz_make_matrix(direction.x, -direction.y, direction.y, direction.x, 0, 0)
    moved = mupdf.fz_transform_point(pt_c, mupdf.fz_concat(shift, turn))
    return mupdf.fz_normalize_vector(moved).y
def util_hor_matrix(C, P):
    '''
    Return the matrix that maps two points C, P to the x-axis such that
    C -> (0,0) and the image of P has the same distance from it as P has
    from C.

    The matrix is the translation by -C concatenated with a matrix built
    from the (cosine, sine) of vector P - C.
    '''
    origin = JM_point_from_py(C)
    target = JM_point_from_py(P)

    # (cosine, sine) of vector P-C
    unit = mupdf.fz_normalize_vector(
            mupdf.fz_make_point(target.x - origin.x, target.y - origin.y)
            )

    shift = mupdf.fz_make_matrix(1, 0, 0, 1, -origin.x, -origin.y)
    turn = mupdf.fz_make_matrix(unit.x, -unit.y, unit.y, unit.x, 0, 0)
    return JM_py_from_matrix(mupdf.fz_concat(shift, turn))
def match_string(h0, n0):
    '''
    Check whether needle <n0> matches at the start of haystack <h0>.

    Both strings are read through chartocanon(), which returns
    (advance, code) for the start of the string it is given. A run of spaces
    on either side is consumed as a whole, so runs of different length match
    each other.

    Returns the haystack offset just past the last matched character if the
    needle was exhausted (its code became 0), otherwise None.

    NOTE(review): if haystack and needle yield code 0 at the same time the
    loop keeps running; whether it terminates then depends on chartocanon(),
    which is not visible here - confirm.
    '''
    space = ord(' ')

    def advance(text, pos, current):
        # Read the next code; after a space keep reading until a non-space.
        while True:
            delta, code = chartocanon(text[pos:])
            pos += delta
            if current != space or code != space:
                return pos, code

    end = 0
    h, hc = advance(h0, 0, None)
    n, nc = advance(n0, 0, None)
    while hc == nc:
        end = h
        h, hc = advance(h0, h, hc)
        n, nc = advance(n0, n, nc)
    if nc != 0:
        return None
    return end
def on_highlight_char(hits, line, ch):
    '''
    Record the quad of character <ch> of text line <line> in <hits>.

    If <hits> already holds quads and the left edge of the character quad is
    within the fuzz distances (character size times hits.hfuzz / hits.vfuzz)
    of the right edge of the last quad, the right edge of `end` is moved to
    that of the character quad and nothing is appended. Otherwise the
    character quad is appended to hits.quads and hits.len is incremented.
    '''
    assert hits
    assert isinstance(line, mupdf.FzStextLine)
    assert isinstance(ch, mupdf.FzStextChar)
    char_size = ch.m_internal.size
    vfuzz = char_size * hits.vfuzz
    hfuzz = char_size * hits.hfuzz
    ch_quad = JM_char_quad(line, ch)
    if hits.len > 0:
        # fixme: end = hits.quads[-1]
        end = JM_quad_from_py(hits.quads[hits.len - 1])
        direction = line.m_internal.dir
        adjoins = (
                hdist(direction, end.lr, ch_quad.ll) < hfuzz
                and vdist(direction, end.lr, ch_quad.ll) < vfuzz
                and hdist(direction, end.ur, ch_quad.ul) < hfuzz
                and vdist(direction, end.ur, ch_quad.ul) < vfuzz
                )
        if adjoins:
            end.ur = ch_quad.ur
            end.lr = ch_quad.lr
            assert hits.quads[-1] == end
            return
    hits.quads.append(ch_quad)
    hits.len += 1
def page_merge(doc_des, doc_src, page_from, page_to, rotate, links, copy_annots, graft_map):
    '''
    Deep-copies a source page to the target.

    Modified version of function of pdfmerge.c: we also copy annotations, but
    we skip some subtypes. In addition we rotate output.

    Args:
        doc_des, doc_src: target and source PDF documents.
        page_from: source page number.
        page_to: position at which the new page is inserted.
        rotate: value for the new page's /Rotate; -1 leaves it as copied.
        links: only forwarded to extra.page_merge(); unused otherwise.
        copy_annots: if true, annotations are copied as well.
        graft_map: object whose `.this` is passed to
            mupdf.pdf_graft_mapped_object().
    '''
    if g_use_extra:
        #log( 'Calling C++ extra.page_merge()')
        return extra.page_merge( doc_des, doc_src, page_from, page_to, rotate, links, copy_annots, graft_map)

    # page-level keys we want to copy
    copied_keys = (
            'Contents',
            'Resources',
            'MediaBox',
            'CropBox',
            'BleedBox',
            'TrimBox',
            'ArtBox',
            'Rotate',
            'UserUnit',
            )
    known_page_objs = [PDF_NAME(key) for key in copied_keys]
    page_ref = mupdf.pdf_lookup_page_obj(doc_src, page_from)

    # make new page dict in dest doc
    page_dict = mupdf.pdf_new_dict(doc_des, 4)
    mupdf.pdf_dict_put(page_dict, PDF_NAME('Type'), PDF_NAME('Page'))

    # copy (possibly inherited) objects of the source page into it
    for name in known_page_objs:
        obj = mupdf.pdf_dict_get_inheritable( page_ref, name)
        if obj.m_internal:
            #log( '{=type(graft_map) type(graft_map.this)}')
            mupdf.pdf_dict_put( page_dict, name, mupdf.pdf_graft_mapped_object(graft_map.this, obj))

    # Copy annotations, but skip Link, Popup, IRT, Widget types.
    # Keys P (parent) and Popup are removed from each annotation that is copied.
    if copy_annots:
        old_annots = mupdf.pdf_dict_get( page_ref, PDF_NAME('Annots'))
        annot_count = mupdf.pdf_array_len( old_annots)
        if annot_count > 0:
            new_annots = mupdf.pdf_dict_put_array( page_dict, PDF_NAME('Annots'), annot_count)
            for idx in range(annot_count):
                item = mupdf.pdf_array_get( old_annots, idx)
                if not item.m_internal or not mupdf.pdf_is_dict(item):
                    continue    # skip non-dict items
                if mupdf.pdf_dict_gets( item, "IRT").m_internal:
                    continue
                subtype = mupdf.pdf_dict_get( item, PDF_NAME('Subtype'))
                skipped = any(
                        mupdf.pdf_name_eq( subtype, PDF_NAME(kind))
                        for kind in ('Link', 'Popup', 'Widget')
                        )
                if skipped:
                    continue
                mupdf.pdf_dict_del( item, PDF_NAME('Popup'))
                mupdf.pdf_dict_del( item, PDF_NAME('P'))
                grafted = mupdf.pdf_graft_mapped_object( graft_map.this, item)
                ref_to_copy = mupdf.pdf_new_indirect( doc_des, mupdf.pdf_to_num( grafted), 0)
                mupdf.pdf_array_push( new_annots, ref_to_copy)

    # rotate the page
    if rotate != -1:
        mupdf.pdf_dict_put_int( page_dict, PDF_NAME('Rotate'), rotate)
    # Now add the page dictionary to dest PDF
    new_page_ref = mupdf.pdf_add_object( doc_des, page_dict)
    # Insert new page at specified location
    mupdf.pdf_insert_page( doc_des, page_to, new_page_ref)
- def paper_rect(s: str) -> Rect:
- """Return a Rect for the paper size indicated in string 's'. Must conform to the argument of method 'PaperSize', which will be invoked.
- """
- width, height = paper_size(s)
- return Rect(0.0, 0.0, width, height)
def paper_size(s: str) -> tuple:
    """Return a tuple (width, height) for a given paper format string.

    The lookup is case-insensitive and uses the dictionary returned by
    paper_sizes(); unknown formats give (-1, -1).

    Notes:
        'A4-L' will return (842, 595), the values for A4 landscape.
        Suffix '-P' and no suffix return the portrait tuple.
    """
    name = s.lower()
    landscape = name.endswith("-l")
    if landscape:
        name = name[:-2]
    # Checked independently of the "-l" test above, on the remaining name.
    if name.endswith("-p"):
        name = name[:-2]
    portrait = paper_sizes().get(name, (-1, -1))
    if landscape:
        return (portrait[1], portrait[0])
    return portrait
def paper_sizes():
    """Known paper formats @ 72 dpi as a dictionary.

    Key is the lower-case format string like "a4" for ISO-A4, value is the
    tuple (width, height). A new dictionary is built on every call.

    Information taken from the following web sites:
    www.din-formate.de
    www.din-formate.info/amerikanische-formate.html
    www.directtools.de/wissen/normen/iso.htm
    """
    table = (
            # name, width, height
            ("a0", 2384, 3370),
            ("a1", 1684, 2384),
            ("a10", 74, 105),
            ("a2", 1191, 1684),
            ("a3", 842, 1191),
            ("a4", 595, 842),
            ("a5", 420, 595),
            ("a6", 298, 420),
            ("a7", 210, 298),
            ("a8", 147, 210),
            ("a9", 105, 147),
            ("b0", 2835, 4008),
            ("b1", 2004, 2835),
            ("b10", 88, 125),
            ("b2", 1417, 2004),
            ("b3", 1001, 1417),
            ("b4", 709, 1001),
            ("b5", 499, 709),
            ("b6", 354, 499),
            ("b7", 249, 354),
            ("b8", 176, 249),
            ("b9", 125, 176),
            ("c0", 2599, 3677),
            ("c1", 1837, 2599),
            ("c10", 79, 113),
            ("c2", 1298, 1837),
            ("c3", 918, 1298),
            ("c4", 649, 918),
            ("c5", 459, 649),
            ("c6", 323, 459),
            ("c7", 230, 323),
            ("c8", 162, 230),
            ("c9", 113, 162),
            ("card-4x6", 288, 432),
            ("card-5x7", 360, 504),
            ("commercial", 297, 684),
            ("executive", 522, 756),
            ("invoice", 396, 612),
            ("ledger", 792, 1224),
            ("legal", 612, 1008),
            ("legal-13", 612, 936),
            ("letter", 612, 792),
            ("monarch", 279, 540),
            ("tabloid-extra", 864, 1296),
            )
    return {name: (width, height) for name, width, height in table}
def pdf_lookup_page_loc(doc, needle):
    '''
    Thin wrapper: returns mupdf.pdf_lookup_page_loc(doc, needle) unchanged.
    '''
    location = mupdf.pdf_lookup_page_loc(doc, needle)
    return location
def pdfobj_string(o, prefix=''):
    '''
    Returns description of mupdf.PdfObj (wrapper for pdf_obj) <o>.

    The function is disabled: its first statement is an always-failing
    assert that points to mupdf.pdf_debug_obj(). The code below the assert
    only runs if assertions are switched off.
    '''
    assert 0, 'use mupdf.pdf_debug_obj() ?'
    text = ''
    if mupdf.pdf_is_array(o):
        length = mupdf.pdf_array_len(o)
        text += f'array {length}\n'
        for idx in range(length):
            item = mupdf.pdf_array_get(o, idx)
            text += pdfobj_string(item, prefix + ' ')
            text += '\n'
    elif mupdf.pdf_is_bool(o):
        text += f'bool: {o.array_get_bool()}\n'
    elif mupdf.pdf_is_dict(o):
        length = mupdf.pdf_dict_len(o)
        text += f'dict {length}\n'
        for idx in range(length):
            key = mupdf.pdf_dict_get_key(o, idx)
            value = mupdf.pdf_dict_get( o, key)
            text += f'{prefix} {key}: '
            text += pdfobj_string( value, prefix + ' ')
            text += '\n'
    elif mupdf.pdf_is_embedded_file(o):
        text += f'embedded_file: {o.embedded_file_name()}\n'
    elif mupdf.pdf_is_indirect(o):
        text += f'indirect: ...\n'
    elif mupdf.pdf_is_int(o):
        text += f'int: {mupdf.pdf_to_int(o)}\n'
    elif mupdf.pdf_is_jpx_image(o):
        text += f'jpx_image:\n'
    elif mupdf.pdf_is_name(o):
        text += f'name: {mupdf.pdf_to_name(o)}\n'
    elif o.pdf_is_null:     # attribute is tested, not called (as before)
        text += f'null\n'
    #elif o.pdf_is_number:
    #    text += f'number\n'
    elif o.pdf_is_real:     # attribute is tested, not called (as before)
        text += f'real: {o.pdf_to_real()}\n'
    elif mupdf.pdf_is_stream(o):
        text += f'stream\n'
    elif mupdf.pdf_is_string(o):
        text += f'string: {mupdf.pdf_to_string(o)}\n'
    else:
        text += '<>\n'
    return text
def repair_mono_font(page: "Page", font: "Font") -> None:
    """Repair character spacing for mono fonts.

    Notes:
        Some mono-spaced fonts are displayed with a too large character
        distance, e.g. "a b c" instead of "abc". This utility asks
        TOOLS.set_font_width() to set one common width for every object on
        the page that refers to the font; the width is the glyph advance of
        0x20 (space) times 1000, rounded to an integer. A message is logged
        for each object where this fails.

        Nothing happens if the font is not flagged "mono" or does not occur
        on the page.
    Args:
        page: pymupdf.Page object.
        font: pymupdf.Font object.
    """
    if not font.flags["mono"]:  # font not flagged as monospaced
        return None
    doc = page.parent
    # Distinct xrefs of page fonts with our name, a type starting with "F"
    # and an encoding starting with "Identity".
    xrefs = {
            entry[0]
            for entry in page.get_fonts()
            if entry[3] == font.name
            and entry[4].startswith("F")
            and entry[5].startswith("Identity")
            }
    if not xrefs:   # our font does not occur
        return
    width = int(round((font.glyph_advance(32) * 1000)))
    for xref in xrefs:
        done = TOOLS.set_font_width(doc, xref, width)
        if not done:
            log("Cannot set width for '%s' in xref %i" % (font.name, xref))
def sRGB_to_pdf(srgb: int) -> tuple:
    """Convert sRGB color code to a PDF color triple.

    There is **no error checking** for performance reasons!

    Args:
        srgb: (int) RRGGBB (red, green, blue), 8 bits per color. Bits above
            the lowest 24 are ignored.
    Returns:
        Tuple (red, green, blue) each item in interval 0 <= item <= 1.
    """
    code = srgb & 0xffffff
    red = code >> 16
    green = (code >> 8) & 0xff
    blue = code & 0xff
    return red / 255.0, green / 255.0, blue / 255.0
def sRGB_to_rgb(srgb: int) -> tuple:
    """Convert sRGB color code to an RGB color triple.

    There is **no error checking** for performance reasons!

    Args:
        srgb: (int) SSRRGGBB (red, green, blue), 8 bits per color. The `SS`
            part is masked off. With MuPDF < 1.26, `s` is always 0.
    Returns:
        Tuple (red, green, blue) each item in interval 0 <= item <= 255.
    """
    code = srgb & 0xffffff
    return (code >> 16, (code >> 8) & 0xff, code & 0xff)
def string_in_names_list(p, names_list):
    '''
    Return 1 if the text string of <p> equals the text string of any item at
    an even index of array <names_list>, else 0. A false <names_list> is
    treated as an empty array.
    '''
    length = mupdf.pdf_array_len( names_list) if names_list else 0
    wanted = mupdf.pdf_to_text_string( p)
    matched = any(
            mupdf.pdf_to_text_string( mupdf.pdf_array_get( names_list, idx)) == wanted
            for idx in range(0, length, 2)
            )
    return 1 if matched else 0
def strip_outline(doc, outlines, page_count, page_object_nums, names_list):
    '''
    Walk the chain of sibling outline items starting at <outlines> and drop
    items whose destination is not valid.

    For every item its children are stripped first via strip_outlines().
    An item with invalid destination (per dest_is_valid()) and no remaining
    children is unlinked from the chain; one with children only loses its
    /Dest and /A entries. Items with a valid destination are kept and
    counted.

    Returns (count, first, prev): the number of kept items, the first kept
    item and the last kept item. `first` and `prev` are None if nothing was
    kept.

    Fix: `prev` starts out as None, so the former unconditional
    `prev.m_internal` raised AttributeError whenever an item had to be
    dropped before any item had been kept. The check is now None-safe.
    '''
    first = None
    count = 0
    current = outlines
    prev = None
    while current.m_internal:
        # Strip any children to start with. This takes care of
        # First / Last / Count for us.
        nc = strip_outlines(doc, current, page_count, page_object_nums, names_list)
        if not dest_is_valid(current, page_count, page_object_nums, names_list):
            if nc == 0:
                # Outline with invalid dest and no children. Drop it by
                # pulling the next one in here.
                next_item = mupdf.pdf_dict_get(current, PDF_NAME('Next'))
                have_prev = prev is not None and bool(prev.m_internal)
                if not next_item.m_internal:
                    # There is no next one to pull in
                    if have_prev:
                        mupdf.pdf_dict_del(prev, PDF_NAME('Next'))
                elif have_prev:
                    mupdf.pdf_dict_put(prev, PDF_NAME('Next'), next_item)
                    mupdf.pdf_dict_put(next_item, PDF_NAME('Prev'), prev)
                else:
                    # The next item becomes the head of the chain.
                    mupdf.pdf_dict_del(next_item, PDF_NAME('Prev'))
                current = next_item
            else:
                # Outline with invalid dest, but children. Just drop the dest.
                mupdf.pdf_dict_del(current, PDF_NAME('Dest'))
                mupdf.pdf_dict_del(current, PDF_NAME('A'))
                current = mupdf.pdf_dict_get(current, PDF_NAME('Next'))
        else:
            # Keep this one
            if not first or not first.m_internal:
                first = current
            prev = current
            current = mupdf.pdf_dict_get(current, PDF_NAME('Next'))
            count += 1
    return count, first, prev
def strip_outlines(doc, outlines, page_count, page_object_nums, names_list):
    '''
    Strip the children of outline node <outlines> via strip_outline() and
    update its /First, /Last and /Count entries.

    Returns the number of children kept (0 if <outlines> is a null object or
    has no /First entry). With no children left the three entries are
    deleted; otherwise /Count gets the new number, negated unless the old
    /Count was positive.
    '''
    if not outlines.m_internal:
        return 0
    first = mupdf.pdf_dict_get(outlines, PDF_NAME('First'))
    kept = 0
    if first.m_internal:
        kept, first, last = strip_outline(doc, first, page_count, page_object_nums, names_list)
    if kept != 0:
        old_count = mupdf.pdf_to_int(mupdf.pdf_dict_get(outlines, PDF_NAME('Count')))
        mupdf.pdf_dict_put(outlines, PDF_NAME('First'), first)
        mupdf.pdf_dict_put(outlines, PDF_NAME('Last'), last)
        new_count = kept if old_count > 0 else -kept
        mupdf.pdf_dict_put(outlines, PDF_NAME('Count'), mupdf.pdf_new_int(new_count))
    else:
        for key in ('First', 'Last', 'Count'):
            mupdf.pdf_dict_del(outlines, PDF_NAME(key))
    return kept
# Integer codes for the four path operations named below
# (fill, stroke, clip, clip-stroke).
(
    trace_device_FILL_PATH,
    trace_device_STROKE_PATH,
    trace_device_CLIP_PATH,
    trace_device_CLIP_STROKE_PATH,
) = (1, 2, 3, 4)
def unicode_to_glyph_name(ch: int) -> str:
    """
    Convenience function accessing unicodedata.

    Returns the Unicode character name of code point `ch`, or ".notdef" if
    looking it up raises ValueError.
    """
    import unicodedata
    try:
        return unicodedata.name(chr(ch))
    except ValueError:
        return ".notdef"
def vdist(dir, a, b):
    '''
    Return mupdf.fz_abs(dx * dir.y + dy * dir.x), where (dx, dy) is the
    vector from point <a> to point <b>.
    '''
    delta_x = b.x - a.x
    delta_y = b.y - a.y
    mixed = delta_x * dir.y + delta_y * dir.x
    return mupdf.fz_abs(mixed)
def apply_pages(
        path,
        pagefn,
        *,
        pagefn_args=(),
        pagefn_kwargs=dict(),
        initfn=None,
        initfn_args=(),
        initfn_kwargs=dict(),
        pages=None,
        method='single',
        concurrency=None,
        _stats=False,
        ):
    '''
    Returns list of results from `pagefn()`, optionally using concurrency for
    speed.

    Args:
        path:
            Path of document.
        pagefn:
            Function to call for each page; is passed (page, *pagefn_args,
            **pagefn_kwargs). Return value is added to list that we return. If
            `method` is not 'single', must be a top-level function - nested
            functions don't work with concurrency.
        pagefn_args
        pagefn_kwargs:
            Additional args to pass to `pagefn`. Must be picklable.
        initfn:
            If true, called once in each worker process; is passed
            (*initfn_args, **initfn_kwargs). With method 'single' it is
            called once in the current process.
        initfn_args
        initfn_kwargs:
            Args to pass to initfn. Must be picklable.
        pages:
            List of page numbers to process, or None to include all pages.
        method:
            'single'
                Do not use concurrency.
            'mp'
                Operate concurrently using Python's `multiprocessing` module.
            'fork'
                Operate concurrently using custom implementation with
                `os.fork()`. Does not work on Windows.
        concurrency:
            Number of worker processes to use when operating concurrently. If
            None, we use the number of available CPUs.
        _stats:
            Internal, may change or be removed. If true, we output simple
            timing diagnostics.

    Raises:
        AssertionError if `method` is not one of the values above.

    Note: We require a file path rather than a Document, because Document
    instances do not work properly after a fork - internal file descriptor
    offsets are shared between the parent and child processes.

    Fix: with method 'single', `pagefn` used to be called with
    `**initfn_kwargs` instead of `**pagefn_kwargs`. The default dicts are
    never modified here.
    '''
    if _stats:
        t0 = time.time()

    if method == 'single':
        if initfn:
            initfn(*initfn_args, **initfn_kwargs)
        ret = list()
        document = Document(path)
        if pages is None:
            pages = range(len(document))
        for pno in pages:
            page = document[pno]
            ret.append(pagefn(page, *pagefn_args, **pagefn_kwargs))

    else:
        # Use concurrency.
        #
        from . import _apply_pages

        if pages is None:
            if _stats:
                t = time.time()
            with Document(path) as document:
                num_pages = len(document)
                pages = list(range(num_pages))
            if _stats:
                t = time.time() - t
                log(f'{t:.2f}s: count pages.')

        if _stats:
            t = time.time()

        if method == 'mp':
            worker = _apply_pages._multiprocessing
        elif method == 'fork':
            worker = _apply_pages._fork
        else:
            # Explicit raise (not `assert`) so the check survives `python -O`.
            raise AssertionError(f'Unrecognised {method=}.')

        # Both implementations take the same argument list.
        ret = worker(
                path,
                pages,
                pagefn,
                pagefn_args,
                pagefn_kwargs,
                initfn,
                initfn_args,
                initfn_kwargs,
                concurrency,
                _stats,
                )

        if _stats:
            t = time.time() - t
            log(f'{t:.2f}s: work.')

    if _stats:
        t = time.time() - t0
        log(f'{t:.2f}s: total.')
    return ret
def get_text(
        path,
        *,
        pages=None,
        method='single',
        concurrency=None,

        option='text',
        clip=None,
        flags=None,
        textpage=None,
        sort=False,
        delimiters=None,

        _stats=False,
        ):
    '''
    Returns list of results from `Page.get_text()`, optionally using
    concurrency for speed. This is `apply_pages()` with `Page.get_text` as
    the page function.

    Args:
        path:
            Path of document.
        pages:
            List of page numbers to process, or None to include all pages.
        method:
            'single'
                Do not use concurrency.
            'mp'
                Operate concurrently using Python's `multiprocessing` module.
            'fork'
                Operate concurrently using custom implementation with
                `os.fork`. Does not work on Windows.
        concurrency:
            Number of worker processes to use when operating concurrently. If
            None, we use the number of available CPUs.
        option
        clip
        flags
        textpage
        sort
        delimiters:
            Passed to internal calls to `Page.get_text()`.
        _stats:
            Forwarded to `apply_pages()`.
    '''
    # Keyword arguments handed to every Page.get_text() call.
    text_kwargs = {
            'option': option,
            'clip': clip,
            'flags': flags,
            'textpage': textpage,
            'sort': sort,
            'delimiters': delimiters,
            }
    results = apply_pages(
            path,
            Page.get_text,
            pagefn_kwargs=text_kwargs,
            pages=pages,
            method=method,
            concurrency=concurrency,
            _stats=_stats,
            )
    return results
- class TOOLS:
- '''
- We use @staticmethod to avoid the need to create an instance of this class.
- '''
@staticmethod
def _derotate_matrix(page):
    '''
    Return JM_derotate_page_matrix() of <page> as a Python value if <page>
    is a mupdf.PdfPage, otherwise a default-constructed mupdf.FzMatrix as a
    Python value.

    Fix: the @staticmethod decorator was missing although the function takes
    no `self` and the class is documented as using @staticmethod throughout;
    calls through the class are unaffected.
    '''
    if isinstance(page, mupdf.PdfPage):
        return JM_py_from_matrix(JM_derotate_page_matrix(page))
    return JM_py_from_matrix(mupdf.FzMatrix())
- @staticmethod
- def _fill_widget(annot, widget):
- val = JM_get_widget_properties(annot, widget)
- widget.rect = Rect(annot.rect)
- widget.xref = annot.xref
- widget.parent = annot.parent
- widget._annot = annot # backpointer to annot object
- if not widget.script:
- widget.script = None
- if not widget.script_stroke:
- widget.script_stroke = None
- if not widget.script_format:
- widget.script_format = None
- if not widget.script_change:
- widget.script_change = None
- if not widget.script_calc:
- widget.script_calc = None
- if not widget.script_blur:
- widget.script_blur = None
- if not widget.script_focus:
- widget.script_focus = None
- return val
@staticmethod
def _get_all_contents(page):
    '''
    Return what JM_read_contents() yields for the page object of <page>,
    converted with JM_BinFromBuffer().
    '''
    pdf_page = _as_pdf_page(page.this)
    contents = JM_read_contents(pdf_page.obj())
    return JM_BinFromBuffer( contents)
@staticmethod
def _insert_contents(page, newcont, overlay=1):
    """Add bytes as a new /Contents object for a page, and return its xref.

    `overlay` is handed on to JM_insert_contents() unchanged.
    """
    pdfpage = _as_pdf_page(page, required=1)
    buffer = JM_BufferFromBytes(newcont)
    #fixme: pdfpage->doc->dirty = 1;
    return JM_insert_contents(pdfpage.doc(), pdfpage.obj(), buffer, overlay)
@staticmethod
def _le_annot_parms(annot, p1, p2, fill_color):
    """Get common parameters for making annot line end symbols.

    Returns:
        m: matrix that maps p1, p2 to points L, R on the x-axis
        im: its inverse
        L, R: transformed p1, p2
        w: line width
        scol: stroke color string ("... RG"), black if the annot has none
        fcol: fill color string ("... rg"); fill_color if given, else the
            annot's fill color, else white
        opacity: opacity string (gs command), empty unless
            0 <= annot.opacity < 1
    """
    w = annot.border["width"]   # line width
    stroke = annot.colors["stroke"] or (0,0,0)  # black if missing
    scol = " ".join(str(part) for part in stroke) + " RG\n"
    # fill: explicit argument, else annot fill color, else white
    fill = fill_color or annot.colors["fill"] or (1,1,1)
    fcol = " ".join(str(part) for part in fill) + " rg\n"
    m = Matrix(util_hor_matrix(p1, p2))     # matrix makes the line horizontal
    im = ~m                                 # inverted matrix
    L = p1 * m                              # converted start (left) point
    R = p2 * m                              # converted end (right) point
    opacity = "/H gs\n" if 0 <= annot.opacity < 1 else ""
    return m, im, L, R, w, scol, fcol, opacity
@staticmethod
def _le_butt(annot, p1, p2, lr, fill_color):
    """Make stream commands for butt line end symbol. "lr" denotes left (False) or right point.

    The symbol is a stroked line of length 3 * max(1, line width) through
    the chosen end point, crossing the horizontalised line.
    """
    m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
    length = 3 * max(1, w)
    anchor = R if lr else L
    top = (anchor + (0, -length/2.)) * im
    bot = (anchor + (0, length/2.)) * im
    pieces = [
            "\nq\n%s%f %f m\n" % (opacity, top.x, top.y),
            "%f %f l\n" % (bot.x, bot.y),
            _format_g(w) + " w\n",
            scol + "s\nQ\n",
            ]
    return "".join(pieces)
@staticmethod
def _le_circle(annot, p1, p2, lr, fill_color):
    """Make stream commands for circle line end symbol. "lr" denotes left (False) or right point.

    The circle is the oval (see TOOLS._oval_string) inside a square with
    half edge length 2.5 * max(1, line width); it is filled and stroked.
    """
    m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
    d = 2.5 * max(1, w)     # half the length of the square edge
    if lr:
        center = R - (d/2., 0)
    else:
        center = L + (d/2., 0)
    square = Rect(center, center) + (-d, -d, d, d)
    oval = TOOLS._oval_string(square.tl * im, square.tr * im, square.br * im, square.bl * im)
    return "".join([
            "q\n" + opacity + oval,
            _format_g(w) + " w\n",
            scol + fcol + "b\nQ\n",
            ])
@staticmethod
def _le_closedarrow(annot, p1, p2, lr, fill_color):
    """Make stream commands for closed arrow line end symbol. "lr" denotes left (False) or right point.

    Three points are emitted: the tip, placed d/2 beyond the chosen end
    point, and two points 2*d behind it and d above / below the line, with
    d = 2.5 * max(1, line width). The path is closed, filled and stroked.
    """
    m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
    d = 2.5 * max(1, w)
    if lr:
        tip = R + (d/2., 0)
        upper = tip + (-2*d, -d)
        lower = tip + (-2*d, d)
    else:
        tip = L - (d/2., 0)
        upper = tip + (2*d, -d)
        lower = tip + (2*d, d)
    # back to the original coordinate system
    upper *= im
    tip *= im
    lower *= im
    return "".join([
            "\nq\n%s%f %f m\n" % (opacity, upper.x, upper.y),
            "%f %f l\n" % (tip.x, tip.y),
            "%f %f l\n" % (lower.x, lower.y),
            _format_g(w) + " w\n",
            scol + fcol + "b\nQ\n",
            ])
@staticmethod
def _le_diamond(annot, p1, p2, lr, fill_color):
    """Make stream commands for diamond line end symbol. "lr" denotes left (False) or right point.

    The diamond connects the middle points of the four sides of a square
    with half edge length 2.5 * max(1, line width); it is filled and
    stroked.
    """
    m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
    d = 2.5 * max(1, w)     # half the length of the square edge
    center = R - (d/2., 0) if lr else L + (d/2., 0)
    r = Rect(center, center) + (-d, -d, d, d)   # the square
    # the square makes line longer by (2*shift - 1)*width
    left_mid = (r.tl + (r.bl - r.tl) * 0.5) * im
    top_mid = (r.tl + (r.tr - r.tl) * 0.5) * im
    right_mid = (r.tr + (r.br - r.tr) * 0.5) * im
    bottom_mid = (r.br + (r.bl - r.br) * 0.5) * im
    commands = ["q\n%s%f %f m\n" % (opacity, left_mid.x, left_mid.y)]
    for corner in (top_mid, right_mid, bottom_mid):
        commands.append("%f %f l\n" % (corner.x, corner.y))
    commands.append(_format_g(w) + " w\n")
    commands.append(scol + fcol + "b\nQ\n")
    return "".join(commands)
@staticmethod
def _le_openarrow(annot, p1, p2, lr, fill_color):
    """Make stream commands for open arrow line end symbol. "lr" denotes left (False) or right point.

    Same three points as the closed arrow (tip d/2 beyond the end point,
    two points 2*d behind it, d above / below the line, with
    d = 2.5 * max(1, line width)), but the path is only stroked.
    """
    m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
    d = 2.5 * max(1, w)
    if lr:
        tip = R + (d/2., 0)
        upper = tip + (-2*d, -d)
        lower = tip + (-2*d, d)
    else:
        tip = L - (d/2., 0)
        upper = tip + (2*d, -d)
        lower = tip + (2*d, d)
    # back to the original coordinate system
    upper *= im
    tip *= im
    lower *= im
    return "".join([
            "\nq\n%s%f %f m\n" % (opacity, upper.x, upper.y),
            "%f %f l\n" % (tip.x, tip.y),
            "%f %f l\n" % (lower.x, lower.y),
            _format_g(w) + " w\n",
            scol + "S\nQ\n",
            ])
@staticmethod
def _le_rclosedarrow(annot, p1, p2, lr, fill_color):
    """Make stream commands for right closed arrow line end symbol. "lr" denotes left (False) or right point.

    The tip lies on the line, 2*d inside the chosen end point; the other
    two points are level with the end point, d above / below the line, with
    d = 2.5 * max(1, line width). The path is closed, filled and stroked.
    """
    m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
    d = 2.5 * max(1, w)
    if lr:
        tip = R - (2*d, 0)
        upper = tip + (2*d, -d)
        lower = tip + (2*d, d)
    else:
        tip = L + (2*d, 0)
        upper = tip + (-2*d, -d)
        lower = tip + (-2*d, d)
    # back to the original coordinate system
    upper *= im
    tip *= im
    lower *= im
    return "".join([
            "\nq\n%s%f %f m\n" % (opacity, upper.x, upper.y),
            "%f %f l\n" % (tip.x, tip.y),
            "%f %f l\n" % (lower.x, lower.y),
            _format_g(w) + " w\n",
            scol + fcol + "b\nQ\n",
            ])
@staticmethod
def _le_ropenarrow(annot, p1, p2, lr, fill_color):
    """Make stream commands for right open arrow line end symbol. "lr" denotes left (False) or right point.

    The tip lies on the line, d/3 inside the chosen end point; the other
    two points are 2*d further out, d above / below the line, with
    d = 2.5 * max(1, line width). The path is stroked only (the fill color
    is still written to the stream).
    """
    m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
    d = 2.5 * max(1, w)
    if lr:
        tip = R - (d/3., 0)
        upper = tip + (2*d, -d)
        lower = tip + (2*d, d)
    else:
        tip = L + (d/3., 0)
        upper = tip + (-2*d, -d)
        lower = tip + (-2*d, d)
    # back to the original coordinate system
    upper *= im
    tip *= im
    lower *= im
    return "".join([
            "\nq\n%s%f %f m\n" % (opacity, upper.x, upper.y),
            "%f %f l\n" % (tip.x, tip.y),
            "%f %f l\n" % (lower.x, lower.y),
            _format_g(w) + " w\n",
            scol + fcol + "S\nQ\n",
            ])
@staticmethod
def _le_slash(annot, p1, p2, lr, fill_color):
    """Make stream commands for slash line end symbol. "lr" denotes left (False) or right point.

    The slash is the stroked diagonal (top-left to bottom-right) of a
    rectangle around the chosen end point with half width
    1.1547 * max(1, line width) and half height 2 * line width.
    """
    m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
    half_width = 1.1547 * max(1, w) * 1.0   # makes rect diagonal a 30 deg inclination
    anchor = R if lr else L
    box = Rect(
            anchor.x - half_width,
            anchor.y - 2 * w,
            anchor.x + half_width,
            anchor.y + 2 * w,
            )
    top = box.tl * im
    bot = box.br * im
    return "".join([
            "\nq\n%s%f %f m\n" % (opacity, top.x, top.y),
            "%f %f l\n" % (bot.x, bot.y),
            _format_g(w) + " w\n",
            scol + "s\nQ\n",
            ])
@staticmethod
def _le_square(annot, p1, p2, lr, fill_color):
    """Make stream commands for square line end symbol. "lr" denotes left (False) or right point.

    The square has half edge length 2.5 * max(1, line width); its corners
    are emitted in the order top-left, top-right, bottom-right, bottom-left
    and the path is closed, filled and stroked.
    """
    m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
    d = 2.5 * max(1, w)     # half the length of the square edge
    center = R - (d/2., 0) if lr else L + (d/2., 0)
    r = Rect(center, center) + (-d, -d, d, d)   # the square
    # the square makes line longer by (2*shift - 1)*width
    start = r.tl * im
    commands = ["q\n%s%f %f m\n" % (opacity, start.x, start.y)]
    for corner in (r.tr, r.br, r.bl):
        p = corner * im
        commands.append("%f %f l\n" % (p.x, p.y))
    commands.append(_format_g(w) + " w\n")
    commands.append(scol + fcol + "b\nQ\n")
    return "".join(commands)
- @staticmethod
- def _oval_string(p1, p2, p3, p4):
- """Return /AP string defining an oval within a 4-polygon provided as points
- """
- def bezier(p, q, r):
- f = "%f %f %f %f %f %f c\n"
- return f % (p.x, p.y, q.x, q.y, r.x, r.y)
- kappa = 0.55228474983 # magic number
- ml = p1 + (p4 - p1) * 0.5 # middle points ...
- mo = p1 + (p2 - p1) * 0.5 # for each ...
- mr = p2 + (p3 - p2) * 0.5 # polygon ...
- mu = p4 + (p3 - p4) * 0.5 # side
- ol1 = ml + (p1 - ml) * kappa # the 8 bezier
- ol2 = mo + (p1 - mo) * kappa # helper points
- or1 = mo + (p2 - mo) * kappa
- or2 = mr + (p2 - mr) * kappa
- ur1 = mr + (p3 - mr) * kappa
- ur2 = mu + (p3 - mu) * kappa
- ul1 = mu + (p4 - mu) * kappa
- ul2 = ml + (p4 - ml) * kappa
- # now draw, starting from middle point of left side
- ap = "%f %f m\n" % (ml.x, ml.y)
- ap += bezier(ol1, ol2, mo)
- ap += bezier(or1, or2, mr)
- ap += bezier(ur1, ur2, mu)
- ap += bezier(ul1, ul2, ml)
- return ap
@staticmethod
def _parse_da(annot):
    """Extract text color, font name and font size from an annotation's /DA.

    The /DA string is taken from the annotation (inheritable lookup) and,
    if absent there, from Root/AcroForm/DA in the trailer.

    Returns:
        (col, font, fsize). If no /DA string could be obtained the result
        is ((0,), "", 0). Otherwise the values start as
        ((0, 0, 0), "Helv", 12) and are overridden by the "g", "rg", "k"
        and "Tf" operators found in the string.
    """
    if g_use_extra:
        da_text = extra.Tools_parse_da(annot.this)
    else:
        this_annot = annot.this
        assert isinstance(this_annot, mupdf.PdfAnnot)
        annot_obj = mupdf.pdf_annot_obj(this_annot)
        pdf = mupdf.pdf_get_bound_document(annot_obj)
        try:
            da = mupdf.pdf_dict_get_inheritable(annot_obj, PDF_NAME('DA'))
            if not da.m_internal:
                # Not on the annotation: fall back to the form-wide default.
                da = mupdf.pdf_dict_getl(
                        mupdf.pdf_trailer(pdf),
                        PDF_NAME('Root'),
                        PDF_NAME('AcroForm'),
                        PDF_NAME('DA'),
                        )
            da_text = mupdf.pdf_to_text_string(da)
        except Exception:
            if g_exceptions_verbose: exception_info()
            da_text = None

    if not da_text:
        return ((0,), "", 0)

    font = "Helv"
    fsize = 12
    col = (0, 0, 0)
    tokens = da_text.split()  # split on any whitespace
    for pos, tok in enumerate(tokens):
        # Operands precede their operator; consumed tokens are blanked.
        if tok == "Tf":
            font = tokens[pos - 2][1:]
            fsize = float(tokens[pos - 1])
            tokens[pos] = tokens[pos - 1] = tokens[pos - 2] = ""
        elif tok == "g":  # unicolor text
            col = [(float(tokens[pos - 1]))]
            tokens[pos] = tokens[pos - 1] = ""
        elif tok == "rg":  # RGB colored text
            col = [float(f) for f in tokens[pos - 3:pos]]
            tokens[pos] = tokens[pos - 1] = tokens[pos - 2] = tokens[pos - 3] = ""
        elif tok == "k":  # CMYK colored text
            col = [float(f) for f in tokens[pos - 4:pos]]
            tokens[pos] = tokens[pos - 1] = tokens[pos - 2] = tokens[pos - 3] = tokens[pos - 4] = ""
    return (col, font, fsize)
@staticmethod
def _reset_widget(annot):
    """Call mupdf.pdf_field_reset() for the field object behind `annot`.

    `annot` is handed directly to mupdf.pdf_annot_obj(); the owning
    document is looked up from the resulting object.
    """
    field_obj = mupdf.pdf_annot_obj(annot)
    owner_pdf = mupdf.pdf_get_bound_document(field_obj)
    mupdf.pdf_field_reset(owner_pdf, field_obj)
@staticmethod
def _rotate_matrix(page):
    """Return the page's rotation matrix in Python form.

    If `page` has no underlying PDF page, a default-constructed
    mupdf.FzMatrix is converted instead.
    """
    pdfpage = page._pdf_page(required=False)
    if pdfpage.m_internal:
        matrix = JM_rotate_page_matrix(pdfpage)
    else:
        matrix = mupdf.FzMatrix()
    return JM_py_from_matrix(matrix)
@staticmethod
def _save_widget(annot, widget):
    """Forward `annot` and `widget` to JM_set_widget_properties()."""
    JM_set_widget_properties(annot, widget)
@staticmethod
def _update_da(annot, da_str):
    """Store `da_str` as the annotation's /DA entry.

    Declared as a static method like its siblings, so that it behaves the
    same whether it is reached through the class or through an instance.

    Args:
        annot: annotation wrapper; its `.this` must be a mupdf.PdfAnnot.
        da_str: the new default-appearance string.

    In the pure-Python path the /DS and /RC entries are deleted as well
    (they are not supported). Errors in that path are swallowed; they are
    only reported when g_exceptions_verbose is set.
    """
    if g_use_extra:
        extra.Tools_update_da(annot.this, da_str)
        return
    try:
        this_annot = annot.this
        assert isinstance(this_annot, mupdf.PdfAnnot)
        mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(this_annot), PDF_NAME('DA'), da_str)
        mupdf.pdf_dict_del(mupdf.pdf_annot_obj(this_annot), PDF_NAME('DS'))  # not supported
        mupdf.pdf_dict_del(mupdf.pdf_annot_obj(this_annot), PDF_NAME('RC'))  # not supported
    except Exception:
        # Deliberate best-effort: keep going, optionally report.
        if g_exceptions_verbose: exception_info()
    return
-
@staticmethod
def gen_id():
    """Increment the module-level TOOLS_JM_UNIQUE_ID counter and return it."""
    global TOOLS_JM_UNIQUE_ID
    TOOLS_JM_UNIQUE_ID = TOOLS_JM_UNIQUE_ID + 1
    return TOOLS_JM_UNIQUE_ID
@staticmethod
def glyph_cache_empty():
    """Empty the glyph cache (calls mupdf.fz_purge_glyph_cache())."""
    mupdf.fz_purge_glyph_cache()
@staticmethod
def image_profile(stream, keep_image=0):
    """Return metadata of an image binary stream.

    Both arguments are passed unchanged to JM_image_profile(), whose
    result is returned.
    """
    profile = JM_image_profile(stream, keep_image)
    return profile
-
@staticmethod
def mupdf_display_errors(on=None):
    """Query or change whether MuPDF errors are displayed.

    Args:
        on: if not None, its truth value becomes the new setting.

    Returns:
        The (possibly updated) value of the global JM_mupdf_show_errors.
    """
    global JM_mupdf_show_errors
    if on is None:
        return JM_mupdf_show_errors
    JM_mupdf_show_errors = bool(on)
    return JM_mupdf_show_errors
@staticmethod
def mupdf_display_warnings(on=None):
    """Query or change whether MuPDF warnings are displayed.

    Args:
        on: if not None, its truth value becomes the new setting.

    Returns:
        The (possibly updated) value of the global JM_mupdf_show_warnings.
    """
    global JM_mupdf_show_warnings
    if on is None:
        return JM_mupdf_show_warnings
    JM_mupdf_show_warnings = bool(on)
    return JM_mupdf_show_warnings
@staticmethod
def mupdf_version():
    """Get version of MuPDF binary build (the value of mupdf.FZ_VERSION)."""
    version = mupdf.FZ_VERSION
    return version
@staticmethod
def mupdf_warnings(reset=1):
    """Return the stored MuPDF warnings/errors as one newline-joined string.

    Args:
        reset: if truthy (the default), the store is emptied afterwards
            via TOOLS.reset_mupdf_warnings().
    """
    # Flush first so that any trailing `... repeated <N> times...`
    # message is included.
    mupdf.fz_flush_warnings()
    collected = '\n'.join(JM_mupdf_warnings_store)
    if reset:
        TOOLS.reset_mupdf_warnings()
    return collected
@staticmethod
def reset_mupdf_warnings():
    """Rebind the global JM_mupdf_warnings_store to a new empty list."""
    global JM_mupdf_warnings_store
    JM_mupdf_warnings_store = []
-
@staticmethod
def set_aa_level(level):
    """Set the anti-aliasing level by passing `level` to mupdf.fz_set_aa_level()."""
    mupdf.fz_set_aa_level(level)
-
@staticmethod
def set_annot_stem(stem=None):
    """Query or change the global annotation id stem.

    Args:
        stem: new stem. If None, the current stem is returned unchanged.
            Anything beyond the first 50 characters is discarded.

    Returns:
        The current value of the global JM_annot_id_stem.
    """
    global JM_annot_id_stem
    if stem is None:
        return JM_annot_id_stem
    # The slice alone performs the truncation; the length bookkeeping that
    # used to precede it was never read and has been dropped.
    JM_annot_id_stem = stem[:50]
    return JM_annot_id_stem
@staticmethod
def set_font_width(doc, xref, width):
    """Write a /W array [0 65535 width] into each descendant font of a font.

    Args:
        doc: document; must resolve to a PDF for anything to happen.
        xref: xref number of the font object to load.
        width: integer stored as the third /W array item.

    Returns:
        False if `doc` is not a PDF, otherwise True (also when the font
        has no /DescendantFonts array).
    """
    pdf = _as_pdf_document(doc, required=0)
    if not pdf.m_internal:
        return False
    font = mupdf.pdf_load_object(pdf, xref)
    dfonts = mupdf.pdf_dict_get(font, PDF_NAME('DescendantFonts'))
    if not mupdf.pdf_is_array(dfonts):
        return True
    for idx in range(mupdf.pdf_array_len(dfonts)):
        dfont = mupdf.pdf_array_get(dfonts, idx)
        warray = mupdf.pdf_new_array(pdf, 3)
        for value in (0, 65535, width):
            mupdf.pdf_array_push(warray, mupdf.pdf_new_int(value))
        mupdf.pdf_dict_put(dfont, PDF_NAME('W'), warray)
    return True
@staticmethod
def set_graphics_min_line_width(min_line_width):
    """Set the graphics minimum line width.

    `min_line_width` is passed unchanged to
    mupdf.fz_set_graphics_min_line_width().
    """
    mupdf.fz_set_graphics_min_line_width(min_line_width)
@staticmethod
def set_icc(on=0):
    """Set ICC color handling on or off.

    Args:
        on: truthy to enable, falsy to disable.

    Enabling when mupdf.FZ_ENABLE_ICC is false is reported through
    RAISEPY() with PyExc_ValueError; disabling in that case does nothing.
    """
    if not on:
        if mupdf.FZ_ENABLE_ICC:
            mupdf.fz_disable_icc()
        return
    if mupdf.FZ_ENABLE_ICC:
        mupdf.fz_enable_icc()
    else:
        RAISEPY( "MuPDF built w/o ICC support",PyExc_ValueError)
-
@staticmethod
def set_low_memory(on=None):
    """Set / unset MuPDF device caching.

    Args:
        on: if not None, its truth value is stored in
            _globals.no_device_caching.

    Returns:
        The current value of _globals.no_device_caching.
    """
    if on is None:
        return _globals.no_device_caching
    _globals.no_device_caching = bool(on)
    return _globals.no_device_caching
@staticmethod
def set_small_glyph_heights(on=None):
    """Set / unset small glyph heights.

    Args:
        on: if not None, its truth value is stored in
            _globals.small_glyph_heights and, when g_use_extra is set,
            also handed to extra.set_small_glyph_heights().

    Returns:
        The current value of _globals.small_glyph_heights.
    """
    if on is None:
        return _globals.small_glyph_heights
    _globals.small_glyph_heights = bool(on)
    if g_use_extra:
        extra.set_small_glyph_heights(_globals.small_glyph_heights)
    return _globals.small_glyph_heights
-
@staticmethod
def set_subset_fontnames(on=None):
    """Set / unset returning fontnames with their subset prefix.

    Args:
        on: if not None, its truth value is stored in
            _globals.subset_fontnames and, when g_use_extra is set, also
            handed to extra.set_subset_fontnames().

    Returns:
        The current value of _globals.subset_fontnames.
    """
    if on is None:
        return _globals.subset_fontnames
    _globals.subset_fontnames = bool(on)
    if g_use_extra:
        extra.set_subset_fontnames(_globals.subset_fontnames)
    return _globals.subset_fontnames
-
@staticmethod
def show_aa_level():
    """Show anti-aliasing values.

    Returns:
        A dict with the keys "graphics", "text" and
        "graphics_min_line_width", each filled from the corresponding
        mupdf.fz_*() query.
    """
    return {
            'graphics': mupdf.fz_graphics_aa_level(),
            'text': mupdf.fz_text_aa_level(),
            'graphics_min_line_width': mupdf.fz_graphics_min_line_width(),
            }
@staticmethod
def store_maxsize():
    """MuPDF store size limit.

    Not implemented yet: always returns None.
    """
    # fixme: return gctx->store->max.
    return None
@staticmethod
def store_shrink(percent):
    """Free 'percent' of current store size.

    Args:
        percent: 100 or more empties the store; a value between 0 and 100
            (exclusive) calls mupdf.fz_shrink_store(100 - percent);
            0 or less does nothing.

    Returns:
        0 after emptying the store, otherwise None.
    """
    if percent >= 100:
        mupdf.fz_empty_store()
        return 0
    if percent <= 0:
        return None
    mupdf.fz_shrink_store( 100 - percent)
    # fixme: return gctx->store->size.
    return None
-
@staticmethod
def store_size():
    """MuPDF current store size.

    Not implemented yet: always returns None.
    """
    # fixme: return gctx->store->size.
    return None
-
@staticmethod
def unset_quad_corrections(on=None):
    """Set ascender / descender corrections on or off.

    Args:
        on: if not None, its truth value is stored in
            _globals.skip_quad_corrections and, when g_use_extra is set,
            also handed to extra.set_skip_quad_corrections().

    Returns:
        The current value of _globals.skip_quad_corrections.
    """
    if on is None:
        return _globals.skip_quad_corrections
    _globals.skip_quad_corrections = bool(on)
    if g_use_extra:
        extra.set_skip_quad_corrections(_globals.skip_quad_corrections)
    return _globals.skip_quad_corrections
# fixme: a variable with this name is also defined at top-level.
# NOTE(review): this and fitz_config are presumably attributes of the
# enclosing class rather than module globals - confirm the indentation.
JM_annot_id_stem = 'fitz'
# Result of JM_fitz_config(), computed once when this statement runs.
fitz_config = JM_fitz_config()
# Callbacks not yet supported with cppyy.
if not mupdf_cppyy:
    mupdf.fz_set_warning_callback(JM_mupdf_warning)
    mupdf.fz_set_error_callback(JM_mupdf_error)


# If warnings are still pending at interpreter exit, this happens:
#
#   atexit()
#   -> mupdf::internal_thread_state::~internal_thread_state()
#   -> fz_drop_context()
#   -> fz_flush_warnings()
#   -> SWIG Director code
#   -> Python calling JM_mupdf_warning().
#
# That sequence ends in a SEGV, seemingly because the SWIG Director code
# has already been torn down by then.
#
# Hence a Python atexit handler calls fz_flush_warnings() explicitly; this
# appears to run early enough for the Director machinery to still work.
# The later fz_flush_warnings() in the sequence above then finds nothing
# pending and does not try to call JM_mupdf_warning().
#
@atexit.register
def _atexit():
    """Flush pending MuPDF warnings, then remove both Python callbacks."""
    #log( 'PyMuPDF/src/__init__.py:_atexit() called')
    mupdf.fz_flush_warnings()
    mupdf.fz_set_warning_callback(None)
    mupdf.fz_set_error_callback(None)
    #log( '_atexit() returning')
# _wxcolors: list of (name, red, green, blue) where
#   name: upper-case name.
#   red, green, blue: integer in range 0..255.
#
from . import _wxcolors
_wxcolors = _wxcolors._wxcolors

# pdfcolor: dict mapping from name to (red, green, blue) where
#   name: lower-case name.
#   red, green, blue: float in range 0..1.
#
pdfcolor = {}
for name, r, g, b in _wxcolors:
    pdfcolor[name.lower()] = tuple(channel / 255 for channel in (r, g, b))
def colors_pdf_dict():
    '''
    Returns the module-level `pdfcolor` dict (the same object, not a copy).

    It maps from name to (red, green, blue):
        name: lower-case name.
        red, green, blue: float in range 0..1.
    '''
    colors = pdfcolor
    return colors
def colors_wx_list():
    '''
    Returns the module-level `_wxcolors` list (the same object, not a copy).

    Its items are (name, red, green, blue) tuples:
        name: upper-case name.
        red, green, blue: integers in range 0..255.
    '''
    colors = _wxcolors
    return colors
def _mupdf_devel(make_links=True):
    '''
    Allows a PyMuPDF installation to be used to compile and link programmes
    that use the MuPDF C/C++ API.

    Args:
        make_links:
            If true, then on non-Windows we also create softlinks to any
            shared libraries that are supplied with a version suffix, so
            that they can be used in a link command. For example:
                site-packages/pymupdf/
                    libmupdf.so -> libmupdf.so.<version suffix>
                    libmupdfcpp.so -> libmupdfcpp.so.<version suffix>

    Returns: (mupdf_include, mupdf_lib).
        mupdf_include:
            Path of MuPDF include directory within PyMuPDF install.
        mupdf_lib:
            Path of MuPDF library directory within PyMuPDF install.

    Both paths are asserted to be existing directories.
    '''
    import platform

    log(f'{mupdf_version=}')

    # Local names that appear in `{name=}` log messages are part of the
    # output and are therefore kept stable.
    p = os.path.normpath(f'{__file__}/..')
    mupdf_include = f'{p}/mupdf-devel/include'
    on_windows = platform.system() == 'Windows'

    # Windows: separate .lib files are used at build time.
    # Elsewhere: .so files serve both buildtime and runtime linking.
    mupdf_lib = f'{p}/mupdf-devel/lib' if on_windows else p

    log(f'Within installed PyMuPDF:')
    log(f' {mupdf_include=}')
    log(f' {mupdf_lib=}')
    assert os.path.isdir(mupdf_include), f'Not a directory: {mupdf_include=}.'
    assert os.path.isdir(mupdf_lib), f'Not a directory: {mupdf_lib=}.'

    if on_windows or not make_links:
        return mupdf_include, mupdf_lib

    # Make symbolic links within the installed pymupdf module so that ld
    # can find libmupdf.so etc. This is a bit of a hack, but necessary
    # because wheels cannot contain symbolic links.
    #
    # Only files carrying the expected MuPDF version suffix are linked, in
    # case old .so files from a previous install are still in place.
    #
    log(f'Creating symlinks in {mupdf_lib=} for MuPDF-{mupdf_version} .so files.')
    regex_suffix = '[.]'.join(mupdf_version.split('.')[1:3])
    mupdf_lib_regex = f'^(lib[^.]+[.]so)[.]{regex_suffix}$'
    log(f'{mupdf_lib_regex=}.')
    for leaf in os.listdir(mupdf_lib):
        m = re.match(mupdf_lib_regex, leaf)
        if not m:
            continue
        pfrom = f'{mupdf_lib}/{m.group(1)}'
        # os.path.exists() can return false if a softlink exists but
        # points to a non-existent file, so os.path.islink() is checked
        # too.
        if os.path.islink(pfrom) or os.path.exists(pfrom):
            log(f'Removing existing link {pfrom=}.')
            os.remove(pfrom)
        log(f'Creating symlink: {pfrom} -> {leaf}')
        os.symlink(leaf, pfrom)

    return mupdf_include, mupdf_lib
# utils cannot be imported any earlier: it imports this .py file itself and
# uses some pymupdf.* types in function typing.
#
from . import utils

# Re-export the quad recovery helpers from utils under the same names.
#
recover_bbox_quad = utils.recover_bbox_quad
recover_char_quad = utils.recover_char_quad
recover_line_quad = utils.recover_line_quad
recover_quad = utils.recover_quad
recover_span_quad = utils.recover_span_quad

# Attach table detection to Page as a method.
from .table import find_tables
Page.find_tables = find_tables
class FitzDeprecation(DeprecationWarning):
    """Warning category used for the legacy (camelCase) name aliases."""
def restore_aliases():
    '''
    Re-create the legacy (mostly camelCase) names as deprecated aliases.

    Side effects:
        * installs a "once" warnings filter for FitzDeprecation,
        * replaces warnings.showwarning with a function that sends the
          formatted warning to log(),
        * adds one alias attribute per table entry below to the listed
          class, or to this module where the class is None.
    '''
    warnings.filterwarnings( "once", category=FitzDeprecation)

    def showthis(msg, cat, filename, lineno, file=None, line=None):
        # Log the formatted warning. For our own category only the first
        # line is kept, starting at the category name with its first four
        # characters dropped.
        text = warnings.formatwarning(msg, cat, filename, lineno, line=line)
        pos = text.find("FitzDeprecation")
        if pos >= 0:
            text = text[pos:].splitlines()[0][4:]
        log(text)

    warnings.showwarning = showthis

    def _alias(class_, new_name, legacy_name=None):
        '''
        Adds an alias for a class_ or module item called <class_>.<new_name>.
            class_:
                Class/module to modify; use None for the current module.
            new_name:
                String name of existing item, e.g. name of method.
            legacy_name:
                Name of legacy object to create in <class_>. If None, we
                generate it from <new_name> by removing underscores and
                capitalising the letter that follows each of them.
        '''
        if class_ is None:
            class_ = sys.modules[__name__]
        if not legacy_name:
            head, *rest = new_name.split('_')
            legacy_name = head + ''.join(
                    part[:1].upper() + part[1:] for part in rest
                    )
        new_object = getattr( class_, new_name)
        assert not getattr( class_, legacy_name, None), f'class {class_} already has {legacy_name}'
        if not callable( new_object):
            # Plain attribute: the legacy name refers to the same object.
            setattr( class_, legacy_name, new_object)
            return

        def deprecated_function( *args, **kwargs):
            warnings.warn(
                    f'"{legacy_name=}" removed from {class_} after v1.19.0 - use "{new_name}".',
                    category=FitzDeprecation,
                    )
            return new_object( *args, **kwargs)

        setattr( class_, legacy_name, deprecated_function)
        deprecated_function.__doc__ = (
                f'*** Deprecated and removed in version after v1.19.0 - use "{new_name}". ***\n'
                f'{new_object.__doc__}'
                )

    # (class, entries): an entry is either the new name alone (legacy name
    # is derived) or a (new name, legacy name) pair. Order is significant
    # only in that it matches the order of registration.
    alias_table = (
        (Annot, (
            ('get_file', 'fileGet'),
            'get_pixmap',
            ('get_sound', 'soundGet'),
            'get_text',
            'get_textbox',
            ('get_textpage', 'getTextPage'),
            'line_ends',
            ('set_blendmode', 'setBlendMode'),
            'set_border',
            'set_colors',
            'set_flags',
            'set_info',
            'set_line_ends',
            'set_name',
            ('set_oc', 'setOC'),
            'set_opacity',
            'set_rect',
            ('update_file', 'fileUpd'),
        )),
        (DisplayList, (
            'get_pixmap',
            ('get_textpage', 'getTextPage'),
        )),
        (Document, (
            'chapter_count',
            'chapter_page_count',
            ('convert_to_pdf', 'convertToPDF'),
            'copy_page',
            'delete_page',
            ('delete_pages', 'deletePageRange'),
            ('embfile_add', 'embeddedFileAdd'),
            ('embfile_count', 'embeddedFileCount'),
            ('embfile_del', 'embeddedFileDel'),
            ('embfile_get', 'embeddedFileGet'),
            ('embfile_info', 'embeddedFileInfo'),
            ('embfile_names', 'embeddedFileNames'),
            ('embfile_upd', 'embeddedFileUpd'),
            'extract_font',
            'extract_image',
            'find_bookmark',
            'fullcopy_page',
            'get_char_widths',
            ('get_ocgs', 'getOCGs'),
            ('get_page_fonts', 'getPageFontList'),
            ('get_page_images', 'getPageImageList'),
            'get_page_pixmap',
            'get_page_text',
            ('get_page_xobjects', 'getPageXObjectList'),
            ('get_sigflags', 'getSigFlags'),
            ('get_toc', 'getToC'),
            'get_xml_metadata',
            'insert_page',
            ('insert_pdf', 'insertPDF'),
            'is_dirty',
            ('is_form_pdf', 'isFormPDF'),
            ('is_pdf', 'isPDF'),
            'is_reflowable',
            'is_repaired',
            'last_location',
            'load_page',
            'make_bookmark',
            'move_page',
            'needs_pass',
            'new_page',
            'next_location',
            'page_count',
            ('page_cropbox', 'pageCropBox'),
            'page_xref',
            ('pdf_catalog', 'PDFCatalog'),
            ('pdf_trailer', 'PDFTrailer'),
            ('prev_location', 'previousLocation'),
            'resolve_link',
            'search_page_for',
            'set_language',
            'set_metadata',
            ('set_toc', 'setToC'),
            'set_xml_metadata',
            'update_object',
            'update_stream',
            ('xref_is_stream', 'isStream'),
            'xref_length',
            'xref_object',
            'xref_stream',
            'xref_stream_raw',
            ('xref_xml_metadata', 'metadataXML'),
        )),
        (IRect, (
            'get_area',
            ('get_area', 'getRectArea'),
            'include_point',
            'include_rect',
            'is_empty',
            'is_infinite',
        )),
        (Link, (
            'is_external',
            'set_border',
            'set_colors',
        )),
        (Matrix, (
            'is_rectilinear',
            ('prerotate', 'preRotate'),
            ('prescale', 'preScale'),
            ('preshear', 'preShear'),
            ('pretranslate', 'preTranslate'),
        )),
        (None, (
            ('get_pdf_now', 'getPDFnow'),
            ('get_pdf_str', 'getPDFstr'),
            'get_text_length',
            ('get_text_length', 'getTextlength'),
            ('image_profile', 'ImageProperties'),
            ('paper_rect', 'PaperRect'),
            ('paper_size', 'PaperSize'),
            'paper_sizes',
            'planish_line',
        )),
        (Outline, (
            'is_external',
            'is_open',
        )),
        (Page, (
            'add_caret_annot',
            'add_circle_annot',
            'add_file_annot',
            'add_freetext_annot',
            'add_highlight_annot',
            'add_ink_annot',
            'add_line_annot',
            'add_polygon_annot',
            'add_polyline_annot',
            'add_rect_annot',
            'add_redact_annot',
            'add_squiggly_annot',
            'add_stamp_annot',
            'add_strikeout_annot',
            'add_text_annot',
            'add_underline_annot',
            'add_widget',
            'clean_contents',
            ('cropbox', 'CropBox'),
            ('cropbox_position', 'CropBoxPosition'),
            'delete_annot',
            'delete_link',
            'delete_widget',
            'derotation_matrix',
            'draw_bezier',
            'draw_circle',
            'draw_curve',
            'draw_line',
            'draw_oval',
            'draw_polyline',
            'draw_quad',
            'draw_rect',
            'draw_sector',
            'draw_squiggle',
            'draw_zigzag',
            'first_annot',
            'first_link',
            'first_widget',
            'get_contents',
            ('get_displaylist', 'getDisplayList'),
            'get_drawings',
            ('get_fonts', 'getFontList'),
            'get_image_bbox',
            ('get_images', 'getImageList'),
            'get_links',
            'get_pixmap',
            ('get_svg_image', 'getSVGimage'),
            'get_text',
            'get_text_blocks',
            'get_text_words',
            'get_textbox',
            ('get_textpage', 'getTextPage'),
            'insert_font',
            'insert_image',
            'insert_link',
            'insert_text',
            'insert_textbox',
            ('is_wrapped', '_isWrapped'),
            'load_annot',
            'load_links',
            ('mediabox', 'MediaBox'),
            ('mediabox_size', 'MediaBoxSize'),
            'new_shape',
            'read_contents',
            'rotation_matrix',
            'search_for',
            ('set_cropbox', 'setCropBox'),
            ('set_mediabox', 'setMediaBox'),
            'set_rotation',
            ('show_pdf_page', 'showPDFpage'),
            'transformation_matrix',
            'update_link',
            'wrap_contents',
            'write_text',
        )),
        (Pixmap, (
            'clear_with',
            ('copy', 'copyPixmap'),
            'gamma_with',
            ('invert_irect', 'invertIRect'),
            ('pil_save', 'pillowWrite'),
            ('pil_tobytes', 'pillowData'),
            ('save', 'writeImage'),
            ('save', 'writePNG'),
            'set_alpha',
            ('set_dpi', 'setResolution'),
            'set_origin',
            'set_pixel',
            'set_rect',
            'tint_with',
            ('tobytes', 'getImageData'),
            ('tobytes', 'getPNGData'),
            ('tobytes', 'getPNGdata'),
        )),
        (Quad, (
            'is_convex',
            'is_empty',
            'is_rectangular',
        )),
        (Rect, (
            'get_area',
            ('get_area', 'getRectArea'),
            'include_point',
            'include_rect',
            'is_empty',
            'is_infinite',
        )),
        (TextWriter, (
            'fill_textbox',
            'write_text',
        )),
        (Shape, (
            'draw_bezier',
            'draw_circle',
            'draw_curve',
            'draw_line',
            'draw_oval',
            'draw_polyline',
            'draw_quad',
            'draw_rect',
            'draw_sector',
            'draw_squiggle',
            'draw_zigzag',
            'insert_text',
            'insert_textbox',
        )),
    )
    for target, entries in alias_table:
        for entry in entries:
            if isinstance(entry, str):
                _alias( target, entry)
            else:
                _alias( target, *entry)
# Legacy aliases are switched off; flip the condition to re-create them.
if False:
    restore_aliases()

# Package version string.
__version__ = VersionBind

# Module docstring: binding and MuPDF versions on the first line, Python
# version, platform and pointer width on the second.
__doc__ = (
        f'PyMuPDF {VersionBind}: Python bindings for the MuPDF {VersionFitz} library (rebased implementation).\n'
        f'Python {sys.version_info[0]}.{sys.version_info[1]} running on {sys.platform} ({64 if sys.maxsize > 2**32 else 32}-bit).\n'
        )
|