__init__.py 728 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211
26221262312624126251262612627126281262912630126311263212633126341263512636126371263812639126401264112642126431264412645126461264712648126491265012651126521265312654126551265612657126581265912660126611266212663126641266512666126671266812669126701267112672126731267412675126761267712678126791268012681126821268312684126851268612687126881268912690126911269212693126941269512696126971269812699127001270112702127031270412705127061270712708127091271012711127121271312714127151271612717127181271912720127211272212723127241272512726127271272812729127301273112732127331273412735127361273712738127391274012741127421274312744127451274612747127481274912750127511275212753127541275512756127571275812759127601276112762127631276412765127661276712768127691277012771127721277312774127751277612777127781277912780127811278212783127841278512786127871278812789127901279112792127931279412795127961279712798127991280012801128021280312804128051280612807128081280912810128111281212813128141281512816128171281812819128201282112822128231282412825128261282712828128291283012831128321283312834128351283612837128381283912840128411284212843128441284512846128471284812849128501285112852128531285412855128561285712858128591286012861128621286312864128651286612867128681286912870128711287212873128741287512876128771287812879128801288112882128831288412885128861288712888128891289012891128921289312894128951289612897128981289912900129011290212903129041290512906129071290812909129101291112912129131291412915129161291712918129191292012921129221292312924129251292612927129281292912930129311293212933129341293512936129371293812939129401294112942129431294412945129461294712948129491295012951129521295312954129551295612957129581295912960129611296212963129641296512966129671296812969129701297112972129731297412975129761297712978129791298012981129821298312984129851298612987129881298912990129911299212993129941299512996129971299812999130001300113002130031300413005130061300713008130091301013011130121301313014130151301613017130181301913020130211
30221302313024130251302613027130281302913030130311303213033130341303513036130371303813039130401304113042130431304413045130461304713048130491305013051130521305313054130551305613057130581305913060130611306213063130641306513066130671306813069130701307113072130731307413075130761307713078130791308013081130821308313084130851308613087130881308913090130911309213093130941309513096130971309813099131001310113102131031310413105131061310713108131091311013111131121311313114131151311613117131181311913120131211312213123131241312513126131271312813129131301313113132131331313413135131361313713138131391314013141131421314313144131451314613147131481314913150131511315213153131541315513156131571315813159131601316113162131631316413165131661316713168131691317013171131721317313174131751317613177131781317913180131811318213183131841318513186131871318813189131901319113192131931319413195131961319713198131991320013201132021320313204132051320613207132081320913210132111321213213132141321513216132171321813219132201322113222132231322413225132261322713228132291323013231132321323313234132351323613237132381323913240132411324213243132441324513246132471324813249132501325113252132531325413255132561325713258132591326013261132621326313264132651326613267132681326913270132711327213273132741327513276132771327813279132801328113282132831328413285132861328713288132891329013291132921329313294132951329613297132981329913300133011330213303133041330513306133071330813309133101331113312133131331413315133161331713318133191332013321133221332313324133251332613327133281332913330133311333213333133341333513336133371333813339133401334113342133431334413345133461334713348133491335013351133521335313354133551335613357133581335913360133611336213363133641336513366133671336813369133701337113372133731337413375133761337713378133791338013381133821338313384133851338613387133881338913390133911339213393133941339513396133971339813399134001340113402134031340413405134061340713408134091341013411134121341313414134151341613417134181341913420134211
34221342313424134251342613427134281342913430134311343213433134341343513436134371343813439134401344113442134431344413445134461344713448134491345013451134521345313454134551345613457134581345913460134611346213463134641346513466134671346813469134701347113472134731347413475134761347713478134791348013481134821348313484134851348613487134881348913490134911349213493134941349513496134971349813499135001350113502135031350413505135061350713508135091351013511135121351313514135151351613517135181351913520135211352213523135241352513526135271352813529135301353113532135331353413535135361353713538135391354013541135421354313544135451354613547135481354913550135511355213553135541355513556135571355813559135601356113562135631356413565135661356713568135691357013571135721357313574135751357613577135781357913580135811358213583135841358513586135871358813589135901359113592135931359413595135961359713598135991360013601136021360313604136051360613607136081360913610136111361213613136141361513616136171361813619136201362113622136231362413625136261362713628136291363013631136321363313634136351363613637136381363913640136411364213643136441364513646136471364813649136501365113652136531365413655136561365713658136591366013661136621366313664136651366613667136681366913670136711367213673136741367513676136771367813679136801368113682136831368413685136861368713688136891369013691136921369313694136951369613697136981369913700137011370213703137041370513706137071370813709137101371113712137131371413715137161371713718137191372013721137221372313724137251372613727137281372913730137311373213733137341373513736137371373813739137401374113742137431374413745137461374713748137491375013751137521375313754137551375613757137581375913760137611376213763137641376513766137671376813769137701377113772137731377413775137761377713778137791378013781137821378313784137851378613787137881378913790137911379213793137941379513796137971379813799138001380113802138031380413805138061380713808138091381013811138121381313814138151381613817138181381913820138211
38221382313824138251382613827138281382913830138311383213833138341383513836138371383813839138401384113842138431384413845138461384713848138491385013851138521385313854138551385613857138581385913860138611386213863138641386513866138671386813869138701387113872138731387413875138761387713878138791388013881138821388313884138851388613887138881388913890138911389213893138941389513896138971389813899139001390113902139031390413905139061390713908139091391013911139121391313914139151391613917139181391913920139211392213923139241392513926139271392813929139301393113932139331393413935139361393713938139391394013941139421394313944139451394613947139481394913950139511395213953139541395513956139571395813959139601396113962139631396413965139661396713968139691397013971139721397313974139751397613977139781397913980139811398213983139841398513986139871398813989139901399113992139931399413995139961399713998139991400014001140021400314004140051400614007140081400914010140111401214013140141401514016140171401814019140201402114022140231402414025140261402714028140291403014031140321403314034140351403614037140381403914040140411404214043140441404514046140471404814049140501405114052140531405414055140561405714058140591406014061140621406314064140651406614067140681406914070140711407214073140741407514076140771407814079140801408114082140831408414085140861408714088140891409014091140921409314094140951409614097140981409914100141011410214103141041410514106141071410814109141101411114112141131411414115141161411714118141191412014121141221412314124141251412614127141281412914130141311413214133141341413514136141371413814139141401414114142141431414414145141461414714148141491415014151141521415314154141551415614157141581415914160141611416214163141641416514166141671416814169141701417114172141731417414175141761417714178141791418014181141821418314184141851418614187141881418914190141911419214193141941419514196141971419814199142001420114202142031420414205142061420714208142091421014211142121421314214142151421614217142181421914220142211
42221422314224142251422614227142281422914230142311423214233142341423514236142371423814239142401424114242142431424414245142461424714248142491425014251142521425314254142551425614257142581425914260142611426214263142641426514266142671426814269142701427114272142731427414275142761427714278142791428014281142821428314284142851428614287142881428914290142911429214293142941429514296142971429814299143001430114302143031430414305143061430714308143091431014311143121431314314143151431614317143181431914320143211432214323143241432514326143271432814329143301433114332143331433414335143361433714338143391434014341143421434314344143451434614347143481434914350143511435214353143541435514356143571435814359143601436114362143631436414365143661436714368143691437014371143721437314374143751437614377143781437914380143811438214383143841438514386143871438814389143901439114392143931439414395143961439714398143991440014401144021440314404144051440614407144081440914410144111441214413144141441514416144171441814419144201442114422144231442414425144261442714428144291443014431144321443314434144351443614437144381443914440144411444214443144441444514446144471444814449144501445114452144531445414455144561445714458144591446014461144621446314464144651446614467144681446914470144711447214473144741447514476144771447814479144801448114482144831448414485144861448714488144891449014491144921449314494144951449614497144981449914500145011450214503145041450514506145071450814509145101451114512145131451414515145161451714518145191452014521145221452314524145251452614527145281452914530145311453214533145341453514536145371453814539145401454114542145431454414545145461454714548145491455014551145521455314554145551455614557145581455914560145611456214563145641456514566145671456814569145701457114572145731457414575145761457714578145791458014581145821458314584145851458614587145881458914590145911459214593145941459514596145971459814599146001460114602146031460414605146061460714608146091461014611146121461314614146151461614617146181461914620146211
46221462314624146251462614627146281462914630146311463214633146341463514636146371463814639146401464114642146431464414645146461464714648146491465014651146521465314654146551465614657146581465914660146611466214663146641466514666146671466814669146701467114672146731467414675146761467714678146791468014681146821468314684146851468614687146881468914690146911469214693146941469514696146971469814699147001470114702147031470414705147061470714708147091471014711147121471314714147151471614717147181471914720147211472214723147241472514726147271472814729147301473114732147331473414735147361473714738147391474014741147421474314744147451474614747147481474914750147511475214753147541475514756147571475814759147601476114762147631476414765147661476714768147691477014771147721477314774147751477614777147781477914780147811478214783147841478514786147871478814789147901479114792147931479414795147961479714798147991480014801148021480314804148051480614807148081480914810148111481214813148141481514816148171481814819148201482114822148231482414825148261482714828148291483014831148321483314834148351483614837148381483914840148411484214843148441484514846148471484814849148501485114852148531485414855148561485714858148591486014861148621486314864148651486614867148681486914870148711487214873148741487514876148771487814879148801488114882148831488414885148861488714888148891489014891148921489314894148951489614897148981489914900149011490214903149041490514906149071490814909149101491114912149131491414915149161491714918149191492014921149221492314924149251492614927149281492914930149311493214933149341493514936149371493814939149401494114942149431494414945149461494714948149491495014951149521495314954149551495614957149581495914960149611496214963149641496514966149671496814969149701497114972149731497414975149761497714978149791498014981149821498314984149851498614987149881498914990149911499214993149941499514996149971499814999150001500115002150031500415005150061500715008150091501015011150121501315014150151501615017150181501915020150211
50221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211
54221542315424154251542615427154281542915430154311543215433154341543515436154371543815439154401544115442154431544415445154461544715448154491545015451154521545315454154551545615457154581545915460154611546215463154641546515466154671546815469154701547115472154731547415475154761547715478154791548015481154821548315484154851548615487154881548915490154911549215493154941549515496154971549815499155001550115502155031550415505155061550715508155091551015511155121551315514155151551615517155181551915520155211552215523155241552515526155271552815529155301553115532155331553415535155361553715538155391554015541155421554315544155451554615547155481554915550155511555215553155541555515556155571555815559155601556115562155631556415565155661556715568155691557015571155721557315574155751557615577155781557915580155811558215583155841558515586155871558815589155901559115592155931559415595155961559715598155991560015601156021560315604156051560615607156081560915610156111561215613156141561515616156171561815619156201562115622156231562415625156261562715628156291563015631156321563315634156351563615637156381563915640156411564215643156441564515646156471564815649156501565115652156531565415655156561565715658156591566015661156621566315664156651566615667156681566915670156711567215673156741567515676156771567815679156801568115682156831568415685156861568715688156891569015691156921569315694156951569615697156981569915700157011570215703157041570515706157071570815709157101571115712157131571415715157161571715718157191572015721157221572315724157251572615727157281572915730157311573215733157341573515736157371573815739157401574115742157431574415745157461574715748157491575015751157521575315754157551575615757157581575915760157611576215763157641576515766157671576815769157701577115772157731577415775157761577715778157791578015781157821578315784157851578615787157881578915790157911579215793157941579515796157971579815799158001580115802158031580415805158061580715808158091581015811158121581315814158151581615817158181581915820158211
58221582315824158251582615827158281582915830158311583215833158341583515836158371583815839158401584115842158431584415845158461584715848158491585015851158521585315854158551585615857158581585915860158611586215863158641586515866158671586815869158701587115872158731587415875158761587715878158791588015881158821588315884158851588615887158881588915890158911589215893158941589515896158971589815899159001590115902159031590415905159061590715908159091591015911159121591315914159151591615917159181591915920159211592215923159241592515926159271592815929159301593115932159331593415935159361593715938159391594015941159421594315944159451594615947159481594915950159511595215953159541595515956159571595815959159601596115962159631596415965159661596715968159691597015971159721597315974159751597615977159781597915980159811598215983159841598515986159871598815989159901599115992159931599415995159961599715998159991600016001160021600316004160051600616007160081600916010160111601216013160141601516016160171601816019160201602116022160231602416025160261602716028160291603016031160321603316034160351603616037160381603916040160411604216043160441604516046160471604816049160501605116052160531605416055160561605716058160591606016061160621606316064160651606616067160681606916070160711607216073160741607516076160771607816079160801608116082160831608416085160861608716088160891609016091160921609316094160951609616097160981609916100161011610216103161041610516106161071610816109161101611116112161131611416115161161611716118161191612016121161221612316124161251612616127161281612916130161311613216133161341613516136161371613816139161401614116142161431614416145161461614716148161491615016151161521615316154161551615616157161581615916160161611616216163161641616516166161671616816169161701617116172161731617416175161761617716178161791618016181161821618316184161851618616187161881618916190161911619216193161941619516196161971619816199162001620116202162031620416205162061620716208162091621016211162121621316214162151621616217162181621916220162211
62221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211
66221662316624166251662616627166281662916630166311663216633166341663516636166371663816639166401664116642166431664416645166461664716648166491665016651166521665316654166551665616657166581665916660166611666216663166641666516666166671666816669166701667116672166731667416675166761667716678166791668016681166821668316684166851668616687166881668916690166911669216693166941669516696166971669816699167001670116702167031670416705167061670716708167091671016711167121671316714167151671616717167181671916720167211672216723167241672516726167271672816729167301673116732167331673416735167361673716738167391674016741167421674316744167451674616747167481674916750167511675216753167541675516756167571675816759167601676116762167631676416765167661676716768167691677016771167721677316774167751677616777167781677916780167811678216783167841678516786167871678816789167901679116792167931679416795167961679716798167991680016801168021680316804168051680616807168081680916810168111681216813168141681516816168171681816819168201682116822168231682416825168261682716828168291683016831168321683316834168351683616837168381683916840168411684216843168441684516846168471684816849168501685116852168531685416855168561685716858168591686016861168621686316864168651686616867168681686916870168711687216873168741687516876168771687816879168801688116882168831688416885168861688716888168891689016891168921689316894168951689616897168981689916900169011690216903169041690516906169071690816909169101691116912169131691416915169161691716918169191692016921169221692316924169251692616927169281692916930169311693216933169341693516936169371693816939169401694116942169431694416945169461694716948169491695016951169521695316954169551695616957169581695916960169611696216963169641696516966169671696816969169701697116972169731697416975169761697716978169791698016981169821698316984169851698616987169881698916990169911699216993169941699516996169971699816999170001700117002170031700417005170061700717008170091701017011170121701317014170151701617017170181701917020170211
70221702317024170251702617027170281702917030170311703217033170341703517036170371703817039170401704117042170431704417045170461704717048170491705017051170521705317054170551705617057170581705917060170611706217063170641706517066170671706817069170701707117072170731707417075170761707717078170791708017081170821708317084170851708617087170881708917090170911709217093170941709517096170971709817099171001710117102171031710417105171061710717108171091711017111171121711317114171151711617117171181711917120171211712217123171241712517126171271712817129171301713117132171331713417135171361713717138171391714017141171421714317144171451714617147171481714917150171511715217153171541715517156171571715817159171601716117162171631716417165171661716717168171691717017171171721717317174171751717617177171781717917180171811718217183171841718517186171871718817189171901719117192171931719417195171961719717198171991720017201172021720317204172051720617207172081720917210172111721217213172141721517216172171721817219172201722117222172231722417225172261722717228172291723017231172321723317234172351723617237172381723917240172411724217243172441724517246172471724817249172501725117252172531725417255172561725717258172591726017261172621726317264172651726617267172681726917270172711727217273172741727517276172771727817279172801728117282172831728417285172861728717288172891729017291172921729317294172951729617297172981729917300173011730217303173041730517306173071730817309173101731117312173131731417315173161731717318173191732017321173221732317324173251732617327173281732917330173311733217333173341733517336173371733817339173401734117342173431734417345173461734717348173491735017351173521735317354173551735617357173581735917360173611736217363173641736517366173671736817369173701737117372173731737417375173761737717378173791738017381173821738317384173851738617387173881738917390173911739217393173941739517396173971739817399174001740117402174031740417405174061740717408174091741017411174121741317414174151741617417174181741917420174211
74221742317424174251742617427174281742917430174311743217433174341743517436174371743817439174401744117442174431744417445174461744717448174491745017451174521745317454174551745617457174581745917460174611746217463174641746517466174671746817469174701747117472174731747417475174761747717478174791748017481174821748317484174851748617487174881748917490174911749217493174941749517496174971749817499175001750117502175031750417505175061750717508175091751017511175121751317514175151751617517175181751917520175211752217523175241752517526175271752817529175301753117532175331753417535175361753717538175391754017541175421754317544175451754617547175481754917550175511755217553175541755517556175571755817559175601756117562175631756417565175661756717568175691757017571175721757317574175751757617577175781757917580175811758217583175841758517586175871758817589175901759117592175931759417595175961759717598175991760017601176021760317604176051760617607176081760917610176111761217613176141761517616176171761817619176201762117622176231762417625176261762717628176291763017631176321763317634176351763617637176381763917640176411764217643176441764517646176471764817649176501765117652176531765417655176561765717658176591766017661176621766317664176651766617667176681766917670176711767217673176741767517676176771767817679176801768117682176831768417685176861768717688176891769017691176921769317694176951769617697176981769917700177011770217703177041770517706177071770817709177101771117712177131771417715177161771717718177191772017721177221772317724177251772617727177281772917730177311773217733177341773517736177371773817739177401774117742177431774417745177461774717748177491775017751177521775317754177551775617757177581775917760177611776217763177641776517766177671776817769177701777117772177731777417775177761777717778177791778017781177821778317784177851778617787177881778917790177911779217793177941779517796177971779817799178001780117802178031780417805178061780717808178091781017811178121781317814178151781617817178181781917820178211
78221782317824178251782617827178281782917830178311783217833178341783517836178371783817839178401784117842178431784417845178461784717848178491785017851178521785317854178551785617857178581785917860178611786217863178641786517866178671786817869178701787117872178731787417875178761787717878178791788017881178821788317884178851788617887178881788917890178911789217893178941789517896178971789817899179001790117902179031790417905179061790717908179091791017911179121791317914179151791617917179181791917920179211792217923179241792517926179271792817929179301793117932179331793417935179361793717938179391794017941179421794317944179451794617947179481794917950179511795217953179541795517956179571795817959179601796117962179631796417965179661796717968179691797017971179721797317974179751797617977179781797917980179811798217983179841798517986179871798817989179901799117992179931799417995179961799717998179991800018001180021800318004180051800618007180081800918010180111801218013180141801518016180171801818019180201802118022180231802418025180261802718028180291803018031180321803318034180351803618037180381803918040180411804218043180441804518046180471804818049180501805118052180531805418055180561805718058180591806018061180621806318064180651806618067180681806918070180711807218073180741807518076180771807818079180801808118082180831808418085180861808718088180891809018091180921809318094180951809618097180981809918100181011810218103181041810518106181071810818109181101811118112181131811418115181161811718118181191812018121181221812318124181251812618127181281812918130181311813218133181341813518136181371813818139181401814118142181431814418145181461814718148181491815018151181521815318154181551815618157181581815918160181611816218163181641816518166181671816818169181701817118172181731817418175181761817718178181791818018181181821818318184181851818618187181881818918190181911819218193181941819518196181971819818199182001820118202182031820418205182061820718208182091821018211182121821318214182151821618217182181821918220182211
82221822318224182251822618227182281822918230182311823218233182341823518236182371823818239182401824118242182431824418245182461824718248182491825018251182521825318254182551825618257182581825918260182611826218263182641826518266182671826818269182701827118272182731827418275182761827718278182791828018281182821828318284182851828618287182881828918290182911829218293182941829518296182971829818299183001830118302183031830418305183061830718308183091831018311183121831318314183151831618317183181831918320183211832218323183241832518326183271832818329183301833118332183331833418335183361833718338183391834018341183421834318344183451834618347183481834918350183511835218353183541835518356183571835818359183601836118362183631836418365183661836718368183691837018371183721837318374183751837618377183781837918380183811838218383183841838518386183871838818389183901839118392183931839418395183961839718398183991840018401184021840318404184051840618407184081840918410184111841218413184141841518416184171841818419184201842118422184231842418425184261842718428184291843018431184321843318434184351843618437184381843918440184411844218443184441844518446184471844818449184501845118452184531845418455184561845718458184591846018461184621846318464184651846618467184681846918470184711847218473184741847518476184771847818479184801848118482184831848418485184861848718488184891849018491184921849318494184951849618497184981849918500185011850218503185041850518506185071850818509185101851118512185131851418515185161851718518185191852018521185221852318524185251852618527185281852918530185311853218533185341853518536185371853818539185401854118542185431854418545185461854718548185491855018551185521855318554185551855618557185581855918560185611856218563185641856518566185671856818569185701857118572185731857418575185761857718578185791858018581185821858318584185851858618587185881858918590185911859218593185941859518596185971859818599186001860118602186031860418605186061860718608186091861018611186121861318614186151861618617186181861918620186211
86221862318624186251862618627186281862918630186311863218633186341863518636186371863818639186401864118642186431864418645186461864718648186491865018651186521865318654186551865618657186581865918660186611866218663186641866518666186671866818669186701867118672186731867418675186761867718678186791868018681186821868318684186851868618687186881868918690186911869218693186941869518696186971869818699187001870118702187031870418705187061870718708187091871018711187121871318714187151871618717187181871918720187211872218723187241872518726187271872818729187301873118732187331873418735187361873718738187391874018741187421874318744187451874618747187481874918750187511875218753187541875518756187571875818759187601876118762187631876418765187661876718768187691877018771187721877318774187751877618777187781877918780187811878218783187841878518786187871878818789187901879118792187931879418795187961879718798187991880018801188021880318804188051880618807188081880918810188111881218813188141881518816188171881818819188201882118822188231882418825188261882718828188291883018831188321883318834188351883618837188381883918840188411884218843188441884518846188471884818849188501885118852188531885418855188561885718858188591886018861188621886318864188651886618867188681886918870188711887218873188741887518876188771887818879188801888118882188831888418885188861888718888188891889018891188921889318894188951889618897188981889918900189011890218903189041890518906189071890818909189101891118912189131891418915189161891718918189191892018921189221892318924189251892618927189281892918930189311893218933189341893518936189371893818939189401894118942189431894418945189461894718948189491895018951189521895318954189551895618957189581895918960189611896218963189641896518966189671896818969189701897118972189731897418975189761897718978189791898018981189821898318984189851898618987189881898918990189911899218993189941899518996189971899818999190001900119002190031900419005190061900719008190091901019011190121901319014190151901619017190181901919020190211
90221902319024190251902619027190281902919030190311903219033190341903519036190371903819039190401904119042190431904419045190461904719048190491905019051190521905319054190551905619057190581905919060190611906219063190641906519066190671906819069190701907119072190731907419075190761907719078190791908019081190821908319084190851908619087190881908919090190911909219093190941909519096190971909819099191001910119102191031910419105191061910719108191091911019111191121911319114191151911619117191181911919120191211912219123191241912519126191271912819129191301913119132191331913419135191361913719138191391914019141191421914319144191451914619147191481914919150191511915219153191541915519156191571915819159191601916119162191631916419165191661916719168191691917019171191721917319174191751917619177191781917919180191811918219183191841918519186191871918819189191901919119192191931919419195191961919719198191991920019201192021920319204192051920619207192081920919210192111921219213192141921519216192171921819219192201922119222192231922419225192261922719228192291923019231192321923319234192351923619237192381923919240192411924219243192441924519246192471924819249192501925119252192531925419255192561925719258192591926019261192621926319264192651926619267192681926919270192711927219273192741927519276192771927819279192801928119282192831928419285192861928719288192891929019291192921929319294192951929619297192981929919300193011930219303193041930519306193071930819309193101931119312193131931419315193161931719318193191932019321193221932319324193251932619327193281932919330193311933219333193341933519336193371933819339193401934119342193431934419345193461934719348193491935019351193521935319354193551935619357193581935919360193611936219363193641936519366193671936819369193701937119372193731937419375193761937719378193791938019381193821938319384193851938619387193881938919390193911939219393193941939519396193971939819399194001940119402194031940419405194061940719408194091941019411194121941319414194151941619417194181941919420194211
94221942319424194251942619427194281942919430194311943219433194341943519436194371943819439194401944119442194431944419445194461944719448194491945019451194521945319454194551945619457194581945919460194611946219463194641946519466194671946819469194701947119472194731947419475194761947719478194791948019481194821948319484194851948619487194881948919490194911949219493194941949519496194971949819499195001950119502195031950419505195061950719508195091951019511195121951319514195151951619517195181951919520195211952219523195241952519526195271952819529195301953119532195331953419535195361953719538195391954019541195421954319544195451954619547195481954919550195511955219553195541955519556195571955819559195601956119562195631956419565195661956719568195691957019571195721957319574195751957619577195781957919580195811958219583195841958519586195871958819589195901959119592195931959419595195961959719598195991960019601196021960319604196051960619607196081960919610196111961219613196141961519616196171961819619196201962119622196231962419625196261962719628196291963019631196321963319634196351963619637196381963919640196411964219643196441964519646196471964819649196501965119652196531965419655196561965719658196591966019661196621966319664196651966619667196681966919670196711967219673196741967519676196771967819679196801968119682196831968419685196861968719688196891969019691196921969319694196951969619697196981969919700197011970219703197041970519706197071970819709197101971119712197131971419715197161971719718197191972019721197221972319724197251972619727197281972919730197311973219733197341973519736197371973819739197401974119742197431974419745197461974719748197491975019751197521975319754197551975619757197581975919760197611976219763197641976519766197671976819769197701977119772197731977419775197761977719778197791978019781197821978319784197851978619787197881978919790197911979219793197941979519796197971979819799198001980119802198031980419805198061980719808198091981019811198121981319814198151981619817198181981919820198211
98221982319824198251982619827198281982919830198311983219833198341983519836198371983819839198401984119842198431984419845198461984719848198491985019851198521985319854198551985619857198581985919860198611986219863198641986519866198671986819869198701987119872198731987419875198761987719878198791988019881198821988319884198851988619887198881988919890198911989219893198941989519896198971989819899199001990119902199031990419905199061990719908199091991019911199121991319914199151991619917199181991919920199211992219923199241992519926199271992819929199301993119932199331993419935199361993719938199391994019941199421994319944199451994619947199481994919950199511995219953199541995519956199571995819959199601996119962199631996419965199661996719968199691997019971199721997319974199751997619977199781997919980199811998219983199841998519986199871998819989199901999119992199931999419995199961999719998199992000020001200022000320004200052000620007200082000920010200112001220013200142001520016200172001820019200202002120022200232002420025200262002720028200292003020031200322003320034200352003620037200382003920040200412004220043200442004520046200472004820049200502005120052200532005420055200562005720058200592006020061200622006320064200652006620067200682006920070200712007220073200742007520076200772007820079200802008120082200832008420085200862008720088200892009020091200922009320094200952009620097200982009920100201012010220103201042010520106201072010820109201102011120112201132011420115201162011720118201192012020121201222012320124201252012620127201282012920130201312013220133201342013520136201372013820139201402014120142201432014420145201462014720148201492015020151201522015320154201552015620157201582015920160201612016220163201642016520166201672016820169201702017120172201732017420175201762017720178201792018020181201822018320184201852018620187201882018920190201912019220193201942019520196201972019820199202002020120202202032020420205202062020720208202092021020211202122021320214202152021620217202182021920220202212
02222022320224202252022620227202282022920230202312023220233202342023520236202372023820239202402024120242202432024420245202462024720248202492025020251202522025320254202552025620257202582025920260202612026220263202642026520266202672026820269202702027120272202732027420275202762027720278202792028020281202822028320284202852028620287202882028920290202912029220293202942029520296202972029820299203002030120302203032030420305203062030720308203092031020311203122031320314203152031620317203182031920320203212032220323203242032520326203272032820329203302033120332203332033420335203362033720338203392034020341203422034320344203452034620347203482034920350203512035220353203542035520356203572035820359203602036120362203632036420365203662036720368203692037020371203722037320374203752037620377203782037920380203812038220383203842038520386203872038820389203902039120392203932039420395203962039720398203992040020401204022040320404204052040620407204082040920410204112041220413204142041520416204172041820419204202042120422204232042420425204262042720428204292043020431204322043320434204352043620437204382043920440204412044220443204442044520446204472044820449204502045120452204532045420455204562045720458204592046020461204622046320464204652046620467204682046920470204712047220473204742047520476204772047820479204802048120482204832048420485204862048720488204892049020491204922049320494204952049620497204982049920500205012050220503205042050520506205072050820509205102051120512205132051420515205162051720518205192052020521205222052320524205252052620527205282052920530205312053220533205342053520536205372053820539205402054120542205432054420545205462054720548205492055020551205522055320554205552055620557205582055920560205612056220563205642056520566205672056820569205702057120572205732057420575205762057720578205792058020581205822058320584205852058620587205882058920590205912059220593205942059520596205972059820599206002060120602206032060420605206062060720608206092061020611206122061320614206152061620617206182061920620206212
06222062320624206252062620627206282062920630206312063220633206342063520636206372063820639206402064120642206432064420645206462064720648206492065020651206522065320654206552065620657206582065920660206612066220663206642066520666206672066820669206702067120672206732067420675206762067720678206792068020681206822068320684206852068620687206882068920690206912069220693206942069520696206972069820699207002070120702207032070420705207062070720708207092071020711207122071320714207152071620717207182071920720207212072220723207242072520726207272072820729207302073120732207332073420735207362073720738207392074020741207422074320744207452074620747207482074920750207512075220753207542075520756207572075820759207602076120762207632076420765207662076720768207692077020771207722077320774207752077620777207782077920780207812078220783207842078520786207872078820789207902079120792207932079420795207962079720798207992080020801208022080320804208052080620807208082080920810208112081220813208142081520816208172081820819208202082120822208232082420825208262082720828208292083020831208322083320834208352083620837208382083920840208412084220843208442084520846208472084820849208502085120852208532085420855208562085720858208592086020861208622086320864208652086620867208682086920870208712087220873208742087520876208772087820879208802088120882208832088420885208862088720888208892089020891208922089320894208952089620897208982089920900209012090220903209042090520906209072090820909209102091120912209132091420915209162091720918209192092020921209222092320924209252092620927209282092920930209312093220933209342093520936209372093820939209402094120942209432094420945209462094720948209492095020951209522095320954209552095620957209582095920960209612096220963209642096520966209672096820969209702097120972209732097420975209762097720978209792098020981209822098320984209852098620987209882098920990209912099220993209942099520996209972099820999210002100121002210032100421005210062100721008210092101021011210122101321014210152101621017210182101921020210212
102221023210242102521026210272102821029210302103121032210332103421035210362103721038210392104021041210422104321044210452104621047210482104921050210512105221053210542105521056210572105821059210602106121062210632106421065210662106721068210692107021071210722107321074210752107621077210782107921080210812108221083210842108521086210872108821089210902109121092210932109421095210962109721098210992110021101211022110321104211052110621107211082110921110211112111221113211142111521116211172111821119211202112121122211232112421125211262112721128211292113021131211322113321134211352113621137211382113921140211412114221143211442114521146211472114821149211502115121152211532115421155211562115721158211592116021161211622116321164211652116621167211682116921170211712117221173211742117521176211772117821179211802118121182211832118421185211862118721188211892119021191211922119321194211952119621197211982119921200212012120221203212042120521206212072120821209212102121121212212132121421215212162121721218212192122021221212222122321224212252122621227212282122921230212312123221233212342123521236212372123821239212402124121242212432124421245212462124721248212492125021251212522125321254212552125621257212582125921260212612126221263212642126521266212672126821269212702127121272212732127421275212762127721278212792128021281212822128321284212852128621287212882128921290212912129221293212942129521296212972129821299213002130121302213032130421305213062130721308
  1. '''
  2. PyMuPDF implemented on top of MuPDF Python bindings.
  3. License:
  4. SPDX-License-Identifier: GPL-3.0-only
  5. '''
  6. # To reduce startup times, we don't import everything we require here.
  7. #
  8. import atexit
  9. import binascii
  10. import collections
  11. import inspect
  12. import io
  13. import math
  14. import os
  15. import pathlib
  16. import glob
  17. import re
  18. import string
  19. import sys
  20. import tarfile
  21. import time
  22. import typing
  23. import warnings
  24. import weakref
  25. import zipfile
  26. from . import extra
  27. # Set up g_out_log and g_out_message from environment variables.
  28. #
  29. # PYMUPDF_MESSAGE controls the destination of user messages (from function
  30. # `pymupdf.message()`).
  31. #
  32. # PYMUPDF_LOG controls the destination of internal development logging (from
  33. # function `pymupdf.log()`).
  34. #
  35. # For syntax, see _make_output()'s `text` arg.
  36. #
  37. def _make_output(
  38. *,
  39. text=None,
  40. fd=None,
  41. stream=None,
  42. path=None,
  43. path_append=None,
  44. pylogging=None,
  45. pylogging_logger=None,
  46. pylogging_level=None,
  47. pylogging_name=None,
  48. default=None,
  49. ):
  50. '''
  51. Returns a stream that writes to a specified destination, which can be a
  52. file descriptor, a file, an existing stream or Python's `logging' system.
  53. Args:
  54. text: text specification of destination.
  55. fd:<int> - write to file descriptor.
  56. path:<str> - write to file.
  57. path+:<str> - append to file.
  58. logging:<items> - write to Python `logging` module.
  59. items: comma-separated <name=value> pairs.
  60. level=<int>
  61. name=<str>.
  62. Other names are ignored.
  63. fd: an int file descriptor.
  64. stream: something with methods .write(text) and .flush().
  65. If specified we simply return <stream>.
  66. path: a file path.
  67. If specified we return a stream that writes to this file.
  68. path_append: a file path.
  69. If specified we return a stream that appends to this file.
  70. pylogging*:
  71. if any of these args is not None, we return a stream that writes to
  72. Python's `logging` module.
  73. pylogging:
  74. Unused other than to activate use of logging module.
  75. pylogging_logger:
  76. A logging.Logger; If None, set from <pylogging_name>.
  77. pylogging_level:
  78. An int log level, if None we use
  79. pylogging_logger.getEffectiveLevel().
  80. pylogging_name:
  81. Only used if <pylogging_logger> is None:
  82. If <pylogging_name> is None, we set it to 'pymupdf'.
  83. Then we do: pylogging_logger = logging.getLogger(pylogging_name)
  84. '''
  85. if text is not None:
  86. # Textual specification, for example from from environment variable.
  87. if text.startswith('fd:'):
  88. fd = int(text[3:])
  89. elif text.startswith('path:'):
  90. path = text[5:]
  91. elif text.startswith('path+'):
  92. path_append = text[5:]
  93. elif text.startswith('logging:'):
  94. pylogging = True
  95. items_d = dict()
  96. items = text[8:].split(',')
  97. #items_d = {n: v for (n, v) in [item.split('=', 1) for item in items]}
  98. for item in items:
  99. if not item:
  100. continue
  101. nv = item.split('=', 1)
  102. assert len(nv) == 2, f'Need `=` in {item=}.'
  103. n, v = nv
  104. items_d[n] = v
  105. pylogging_level = items_d.get('level')
  106. if pylogging_level is not None:
  107. pylogging_level = int(pylogging_level)
  108. pylogging_name = items_d.get('name', 'pymupdf')
  109. else:
  110. assert 0, f'Expected prefix `fd:`, `path:`. `path+:` or `logging:` in {text=}.'
  111. if fd is not None:
  112. ret = open(fd, mode='w', closefd=False)
  113. elif stream is not None:
  114. assert hasattr(stream, 'write')
  115. assert hasattr(stream, 'flush')
  116. ret = stream
  117. elif path is not None:
  118. ret = open(path, 'w')
  119. elif path_append is not None:
  120. ret = open(path_append, 'a')
  121. elif (0
  122. or pylogging is not None
  123. or pylogging_logger is not None
  124. or pylogging_level is not None
  125. or pylogging_name is not None
  126. ):
  127. import logging
  128. if pylogging_logger is None:
  129. if pylogging_name is None:
  130. pylogging_name = 'pymupdf'
  131. pylogging_logger = logging.getLogger(pylogging_name)
  132. assert isinstance(pylogging_logger, logging.Logger)
  133. if pylogging_level is None:
  134. pylogging_level = pylogging_logger.getEffectiveLevel()
  135. class Out:
  136. def write(self, text):
  137. # `logging` module appends newlines, but so does the `print()`
  138. # functions in our caller message() and log() fns, so we need to
  139. # remove them here.
  140. text = text.rstrip('\n')
  141. if text:
  142. pylogging_logger.log(pylogging_level, text)
  143. def flush(self):
  144. pass
  145. ret = Out()
  146. else:
  147. ret = default
  148. return ret
  149. # Set steam used by PyMuPDF messaging.
  150. _g_out_message = _make_output(text=os.environ.get('PYMUPDF_MESSAGE'), default=sys.stdout)
  151. # Set steam used by PyMuPDF development/debugging logging.
  152. _g_out_log = _make_output(text=os.environ.get('PYMUPDF_LOG'), default=sys.stdout)
  153. # Things for testing logging.
  154. _g_log_items = list()
  155. _g_log_items_active = False
  156. def _log_items():
  157. return _g_log_items
  158. def _log_items_active(active):
  159. global _g_log_items_active
  160. _g_log_items_active = active
  161. def _log_items_clear():
  162. del _g_log_items[:]
  163. def set_messages(
  164. *,
  165. text=None,
  166. fd=None,
  167. stream=None,
  168. path=None,
  169. path_append=None,
  170. pylogging=None,
  171. pylogging_logger=None,
  172. pylogging_level=None,
  173. pylogging_name=None,
  174. ):
  175. '''
  176. Sets destination of PyMuPDF messages. See _make_output() for details.
  177. '''
  178. global _g_out_message
  179. _g_out_message = _make_output(
  180. text=text,
  181. fd=fd,
  182. stream=stream,
  183. path=path,
  184. path_append=path_append,
  185. pylogging=pylogging,
  186. pylogging_logger=pylogging_logger,
  187. pylogging_level=pylogging_level,
  188. pylogging_name=pylogging_name,
  189. default=_g_out_message,
  190. )
  191. def set_log(
  192. *,
  193. text=None,
  194. fd=None,
  195. stream=None,
  196. path=None,
  197. path_append=None,
  198. pylogging=None,
  199. pylogging_logger=None,
  200. pylogging_level=None,
  201. pylogging_name=None,
  202. ):
  203. '''
  204. Sets destination of PyMuPDF development/debugging logging. See
  205. _make_output() for details.
  206. '''
  207. global _g_out_log
  208. _g_out_log = _make_output(
  209. text=text,
  210. fd=fd,
  211. stream=stream,
  212. path=path,
  213. path_append=path_append,
  214. pylogging=pylogging,
  215. pylogging_logger=pylogging_logger,
  216. pylogging_level=pylogging_level,
  217. pylogging_name=pylogging_name,
  218. default=_g_out_log,
  219. )
  220. def log( text='', caller=1):
  221. '''
  222. For development/debugging diagnostics.
  223. '''
  224. try:
  225. stack = inspect.stack(context=0)
  226. except StopIteration:
  227. pass
  228. else:
  229. frame_record = stack[caller]
  230. try:
  231. filename = os.path.relpath(frame_record.filename)
  232. except Exception: # Can fail on windows.
  233. filename = frame_record.filename
  234. line = frame_record.lineno
  235. function = frame_record.function
  236. text = f'{filename}:{line}:{function}(): {text}'
  237. if _g_log_items_active:
  238. _g_log_items.append(text)
  239. if _g_out_log:
  240. print(text, file=_g_out_log, flush=1)
  241. def message(text=''):
  242. '''
  243. For user messages.
  244. '''
  245. # It looks like `print()` does nothing if sys.stdout is None (without
  246. # raising an exception), but we don't rely on this.
  247. if _g_out_message:
  248. print(text, file=_g_out_message, flush=1)
  249. def exception_info():
  250. import traceback
  251. log(f'exception_info:')
  252. log(traceback.format_exc())
  253. # PDF names must not contain these characters:
  254. INVALID_NAME_CHARS = set(string.whitespace + "()<>[]{}/%" + chr(0))
  255. def get_env_bool( name, default):
  256. '''
  257. Returns `True`, `False` or `default` depending on whether $<name> is '1',
  258. '0' or unset. Otherwise assert-fails.
  259. '''
  260. v = os.environ.get( name)
  261. if v is None:
  262. ret = default
  263. elif v == '1':
  264. ret = True
  265. elif v == '0':
  266. ret = False
  267. else:
  268. assert 0, f'Unrecognised value for {name}: {v!r}'
  269. if ret != default:
  270. log(f'Using non-default setting from {name}: {v!r}')
  271. return ret
  272. def get_env_int( name, default):
  273. '''
  274. Returns `True`, `False` or `default` depending on whether $<name> is '1',
  275. '0' or unset. Otherwise assert-fails.
  276. '''
  277. v = os.environ.get( name)
  278. if v is None:
  279. ret = default
  280. else:
  281. ret = int(v)
  282. if ret != default:
  283. log(f'Using non-default setting from {name}: {v}')
  284. return ret
  285. # All our `except ...` blocks output diagnostics if `g_exceptions_verbose` is
  286. # true.
  287. g_exceptions_verbose = get_env_int( 'PYMUPDF_EXCEPTIONS_VERBOSE', 1)
  288. # $PYMUPDF_USE_EXTRA overrides whether to use optimised C fns in `extra`.
  289. #
  290. g_use_extra = get_env_bool( 'PYMUPDF_USE_EXTRA', True)
  291. # Global switches
  292. #
  293. class _Globals:
  294. def __init__(self):
  295. self.no_device_caching = 0
  296. self.small_glyph_heights = 0
  297. self.subset_fontnames = 0
  298. self.skip_quad_corrections = 0
  299. _globals = _Globals()
  300. # Optionally use MuPDF via cppyy bindings; experimental and not tested recently
  301. # as of 2023-01-20 11:51:40
  302. #
  303. mupdf_cppyy = os.environ.get( 'MUPDF_CPPYY')
  304. if mupdf_cppyy is not None:
  305. # pylint: disable=all
  306. log( f'{__file__}: $MUPDF_CPPYY={mupdf_cppyy!r} so attempting to import mupdf_cppyy.')
  307. log( f'{__file__}: $PYTHONPATH={os.environ["PYTHONPATH"]}')
  308. if mupdf_cppyy == '':
  309. import mupdf_cppyy
  310. else:
  311. import importlib
  312. mupdf_cppyy = importlib.machinery.SourceFileLoader(
  313. 'mupdf_cppyy',
  314. mupdf_cppyy
  315. ).load_module()
  316. mupdf = mupdf_cppyy.cppyy.gbl.mupdf
  317. else:
  318. # Use MuPDF Python SWIG bindings. We allow import from either our own
  319. # directory for conventional wheel installs, or from separate place in case
  320. # we are using a separately-installed system installation of mupdf.
  321. #
  322. try:
  323. from . import mupdf
  324. except Exception:
  325. import mupdf
  326. if hasattr(mupdf, 'internal_check_ndebug'):
  327. mupdf.internal_check_ndebug()
  328. mupdf.reinit_singlethreaded()
  329. def _int_rc(text):
  330. '''
  331. Converts string to int, ignoring trailing 'rc...'.
  332. '''
  333. rc = text.find('rc')
  334. if rc >= 0:
  335. text = text[:rc]
  336. return int(text)
  337. # Basic version information.
  338. #
  339. # (We use `noqa F401` to avoid flake8 errors such as `F401
  340. # '._build.mupdf_location' imported but unused`.
  341. #
  342. from ._build import mupdf_location # noqa F401
  343. from ._build import pymupdf_git_branch # noqa F401
  344. from ._build import pymupdf_git_diff # noqa F401
  345. from ._build import pymupdf_git_sha # noqa F401
  346. from ._build import pymupdf_version # noqa F401
  347. from ._build import swig_version # noqa F401
  348. from ._build import swig_version_tuple # noqa F401
  349. mupdf_version = mupdf.FZ_VERSION
  350. # Removed in PyMuPDF-1.26.1.
  351. pymupdf_date = None
  352. # Versions as tuples; useful when comparing versions.
  353. #
  354. pymupdf_version_tuple = tuple( [_int_rc(i) for i in pymupdf_version.split('.')])
  355. mupdf_version_tuple = tuple( [_int_rc(i) for i in mupdf_version.split('.')])
  356. assert mupdf_version_tuple == (mupdf.FZ_VERSION_MAJOR, mupdf.FZ_VERSION_MINOR, mupdf.FZ_VERSION_PATCH), \
  357. f'Inconsistent MuPDF version numbers: {mupdf_version_tuple=} != {(mupdf.FZ_VERSION_MAJOR, mupdf.FZ_VERSION_MINOR, mupdf.FZ_VERSION_PATCH)=}'
  358. # Legacy version information.
  359. #
  360. version = (pymupdf_version, mupdf_version, None)
  361. VersionFitz = mupdf_version
  362. VersionBind = pymupdf_version
  363. VersionDate = None
  364. # String formatting.
  365. def _format_g(value, *, fmt='%g'):
  366. '''
  367. Returns `value` formatted with mupdf.fz_format_double() if available,
  368. otherwise with Python's `%`.
  369. If `value` is a list or tuple, we return a space-separated string of
  370. formatted values.
  371. '''
  372. if isinstance(value, (list, tuple)):
  373. ret = ''
  374. for v in value:
  375. if ret:
  376. ret += ' '
  377. ret += _format_g(v, fmt=fmt)
  378. return ret
  379. else:
  380. return mupdf.fz_format_double(fmt, value)
  381. format_g = _format_g
  382. # ByteString is gone from typing in 3.14.
  383. # collections.abc.Buffer available from 3.12 only
  384. try:
  385. ByteString = typing.ByteString
  386. except AttributeError:
  387. ByteString = bytes | bytearray | memoryview
  388. # Names required by class method typing annotations.
  389. OptBytes = typing.Optional[ByteString]
  390. OptDict = typing.Optional[dict]
  391. OptFloat = typing.Optional[float]
  392. OptInt = typing.Union[int, None]
  393. OptSeq = typing.Optional[typing.Sequence]
  394. OptStr = typing.Optional[str]
  395. Page = 'Page_forward_decl'
  396. Point = 'Point_forward_decl'
  397. matrix_like = 'matrix_like'
  398. point_like = 'point_like'
  399. quad_like = 'quad_like'
  400. rect_like = 'rect_like'
  401. def _as_fz_document(document):
  402. '''
  403. Returns document as a mupdf.FzDocument, upcasting as required. Raises
  404. 'document closed' exception if closed.
  405. '''
  406. if isinstance(document, Document):
  407. if document.is_closed:
  408. raise ValueError('document closed')
  409. document = document.this
  410. if isinstance(document, mupdf.FzDocument):
  411. return document
  412. elif isinstance(document, mupdf.PdfDocument):
  413. return document.super()
  414. elif document is None:
  415. assert 0, f'document is None'
  416. else:
  417. assert 0, f'Unrecognised {type(document)=}'
  418. def _as_pdf_document(document, required=True):
  419. '''
  420. Returns `document` downcast to a mupdf.PdfDocument. If downcast fails (i.e.
  421. `document` is not actually a `PdfDocument`) then we assert-fail if `required`
  422. is true (the default) else return a `mupdf.PdfDocument` with `.m_internal`
  423. false.
  424. '''
  425. if isinstance(document, Document):
  426. if document.is_closed:
  427. raise ValueError('document closed')
  428. document = document.this
  429. if isinstance(document, mupdf.PdfDocument):
  430. return document
  431. elif isinstance(document, mupdf.FzDocument):
  432. ret = mupdf.PdfDocument(document)
  433. if required:
  434. assert ret.m_internal
  435. return ret
  436. elif document is None:
  437. assert 0, f'document is None'
  438. else:
  439. assert 0, f'Unrecognised {type(document)=}'
  440. def _as_fz_page(page):
  441. '''
  442. Returns page as a mupdf.FzPage, upcasting as required.
  443. '''
  444. if isinstance(page, Page):
  445. page = page.this
  446. if isinstance(page, mupdf.PdfPage):
  447. return page.super()
  448. elif isinstance(page, mupdf.FzPage):
  449. return page
  450. elif page is None:
  451. assert 0, f'page is None'
  452. else:
  453. assert 0, f'Unrecognised {type(page)=}'
  454. def _as_pdf_page(page, required=True):
  455. '''
  456. Returns `page` downcast to a mupdf.PdfPage. If downcast fails (i.e. `page`
  457. is not actually a `PdfPage`) then we assert-fail if `required` is true (the
  458. default) else return a `mupdf.PdfPage` with `.m_internal` false.
  459. '''
  460. if isinstance(page, Page):
  461. page = page.this
  462. if isinstance(page, mupdf.PdfPage):
  463. return page
  464. elif isinstance(page, mupdf.FzPage):
  465. ret = mupdf.pdf_page_from_fz_page(page)
  466. if required:
  467. assert ret.m_internal
  468. return ret
  469. elif page is None:
  470. assert 0, f'page is None'
  471. else:
  472. assert 0, f'Unrecognised {type(page)=}'
  473. def _pdf_annot_page(annot):
  474. '''
  475. Wrapper for mupdf.pdf_annot_page() which raises an exception if <annot>
  476. is not bound to a page instead of returning a mupdf.PdfPage with
  477. `.m_internal=None`.
  478. [Some other MuPDF functions such as pdf_update_annot()` already raise a
  479. similar exception if a pdf_annot's .page field is null.]
  480. '''
  481. page = mupdf.pdf_annot_page(annot)
  482. if not page.m_internal:
  483. raise RuntimeError('Annot is not bound to a page')
  484. return page
  485. # Fixme: we don't support JM_MEMORY=1.
  486. JM_MEMORY = 0
  487. # Classes
  488. #
  489. class Annot:
  490. def __init__(self, annot):
  491. assert isinstance( annot, mupdf.PdfAnnot)
  492. self.this = annot
  493. def __repr__(self):
  494. parent = getattr(self, 'parent', '<>')
  495. return "'%s' annotation on %s" % (self.type[1], str(parent))
  496. def __str__(self):
  497. return self.__repr__()
  498. def _erase(self):
  499. if getattr(self, "thisown", False):
  500. self.thisown = False
  501. def _get_redact_values(self):
  502. annot = self.this
  503. if mupdf.pdf_annot_type(annot) != mupdf.PDF_ANNOT_REDACT:
  504. return
  505. values = dict()
  506. try:
  507. obj = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "RO")
  508. if obj.m_internal:
  509. message_warning("Ignoring redaction key '/RO'.")
  510. xref = mupdf.pdf_to_num(obj)
  511. values[dictkey_xref] = xref
  512. obj = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "OverlayText")
  513. if obj.m_internal:
  514. text = mupdf.pdf_to_text_string(obj)
  515. values[dictkey_text] = JM_UnicodeFromStr(text)
  516. else:
  517. values[dictkey_text] = ''
  518. obj = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(annot), PDF_NAME('Q'))
  519. align = 0
  520. if obj.m_internal:
  521. align = mupdf.pdf_to_int(obj)
  522. values[dictkey_align] = align
  523. except Exception:
  524. if g_exceptions_verbose: exception_info()
  525. return
  526. val = values
  527. if not val:
  528. return val
  529. val["rect"] = self.rect
  530. text_color, fontname, fontsize = TOOLS._parse_da(self)
  531. val["text_color"] = text_color
  532. val["fontname"] = fontname
  533. val["fontsize"] = fontsize
  534. fill = self.colors["fill"]
  535. val["fill"] = fill
  536. return val
  537. def _getAP(self):
  538. if g_use_extra:
  539. assert isinstance( self.this, mupdf.PdfAnnot)
  540. ret = extra.Annot_getAP(self.this)
  541. assert isinstance( ret, bytes)
  542. return ret
  543. else:
  544. r = None
  545. res = None
  546. annot = self.this
  547. assert isinstance( annot, mupdf.PdfAnnot)
  548. annot_obj = mupdf.pdf_annot_obj( annot)
  549. ap = mupdf.pdf_dict_getl( annot_obj, PDF_NAME('AP'), PDF_NAME('N'))
  550. if mupdf.pdf_is_stream( ap):
  551. res = mupdf.pdf_load_stream( ap)
  552. if res and res.m_internal:
  553. r = JM_BinFromBuffer(res)
  554. return r
  555. def _setAP(self, buffer_, rect=0):
  556. try:
  557. annot = self.this
  558. annot_obj = mupdf.pdf_annot_obj( annot)
  559. page = _pdf_annot_page(annot)
  560. apobj = mupdf.pdf_dict_getl( annot_obj, PDF_NAME('AP'), PDF_NAME('N'))
  561. if not apobj.m_internal:
  562. raise RuntimeError( MSG_BAD_APN)
  563. if not mupdf.pdf_is_stream( apobj):
  564. raise RuntimeError( MSG_BAD_APN)
  565. res = JM_BufferFromBytes( buffer_)
  566. if not res.m_internal:
  567. raise ValueError( MSG_BAD_BUFFER)
  568. JM_update_stream( page.doc(), apobj, res, 1)
  569. if rect:
  570. bbox = mupdf.pdf_dict_get_rect( annot_obj, PDF_NAME('Rect'))
  571. mupdf.pdf_dict_put_rect( apobj, PDF_NAME('BBox'), bbox)
  572. except Exception:
  573. if g_exceptions_verbose: exception_info()
  574. def _update_appearance(self, opacity=-1, blend_mode=None, fill_color=None, rotate=-1):
  575. annot = self.this
  576. assert annot.m_internal
  577. annot_obj = mupdf.pdf_annot_obj( annot)
  578. page = _pdf_annot_page(annot)
  579. pdf = page.doc()
  580. type_ = mupdf.pdf_annot_type( annot)
  581. nfcol, fcol = JM_color_FromSequence(fill_color)
  582. try:
  583. # remove fill color from unsupported annots
  584. # or if so requested
  585. if nfcol == 0 or type_ not in (
  586. mupdf.PDF_ANNOT_SQUARE,
  587. mupdf.PDF_ANNOT_CIRCLE,
  588. mupdf.PDF_ANNOT_LINE,
  589. mupdf.PDF_ANNOT_POLY_LINE,
  590. mupdf.PDF_ANNOT_POLYGON
  591. ):
  592. mupdf.pdf_dict_del( annot_obj, PDF_NAME('IC'))
  593. elif nfcol > 0:
  594. mupdf.pdf_set_annot_interior_color( annot, fcol[:nfcol])
  595. insert_rot = 1 if rotate >= 0 else 0
  596. if type_ not in (
  597. mupdf.PDF_ANNOT_CARET,
  598. mupdf.PDF_ANNOT_CIRCLE,
  599. mupdf.PDF_ANNOT_FREE_TEXT,
  600. mupdf.PDF_ANNOT_FILE_ATTACHMENT,
  601. mupdf.PDF_ANNOT_INK,
  602. mupdf.PDF_ANNOT_LINE,
  603. mupdf.PDF_ANNOT_POLY_LINE,
  604. mupdf.PDF_ANNOT_POLYGON,
  605. mupdf.PDF_ANNOT_SQUARE,
  606. mupdf.PDF_ANNOT_STAMP,
  607. mupdf.PDF_ANNOT_TEXT,
  608. ):
  609. insert_rot = 0
  610. if insert_rot:
  611. mupdf.pdf_dict_put_int(annot_obj, PDF_NAME('Rotate'), rotate)
  612. # insert fill color
  613. if type_ == mupdf.PDF_ANNOT_FREE_TEXT:
  614. if nfcol > 0:
  615. mupdf.pdf_set_annot_color(annot, fcol[:nfcol])
  616. elif nfcol > 0:
  617. col = mupdf.pdf_new_array(page.doc(), nfcol)
  618. for i in range( nfcol):
  619. mupdf.pdf_array_push_real(col, fcol[i])
  620. mupdf.pdf_dict_put(annot_obj, PDF_NAME('IC'), col)
  621. mupdf.pdf_dirty_annot(annot)
  622. mupdf.pdf_update_annot(annot) # let MuPDF update
  623. pdf.resynth_required = 0
  624. except Exception as e:
  625. if g_exceptions_verbose:
  626. exception_info()
  627. message( f'cannot update annot: {e}')
  628. raise
  629. if (opacity < 0 or opacity >= 1) and not blend_mode: # no opacity, no blend_mode
  630. return True
  631. try: # create or update /ExtGState
  632. ap = mupdf.pdf_dict_getl(
  633. mupdf.pdf_annot_obj(annot),
  634. PDF_NAME('AP'),
  635. PDF_NAME('N')
  636. )
  637. if not ap.m_internal: # should never happen
  638. raise RuntimeError( MSG_BAD_APN)
  639. resources = mupdf.pdf_dict_get( ap, PDF_NAME('Resources'))
  640. if not resources.m_internal: # no Resources yet: make one
  641. resources = mupdf.pdf_dict_put_dict( ap, PDF_NAME('Resources'), 2)
  642. alp0 = mupdf.pdf_new_dict( page.doc(), 3)
  643. if opacity >= 0 and opacity < 1:
  644. mupdf.pdf_dict_put_real( alp0, PDF_NAME('CA'), opacity)
  645. mupdf.pdf_dict_put_real( alp0, PDF_NAME('ca'), opacity)
  646. mupdf.pdf_dict_put_real( annot_obj, PDF_NAME('CA'), opacity)
  647. if blend_mode:
  648. mupdf.pdf_dict_put_name( alp0, PDF_NAME('BM'), blend_mode)
  649. mupdf.pdf_dict_put_name( annot_obj, PDF_NAME('BM'), blend_mode)
  650. extg = mupdf.pdf_dict_get( resources, PDF_NAME('ExtGState'))
  651. if not extg.m_internal: # no ExtGState yet: make one
  652. extg = mupdf.pdf_dict_put_dict( resources, PDF_NAME('ExtGState'), 2)
  653. mupdf.pdf_dict_put( extg, PDF_NAME('H'), alp0)
  654. except Exception as e:
  655. if g_exceptions_verbose: exception_info()
  656. message( f'cannot set opacity or blend mode\n: {e}')
  657. raise
  658. return True
  659. @property
  660. def apn_bbox(self):
  661. """annotation appearance bbox"""
  662. CheckParent(self)
  663. annot = self.this
  664. annot_obj = mupdf.pdf_annot_obj(annot)
  665. ap = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AP'), PDF_NAME('N'))
  666. if not ap.m_internal:
  667. val = JM_py_from_rect(mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE))
  668. else:
  669. rect = mupdf.pdf_dict_get_rect(ap, PDF_NAME('BBox'))
  670. val = JM_py_from_rect(rect)
  671. val = Rect(val) * self.get_parent().transformation_matrix
  672. val *= self.get_parent().derotation_matrix
  673. return val
  674. @property
  675. def apn_matrix(self):
  676. """annotation appearance matrix"""
  677. try:
  678. CheckParent(self)
  679. annot = self.this
  680. assert isinstance(annot, mupdf.PdfAnnot)
  681. ap = mupdf.pdf_dict_getl(
  682. mupdf.pdf_annot_obj(annot),
  683. mupdf.PDF_ENUM_NAME_AP,
  684. mupdf.PDF_ENUM_NAME_N
  685. )
  686. if not ap.m_internal:
  687. return JM_py_from_matrix(mupdf.FzMatrix())
  688. mat = mupdf.pdf_dict_get_matrix(ap, mupdf.PDF_ENUM_NAME_Matrix)
  689. val = JM_py_from_matrix(mat)
  690. val = Matrix(val)
  691. return val
  692. except Exception:
  693. if g_exceptions_verbose: exception_info()
  694. raise
  695. @property
  696. def blendmode(self):
  697. """annotation BlendMode"""
  698. CheckParent(self)
  699. annot = self.this
  700. annot_obj = mupdf.pdf_annot_obj(annot)
  701. obj = mupdf.pdf_dict_get(annot_obj, PDF_NAME('BM'))
  702. blend_mode = None
  703. if obj.m_internal:
  704. blend_mode = JM_UnicodeFromStr(mupdf.pdf_to_name(obj))
  705. return blend_mode
  706. # loop through the /AP/N/Resources/ExtGState objects
  707. obj = mupdf.pdf_dict_getl(
  708. annot_obj,
  709. PDF_NAME('AP'),
  710. PDF_NAME('N'),
  711. PDF_NAME('Resources'),
  712. PDF_NAME('ExtGState'),
  713. )
  714. if mupdf.pdf_is_dict(obj):
  715. n = mupdf.pdf_dict_len(obj)
  716. for i in range(n):
  717. obj1 = mupdf.pdf_dict_get_val(obj, i)
  718. if mupdf.pdf_is_dict(obj1):
  719. m = mupdf.pdf_dict_len(obj1)
  720. for j in range(m):
  721. obj2 = mupdf.pdf_dict_get_key(obj1, j)
  722. if mupdf.pdf_objcmp(obj2, PDF_NAME('BM')) == 0:
  723. blend_mode = JM_UnicodeFromStr(mupdf.pdf_to_name(mupdf.pdf_dict_get_val(obj1, j)))
  724. return blend_mode
  725. return blend_mode
  726. @property
  727. def border(self):
  728. """Border information."""
  729. CheckParent(self)
  730. atype = self.type[0]
  731. if atype not in (
  732. mupdf.PDF_ANNOT_CIRCLE,
  733. mupdf.PDF_ANNOT_FREE_TEXT,
  734. mupdf.PDF_ANNOT_INK,
  735. mupdf.PDF_ANNOT_LINE,
  736. mupdf.PDF_ANNOT_POLY_LINE,
  737. mupdf.PDF_ANNOT_POLYGON,
  738. mupdf.PDF_ANNOT_SQUARE,
  739. ):
  740. return dict()
  741. ao = mupdf.pdf_annot_obj(self.this)
  742. ret = JM_annot_border(ao)
  743. return ret
  744. def clean_contents(self, sanitize=1):
  745. """Clean appearance contents stream."""
  746. CheckParent(self)
  747. annot = self.this
  748. pdf = mupdf.pdf_get_bound_document(mupdf.pdf_annot_obj(annot))
  749. filter_ = _make_PdfFilterOptions(recurse=1, instance_forms=0, ascii=0, sanitize=sanitize)
  750. mupdf.pdf_filter_annot_contents(pdf, annot, filter_)
  751. @property
  752. def colors(self):
  753. """Color definitions."""
  754. try:
  755. CheckParent(self)
  756. annot = self.this
  757. assert isinstance(annot, mupdf.PdfAnnot)
  758. return JM_annot_colors(mupdf.pdf_annot_obj(annot))
  759. except Exception:
  760. if g_exceptions_verbose: exception_info()
  761. raise
  762. def delete_responses(self):
  763. """Delete 'Popup' and responding annotations."""
  764. CheckParent(self)
  765. annot = self.this
  766. annot_obj = mupdf.pdf_annot_obj(annot)
  767. page = _pdf_annot_page(annot)
  768. while 1:
  769. irt_annot = JM_find_annot_irt(annot)
  770. if not irt_annot:
  771. break
  772. mupdf.pdf_delete_annot(page, irt_annot)
  773. mupdf.pdf_dict_del(annot_obj, PDF_NAME('Popup'))
  774. annots = mupdf.pdf_dict_get(page.obj(), PDF_NAME('Annots'))
  775. n = mupdf.pdf_array_len(annots)
  776. found = 0
  777. for i in range(n-1, -1, -1):
  778. o = mupdf.pdf_array_get(annots, i)
  779. p = mupdf.pdf_dict_get(o, PDF_NAME('Parent'))
  780. if not o.m_internal:
  781. continue
  782. if not mupdf.pdf_objcmp(p, annot_obj):
  783. mupdf.pdf_array_delete(annots, i)
  784. found = 1
  785. if found:
  786. mupdf.pdf_dict_put(page.obj(), PDF_NAME('Annots'), annots)
  787. @property
  788. def file_info(self):
  789. """Attached file information."""
  790. CheckParent(self)
  791. res = dict()
  792. length = -1
  793. size = -1
  794. desc = None
  795. annot = self.this
  796. annot_obj = mupdf.pdf_annot_obj(annot)
  797. type_ = mupdf.pdf_annot_type(annot)
  798. if type_ != mupdf.PDF_ANNOT_FILE_ATTACHMENT:
  799. raise TypeError( MSG_BAD_ANNOT_TYPE)
  800. stream = mupdf.pdf_dict_getl(
  801. annot_obj,
  802. PDF_NAME('FS'),
  803. PDF_NAME('EF'),
  804. PDF_NAME('F'),
  805. )
  806. if not stream.m_internal:
  807. RAISEPY( "bad PDF: file entry not found", JM_Exc_FileDataError)
  808. fs = mupdf.pdf_dict_get(annot_obj, PDF_NAME('FS'))
  809. o = mupdf.pdf_dict_get(fs, PDF_NAME('UF'))
  810. if o.m_internal:
  811. filename = mupdf.pdf_to_text_string(o)
  812. else:
  813. o = mupdf.pdf_dict_get(fs, PDF_NAME('F'))
  814. if o.m_internal:
  815. filename = mupdf.pdf_to_text_string(o)
  816. o = mupdf.pdf_dict_get(fs, PDF_NAME('Desc'))
  817. if o.m_internal:
  818. desc = mupdf.pdf_to_text_string(o)
  819. o = mupdf.pdf_dict_get(stream, PDF_NAME('Length'))
  820. if o.m_internal:
  821. length = mupdf.pdf_to_int(o)
  822. o = mupdf.pdf_dict_getl(stream, PDF_NAME('Params'), PDF_NAME('Size'))
  823. if o.m_internal:
  824. size = mupdf.pdf_to_int(o)
  825. res[ dictkey_filename] = JM_EscapeStrFromStr(filename)
  826. res[ dictkey_descr] = JM_UnicodeFromStr(desc)
  827. res[ dictkey_length] = length
  828. res[ dictkey_size] = size
  829. return res
  830. @property
  831. def flags(self):
  832. """Flags field."""
  833. CheckParent(self)
  834. annot = self.this
  835. return mupdf.pdf_annot_flags(annot)
  836. def get_file(self):
  837. """Retrieve attached file content."""
  838. CheckParent(self)
  839. annot = self.this
  840. annot_obj = mupdf.pdf_annot_obj(annot)
  841. type = mupdf.pdf_annot_type(annot)
  842. if type != mupdf.PDF_ANNOT_FILE_ATTACHMENT:
  843. raise TypeError( MSG_BAD_ANNOT_TYPE)
  844. stream = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('FS'), PDF_NAME('EF'), PDF_NAME('F'))
  845. if not stream.m_internal:
  846. RAISEPY( "bad PDF: file entry not found", JM_Exc_FileDataError)
  847. buf = mupdf.pdf_load_stream(stream)
  848. res = JM_BinFromBuffer(buf)
  849. return res
  850. def get_oc(self):
  851. """Get annotation optional content reference."""
  852. CheckParent(self)
  853. oc = 0
  854. annot = self.this
  855. annot_obj = mupdf.pdf_annot_obj(annot)
  856. obj = mupdf.pdf_dict_get(annot_obj, PDF_NAME('OC'))
  857. if obj.m_internal:
  858. oc = mupdf.pdf_to_num(obj)
  859. return oc
  860. # PyMuPDF doesn't seem to have this .parent member, but removing it breaks
  861. # 11 tests...?
  862. #@property
  863. def get_parent(self):
  864. try:
  865. ret = getattr( self, 'parent')
  866. except AttributeError:
  867. page = _pdf_annot_page(self.this)
  868. assert isinstance( page, mupdf.PdfPage)
  869. document = Document( page.doc()) if page.m_internal else None
  870. ret = Page(page, document)
  871. #self.parent = weakref.proxy( ret)
  872. self.parent = ret
  873. #log(f'No attribute .parent: {type(self)=} {id(self)=}: have set {id(self.parent)=}.')
  874. #log( f'Have set self.parent')
  875. return ret
  876. def get_pixmap(self, matrix=None, dpi=None, colorspace=None, alpha=0):
  877. """annotation Pixmap"""
  878. CheckParent(self)
  879. cspaces = {"gray": csGRAY, "rgb": csRGB, "cmyk": csCMYK}
  880. if type(colorspace) is str:
  881. colorspace = cspaces.get(colorspace.lower(), None)
  882. if dpi:
  883. matrix = Matrix(dpi / 72, dpi / 72)
  884. ctm = JM_matrix_from_py(matrix)
  885. cs = colorspace
  886. if not cs:
  887. cs = mupdf.fz_device_rgb()
  888. pix = mupdf.pdf_new_pixmap_from_annot(self.this, ctm, cs, mupdf.FzSeparations(0), alpha)
  889. ret = Pixmap(pix)
  890. if dpi:
  891. ret.set_dpi(dpi, dpi)
  892. return ret
  def get_sound(self):
      """Return the properties and audio data of a 'Sound' annotation.

      Returns:
          A dict that always contains 'stream' (the sound data) and,
          for each entry present in the /Sound dictionary, 'rate',
          'channels', 'bps', 'encoding' and 'compression'.

      Raises:
          TypeError: the annotation is not a sound annotation or has no
              /Sound entry.
          An error is raised through RAISEPY (JM_Exc_FileDataError) when
          the sound dictionary has an /F entry.
      """
      CheckParent(self)
      annot = self.this
      annot_obj = mupdf.pdf_annot_obj(annot)
      annot_type = mupdf.pdf_annot_type(annot)
      sound = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Sound'))
      if not (annot_type == mupdf.PDF_ANNOT_SOUND and sound.m_internal):
          raise TypeError( MSG_BAD_ANNOT_TYPE)
      if mupdf.pdf_dict_get(sound, PDF_NAME('F')).m_internal:
          RAISEPY( "unsupported sound stream", JM_Exc_FileDataError)

      props = {}
      # (PDF key, result key, converter) for the entries read via PDF_NAME.
      for pdf_key, label, convert in (
              ('R', 'rate', mupdf.pdf_to_real),
              ('C', 'channels', mupdf.pdf_to_int),
              ('B', 'bps', mupdf.pdf_to_int),
              ('E', 'encoding', mupdf.pdf_to_name),
              ):
          entry = mupdf.pdf_dict_get(sound, PDF_NAME(pdf_key))
          if entry.m_internal:
              props[label] = convert(entry)

      # 'CO' is looked up by string key.
      entry = mupdf.pdf_dict_gets(sound, "CO")
      if entry.m_internal:
          props['compression'] = mupdf.pdf_to_name(entry)

      props['stream'] = JM_BinFromBuffer(mupdf.pdf_load_stream(sound))
      return props
  def get_textpage(self, clip=None, flags=0):
      """Create a TextPage for this annotation.

      Args:
          clip: optional rectangle; when truthy, it is set as the clip of
              the text extraction options and FZ_STEXT_CLIP_RECT is added
              to the flags.
          flags: text extraction flags passed to FzStextOptions.

      Returns:
          A TextPage whose `parent` is a weak proxy of the owning page.
      """
      CheckParent(self)
      options = mupdf.FzStextOptions(flags)
      if clip:
          assert hasattr(mupdf, 'FZ_STEXT_CLIP_RECT'), f'MuPDF-{mupdf_version} does not support FZ_STEXT_CLIP_RECT.'
          options.clip = JM_rect_from_py(clip).internal()
          options.flags |= mupdf.FZ_STEXT_CLIP_RECT
      textpage = TextPage(mupdf.FzStextPage(self.this, options))
      owner = self.get_parent()
      # Never wrap an existing proxy in another proxy.
      textpage.parent = owner if isinstance(owner, weakref.ProxyType) else weakref.proxy(owner)
      return textpage
  @property
  def has_popup(self):
      """True if the annotation dictionary has a /Popup entry, else False."""
      CheckParent(self)
      popup = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(self.this), PDF_NAME('Popup'))
      return bool(popup.m_internal)
  @property
  def info(self):
      """Return a dict with the annotation's descriptive entries.

      The dict holds content, name (/Name), title (/T, the author),
      creation date (/CreationDate), modification date (/M),
      subject (/Subj) and id (/NM), stored under the module's
      ``dictkey_*`` keys.
      """
      CheckParent(self)
      annot = self.this
      details = {}
      details[dictkey_content] = JM_UnicodeFromStr(mupdf.pdf_annot_contents(annot))

      annot_obj = mupdf.pdf_annot_obj(annot)

      name_obj = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Name'))
      details[dictkey_name] = JM_UnicodeFromStr(mupdf.pdf_to_name(name_obj))

      # /T holds the title, which is the author
      title_obj = mupdf.pdf_dict_get(annot_obj, PDF_NAME('T'))
      details[dictkey_title] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(title_obj))

      created_obj = mupdf.pdf_dict_gets(annot_obj, "CreationDate")
      details[dictkey_creationDate] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(created_obj))

      modified_obj = mupdf.pdf_dict_get(annot_obj, PDF_NAME('M'))
      details[dictkey_modDate] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(modified_obj))

      # the subject is stored as returned by MuPDF, without JM_UnicodeFromStr
      subject_obj = mupdf.pdf_dict_gets(annot_obj, "Subj")
      details[dictkey_subject] = mupdf.pdf_to_text_string(subject_obj)

      # identification (PDF key /NM)
      id_obj = mupdf.pdf_dict_gets(annot_obj, "NM")
      details[dictkey_id] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(id_obj))
      return details
  @property
  def irt_xref(self):
      '''
      Object number referenced by the annotation's /IRT entry,
      or 0 if there is no such entry.
      '''
      irt = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(self.this), PDF_NAME('IRT'))
      return mupdf.pdf_to_num(irt) if irt.m_internal else 0
  @property
  def is_open(self):
      """The 'open' status of the annotation or its Popup, as reported by MuPDF."""
      CheckParent(self)
      annot = self.this
      return mupdf.pdf_annot_is_open(annot)
  @property
  def language(self):
      """The annotation language as a string, or None if it is unset."""
      lang_code = mupdf.pdf_annot_language(self.this)
      if lang_code != mupdf.FZ_LANG_UNSET:
          assert hasattr(mupdf, 'fz_string_from_text_language2')
          return mupdf.fz_string_from_text_language2(lang_code)
      return None
  @property
  def line_ends(self):
      """Line end codes as a tuple (start, end).

      None is returned for annotation types without line ending styles.
      """
      CheckParent(self)
      annot = self.this
      if mupdf.pdf_annot_has_line_ending_styles(annot):
          start_style = mupdf.pdf_annot_line_start_style(annot)
          end_style = mupdf.pdf_annot_line_end_style(annot)
          return start_style, end_style
      return None
  @property
  def next(self):
      """The annotation following this one, or None if there is none.

      For a widget the next widget is looked up, otherwise the next
      annotation. The result is registered in the owning page's
      `_annot_refs` and is returned as a Widget when it is a widget.
      """
      CheckParent(self)
      current = self.this
      assert isinstance(current, mupdf.PdfAnnot)
      assert current.m_internal
      if mupdf.pdf_annot_type(current) == mupdf.PDF_ANNOT_WIDGET:
          successor = mupdf.pdf_next_widget(current)
      else:
          successor = mupdf.pdf_next_annot(current)
      if not successor.m_internal:
          return None
      val = Annot(successor)
      if not val:
          return None
      val.thisown = True
      # the successor must live on the same page as this annotation
      assert val.get_parent().this.m_internal_value() == self.get_parent().this.m_internal_value()
      val.parent._annot_refs[id(val)] = val
      if val.type[0] != mupdf.PDF_ANNOT_WIDGET:
          return val
      widget = Widget()
      TOOLS._fill_widget(val, widget)
      return widget
  @property
  def opacity(self):
      """The annotation's /CA value as a float, or -1 if it is not a number."""
      CheckParent(self)
      ca = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(self.this), mupdf.PDF_ENUM_NAME_CA)
      return mupdf.pdf_to_real(ca) if mupdf.pdf_is_number(ca) else -1
  @property
  def popup_rect(self):
      """Rectangle of the annotation's 'Popup'.

      Starts from MuPDF's infinite rectangle when there is no /Popup
      entry. The result is multiplied by the page's transformation
      matrix and then by its derotation matrix.
      """
      CheckParent(self)
      raw_rect = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
      popup = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(self.this), PDF_NAME('Popup'))
      if popup.m_internal:
          raw_rect = mupdf.pdf_dict_get_rect(popup, PDF_NAME('Rect'))
      val = Rect(JM_py_from_rect(raw_rect)) * self.get_parent().transformation_matrix
      val *= self.get_parent().derotation_matrix
      return val
  @property
  def popup_xref(self):
      """Object number of the annotation's /Popup entry, or 0 if absent."""
      CheckParent(self)
      popup = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(self.this), PDF_NAME('Popup'))
      return mupdf.pdf_to_num(popup) if popup.m_internal else 0
  @property
  def rect(self):
      """Annotation rectangle, multiplied by the page's derotation matrix.

      The bounds come from the `extra` module when `g_use_extra` is set,
      otherwise from mupdf.pdf_bound_annot().
      """
      bounds = extra.Annot_rect3( self.this) if g_use_extra else mupdf.pdf_bound_annot(self.this)
      val = Rect(bounds)
      page = self.get_parent()
      val *= page.derotation_matrix
      return val
  @property
  def rect_delta(self):
      '''
      The four values of the annotation's /RD array as a tuple, with
      the last two negated. None if /RD does not have exactly 4 items.
      '''
      rd = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(self.this), PDF_NAME('RD'))
      if mupdf.pdf_array_len(rd) != 4:
          return None
      d0, d1, d2, d3 = (
          mupdf.pdf_to_real(mupdf.pdf_array_get(rd, idx)) for idx in range(4)
      )
      return (d0, d1, -d2, -d3)
  @property
  def rotation(self):
      """The annotation's /Rotate value as an int, or -1 if absent."""
      CheckParent(self)
      rotate_obj = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(self.this), mupdf.PDF_ENUM_NAME_Rotate)
      return mupdf.pdf_to_int(rotate_obj) if rotate_obj.m_internal else -1
  def set_apn_bbox(self, bbox):
      """
      Set the /BBox of the annotation's normal appearance (/AP /N).

      `bbox` is first multiplied by the page's rotation matrix and the
      inverse of its transformation matrix.

      Raises:
          RuntimeError: the annotation has no /AP /N entry.
      """
      CheckParent(self)
      page = self.get_parent()
      bbox *= page.rotation_matrix * ~page.transformation_matrix
      appearance = mupdf.pdf_dict_getl(
          mupdf.pdf_annot_obj(self.this), PDF_NAME('AP'), PDF_NAME('N')
      )
      if not appearance.m_internal:
          raise RuntimeError( MSG_BAD_APN)
      mupdf.pdf_dict_put_rect(appearance, PDF_NAME('BBox'), JM_rect_from_py(bbox))
  def set_apn_matrix(self, matrix):
      """Set the /Matrix of the annotation's normal appearance (/AP /N).

      Raises:
          RuntimeError: the annotation has no /AP /N entry.
      """
      CheckParent(self)
      appearance = mupdf.pdf_dict_getl(
          mupdf.pdf_annot_obj(self.this), PDF_NAME('AP'), PDF_NAME('N')
      )
      if not appearance.m_internal:
          raise RuntimeError( MSG_BAD_APN)
      mupdf.pdf_dict_put_matrix(appearance, PDF_NAME('Matrix'), JM_matrix_from_py(matrix))
  def set_blendmode(self, blend_mode):
      """Store `blend_mode` as the name value of the annotation's /BM entry."""
      CheckParent(self)
      target = mupdf.pdf_annot_obj(self.this)
      mupdf.pdf_dict_put_name(target, PDF_NAME('BM'), blend_mode)
  def set_border(self, border=None, width=-1, style=None, dashes=None, clouds=-1):
      """Set border properties.

      Either pass a dict as `border`, or use the direct arguments
      `width`, `style`, `dashes` and `clouds`. A dict takes precedence;
      the caller's dict is not modified.

      Args:
          border: dict with any of the keys "width", "style", "dashes",
              "clouds". Missing keys get the "do not change" defaults.
          width: border width; -1 or None leaves it unchanged.
          style: border style.
          dashes: sequence of integers. A sequence containing a
              non-integer item is replaced by None.
          clouds: cloudy border intensity; -1 or None leaves it unchanged.
              Ignored (with a message) for annotation types that do not
              support a cloudy border.

      Returns:
          The result of JM_annot_set_border(), or None if the annotation
          type does not support borders.
      """
      CheckParent(self)
      atype, atname = self.type[:2]  # annotation type
      if atype not in (
              mupdf.PDF_ANNOT_CIRCLE,
              mupdf.PDF_ANNOT_FREE_TEXT,
              mupdf.PDF_ANNOT_INK,
              mupdf.PDF_ANNOT_LINE,
              mupdf.PDF_ANNOT_POLY_LINE,
              mupdf.PDF_ANNOT_POLYGON,
              mupdf.PDF_ANNOT_SQUARE,
              ):
          message(f"Cannot set border for '{atname}'.")
          return None

      if isinstance(border, dict):
          # Work on a copy so the normalization below never leaks back
          # into the caller's dictionary.
          border = dict(border)
      else:
          border = {"width": width, "style": style, "dashes": dashes, "clouds": clouds}
      border.setdefault("width", -1)
      border.setdefault("style", None)
      border.setdefault("dashes", None)
      border.setdefault("clouds", -1)
      if border["width"] is None:
          border["width"] = -1
      if border["clouds"] is None:
          border["clouds"] = -1

      # Cloudy borders are limited to a subset of the border-capable types.
      # Checking the normalized value covers both the dict and the keyword
      # form of the call.
      if atype not in (
              mupdf.PDF_ANNOT_CIRCLE,
              mupdf.PDF_ANNOT_FREE_TEXT,
              mupdf.PDF_ANNOT_POLYGON,
              mupdf.PDF_ANNOT_SQUARE,
              ) and border["clouds"] > 0:
          message(f"Cannot set cloudy border for '{atname}'.")
          border["clouds"] = -1  # do not set border effect

      dash_spec = border["dashes"]
      if hasattr(dash_spec, "__getitem__"):  # ensure sequence items are integers
          dash_spec = tuple(dash_spec)
          if all(isinstance(item, int) for item in dash_spec):
              border["dashes"] = dash_spec
          else:
              border["dashes"] = None

      annot_obj = mupdf.pdf_annot_obj( self.this)
      pdf = mupdf.pdf_get_bound_document( annot_obj)
      return JM_annot_set_border( border, pdf, annot_obj)
  def set_colors(self, colors=None, stroke=None, fill=None):
      """Set 'stroke' and 'fill' colors.

      Use either a dict with the keys "stroke" / "fill" or the direct
      arguments. An empty list or tuple writes an empty color array,
      None leaves the respective color untouched, and a single number
      is treated as a one-component color.

      The stroke color is written to /C and the fill color to /IC.
      A truthy fill color for an annotation type that is not in the
      fill-capable list is ignored with a warning message.

      Raises:
          ValueError: the annotation is a FreeText annotation.
      """
      if self.type[0] == mupdf.PDF_ANNOT_FREE_TEXT:
          raise ValueError("cannot be used for FreeText annotations")

      CheckParent(self)
      doc = self.get_parent().parent
      if type(colors) is not dict:
          colors = {"fill": fill, "stroke": stroke}
      fill = colors.get("fill")
      stroke = colors.get("stroke")

      def write_color(pdf_key, color):
          # Store one color under the given key of the annotation object.
          if color in ([], ()):
              doc.xref_set_key(self.xref, pdf_key, "[]")
              return
          if color is None:
              return
          if hasattr(color, "__float__"):
              color = [float(color)]
          CheckColor(color)
          assert len(color) in (1, 3, 4)
          doc.xref_set_key(self.xref, pdf_key, f"[{_format_g(color)}]")

      write_color("C", stroke)

      fill_annots = (
          mupdf.PDF_ANNOT_CIRCLE,
          mupdf.PDF_ANNOT_SQUARE,
          mupdf.PDF_ANNOT_LINE,
          mupdf.PDF_ANNOT_POLY_LINE,
          mupdf.PDF_ANNOT_POLYGON,
          mupdf.PDF_ANNOT_REDACT,
      )
      if fill and self.type[0] not in fill_annots:
          message("Warning: fill color ignored for annot type '%s'." % self.type[1])
          return

      write_color("IC", fill)
  def set_flags(self, flags):
      """Pass `flags` to MuPDF as the new annotation flags."""
      CheckParent(self)
      mupdf.pdf_set_annot_flags(self.this, flags)
  def set_info(self, info=None, content=None, title=None, creationDate=None, modDate=None, subject=None):
      """Set various descriptive properties.

      Args:
          info: optional dict with the keys "content", "title",
              "creationDate", "modDate", "subject". When a dict is
              given, it replaces ALL keyword values (missing keys
              become None).
          content: annotation contents.
          title: title (= author).
          creationDate: value for /CreationDate.
          modDate: value for /M.
          subject: value for /Subj.

      Only truthy values are written. Everything except `content` is
      written only if MuPDF reports that the annotation has an author.
      """
      CheckParent(self)
      if type(info) is dict:  # build the args from the dictionary
          content = info.get("content")
          title = info.get("title")
          creationDate = info.get("creationDate")
          modDate = info.get("modDate")
          subject = info.get("subject")

      annot = self.this
      # use this to indicate a 'markup' annot type
      is_markup = mupdf.pdf_annot_has_author(annot)

      if content:
          mupdf.pdf_set_annot_contents(annot, content)

      if not is_markup:
          return

      if title:  # title (= author)
          mupdf.pdf_set_annot_author(annot, title)
      if creationDate:
          mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), PDF_NAME('CreationDate'), creationDate)
      if modDate:
          mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), PDF_NAME('M'), modDate)
      if subject:
          mupdf.pdf_dict_puts(mupdf.pdf_annot_obj(annot), "Subj", mupdf.pdf_new_text_string(subject))
  def set_irt_xref(self, xref):
      '''
      Make the annotation's /IRT entry an indirect reference to `xref`.

      Raises:
          ValueError: `xref` is outside the document's xref range, or
              the /Subtype of the referenced object is not recognized
              as an annotation type.
      '''
      annot = self.this
      annot_obj = mupdf.pdf_annot_obj( annot)
      page = _pdf_annot_page(annot)
      if xref < 1 or xref >= mupdf.pdf_xref_len( page.doc()):
          raise ValueError( MSG_BAD_XREF)
      target = mupdf.pdf_new_indirect( page.doc(), xref, 0)
      subtype_name = mupdf.pdf_to_name( mupdf.pdf_dict_get( target, PDF_NAME('Subtype')))
      if mupdf.pdf_annot_type_from_string( subtype_name) < 0:
          raise ValueError( MSG_IS_NO_ANNOT)
      mupdf.pdf_dict_put( annot_obj, PDF_NAME('IRT'), target)
  def set_language(self, language=None):
      """Set the annotation language; a falsy `language` unsets it."""
      CheckParent(self)
      annot = self.this
      lang_code = mupdf.fz_text_language_from_string(language) if language else mupdf.FZ_LANG_UNSET
      mupdf.pdf_set_annot_language(annot, lang_code)
  def set_line_ends(self, start, end):
      """Set line end codes.

      Only a warning is issued if the annotation type has no line
      ending styles.
      """
      CheckParent(self)
      annot = self.this
      if not mupdf.pdf_annot_has_line_ending_styles(annot):
          message_warning("bad annot type for line ends")
          return
      mupdf.pdf_set_annot_line_ending_styles(annot, start, end)
  def set_name(self, name):
      """Store `name` as the annotation's /Name (icon) entry."""
      CheckParent(self)
      target = mupdf.pdf_annot_obj(self.this)
      mupdf.pdf_dict_put_name(target, PDF_NAME('Name'), name)
  def set_oc(self, oc=0):
      """Set the annotation's /OC xref, or remove /OC when `oc` is falsy."""
      CheckParent(self)
      annot_obj = mupdf.pdf_annot_obj(self.this)
      if oc:
          JM_add_oc_object(mupdf.pdf_get_bound_document(annot_obj), annot_obj, oc)
      else:
          mupdf.pdf_dict_del(annot_obj, PDF_NAME('OC'))
  def set_opacity(self, opacity):
      """Set the annotation opacity.

      A value for which _INRANGE(opacity, 0.0, 1.0) is false resets the
      opacity to 1. For values below 1.0 the page is additionally
      flagged with `transparency = 1`.
      """
      CheckParent(self)
      annot = self.this
      if _INRANGE(opacity, 0.0, 1.0):
          mupdf.pdf_set_annot_opacity(annot, opacity)
          if opacity < 1.0:
              # NOTE(review): this assigns an attribute on the object
              # returned by _pdf_annot_page(); confirm that it reaches the
              # underlying MuPDF page structure.
              page = _pdf_annot_page(annot)
              page.transparency = 1
      else:
          mupdf.pdf_set_annot_opacity(annot, 1)
  def set_open(self, is_open):
      """Pass `is_open` to MuPDF as the 'open' status of the annotation or its Popup."""
      CheckParent(self)
      mupdf.pdf_set_annot_is_open(self.this, is_open)
  def set_popup(self, rect):
      '''
      Set the annotation's 'Popup' rectangle.

      `rect` is transformed with the page's rotation matrix before it is
      handed to mupdf.pdf_set_annot_popup().
      '''
      CheckParent(self)
      annot = self.this
      rotation = JM_rotate_page_matrix(_pdf_annot_page(annot))
      popup = mupdf.fz_transform_rect(JM_rect_from_py(rect), rotation)
      mupdf.pdf_set_annot_popup(annot, popup)
  def set_rect(self, rect):
      """Set the annotation rectangle.

      `rect` is transformed with the page's rotation matrix first.

      Returns:
          False if MuPDF raised while setting the rectangle (the error
          is reported via message()), otherwise None.

      Raises:
          ValueError: the transformed rectangle is empty or infinite.
      """
      CheckParent(self)
      annot = self.this
      rotation = JM_rotate_page_matrix(_pdf_annot_page(annot))
      target = mupdf.fz_transform_rect(JM_rect_from_py(rect), rotation)
      if mupdf.fz_is_empty_rect(target) or mupdf.fz_is_infinite_rect(target):
          raise ValueError( MSG_BAD_RECT)
      try:
          mupdf.pdf_set_annot_rect(annot, target)
      except Exception as exc:
          message(f'cannot set rect: {exc}')
          return False
  def set_rotation(self, rotate=0):
      """Set the annotation's /Rotate entry.

      Nothing happens for annotation types outside the list below.
      The angle is brought into the range 0 <= angle < 360; for
      FreeText annotations an angle that is not a multiple of 90
      is replaced by 0.
      """
      CheckParent(self)
      annot = self.this
      annot_type = mupdf.pdf_annot_type(annot)
      rotatable = (
          mupdf.PDF_ANNOT_CARET,
          mupdf.PDF_ANNOT_CIRCLE,
          mupdf.PDF_ANNOT_FREE_TEXT,
          mupdf.PDF_ANNOT_FILE_ATTACHMENT,
          mupdf.PDF_ANNOT_INK,
          mupdf.PDF_ANNOT_LINE,
          mupdf.PDF_ANNOT_POLY_LINE,
          mupdf.PDF_ANNOT_POLYGON,
          mupdf.PDF_ANNOT_SQUARE,
          mupdf.PDF_ANNOT_STAMP,
          mupdf.PDF_ANNOT_TEXT,
      )
      if annot_type not in rotatable:
          return

      angle = rotate
      while angle < 0:
          angle += 360
      while angle >= 360:
          angle -= 360
      if annot_type == mupdf.PDF_ANNOT_FREE_TEXT and angle % 90 != 0:
          angle = 0

      mupdf.pdf_dict_put_int(mupdf.pdf_annot_obj(annot), PDF_NAME('Rotate'), angle)
  @property
  def type(self):
      """Annotation type.

      Returns:
          ``(type_number, type_name)``, or
          ``(type_number, type_name, intent)`` when the annotation
          dictionary has an /IT entry that is a PDF name. The string
          'null' is returned if the underlying annotation is null.
      """
      CheckParent(self)
      if not self.this.m_internal:
          return 'null'
      type_ = mupdf.pdf_annot_type(self.this)
      c = mupdf.pdf_string_from_annot_type(type_)
      o = mupdf.pdf_dict_gets( mupdf.pdf_annot_obj(self.this), 'IT')
      # Only a /IT entry that exists AND is a name carries an intent;
      # in every other case there is nothing to append.
      if not o.m_internal or not mupdf.pdf_is_name(o):
          return (type_, c)
      it = mupdf.pdf_to_name(o)
      return (type_, c, it)
  def update(self,
          blend_mode: OptStr =None,
          opacity: OptFloat =None,
          fontsize: float =0,
          fontname: OptStr =None,
          text_color: OptSeq =None,
          border_color: OptSeq =None,
          fill_color: OptSeq =None,
          cross_out: bool =True,
          rotate: int =-1,
          ):
      """Update annot appearance.

      Notes:
          Depending on the annot type, some parameters make no sense,
          while others are only available in this method to achieve the
          desired result. This is especially true for 'FreeText' annots.

          MuPDF first regenerates the appearance stream; afterwards the
          stream is post-processed here (redaction cross-out, border
          width / stroke color, polygon / polyline fill, dashes, opacity
          reference, line end symbols) and finally the rotation stored in
          the annotation is applied to its rectangle and appearance matrix.

      Args:
          blend_mode: set the blend mode, all annotations.
          opacity: set the opacity, all annotations.
          fontsize: set fontsize, 'FreeText' only.
          fontname: set the font, 'FreeText' only.
          border_color: set border color, 'FreeText' only.
          text_color: set text color, 'FreeText' only.
          fill_color: set fill color, all annotations.
          cross_out: draw diagonal lines, 'Redact' only.
          rotate: set rotation, 'FreeText' and some others.

      Raises:
          ValueError: `border_color` was given but the /RC lookup on the
              annotation is falsy.
          RuntimeError: the MuPDF appearance update reported failure.
      """
      annot_obj = mupdf.pdf_annot_obj(self.this)

      if border_color:
          # NOTE(review): this relies on the truthiness of the object
          # returned by pdf_dict_get(); other code in this class tests
          # `.m_internal` instead - confirm both are equivalent.
          is_rich_text = mupdf.pdf_dict_get(annot_obj, PDF_NAME("RC"))
          if not is_rich_text:
              raise ValueError("cannot set border_color if rich_text is False")
      Annot.update_timing_test()
      CheckParent(self)

      def color_string(cs, code):
          """Return valid PDF color operator for a given color sequence.
          """
          cc = ColorCode(cs, code)
          if not cc:
              return b""
          return (cc + "\n").encode()

      annot_type = self.type[0]  # get the annot type

      dt = self.border.get("dashes", None)  # get the dashes spec
      bwidth = self.border.get("width", -1)  # get border line width
      stroke = self.colors["stroke"]  # get the stroke color
      if fill_color is not None:
          fill = fill_color
      else:
          fill = self.colors["fill"]
      rect = None  # stays None unless line end symbols enlarge the rect below
      apnmat = self.apn_matrix  # saved before MuPDF regenerates the appearance
      if rotate != -1:  # sanitize rotation value
          while rotate < 0:
              rotate += 360
          while rotate >= 360:
              rotate -= 360
          if annot_type == mupdf.PDF_ANNOT_FREE_TEXT and rotate % 90 != 0:
              rotate = 0

      #------------------------------------------------------------------
      # handle opacity and blend mode
      #------------------------------------------------------------------
      if blend_mode is None:
          blend_mode = self.blendmode
      if not hasattr(opacity, "__float__"):
          opacity = self.opacity

      if 0 <= opacity < 1 or blend_mode:
          opa_code = "/H gs\n"  # then we must reference this 'gs'
      else:
          opa_code = ""

      if annot_type == mupdf.PDF_ANNOT_FREE_TEXT:
          CheckColor(text_color)
          CheckColor(fill_color)
          tcol, fname, fsize = TOOLS._parse_da(self)

          # read and update default appearance as necessary
          if fsize <= 0:
              fsize = 12
          if text_color:
              tcol = text_color
          if fontname:
              fname = fontname
          if fontsize > 0:
              fsize = fontsize
          JM_make_annot_DA(self, len(tcol), tcol, fname, fsize)
          blend_mode = None  # not supported for free text annotations!

      #------------------------------------------------------------------
      # now invoke MuPDF to update the annot appearance
      #------------------------------------------------------------------
      val = self._update_appearance(
          opacity=opacity,
          blend_mode=blend_mode,
          fill_color=fill,
          rotate=rotate,
      )
      if val is False:
          raise RuntimeError("Error updating annotation.")

      if annot_type == mupdf.PDF_ANNOT_FREE_TEXT:
          # in absence of previous opacity, we may need to modify the AP
          ap = self._getAP()
          if 0 <= opacity < 1 and not ap.startswith(b"/H gs"):
              self._setAP(b"/H gs\n" + ap)
          return

      bfill = color_string(fill, "f")
      bstroke = color_string(stroke, "c")

      p_ctm = self.get_parent().transformation_matrix
      imat = ~p_ctm  # inverse page transf. matrix

      if dt:
          dashes = "[" + " ".join(map(str, dt)) + "] 0 d\n"
          dashes = dashes.encode("utf-8")
      else:
          dashes = None

      if self.line_ends:
          line_end_le, line_end_ri = self.line_ends
      else:
          line_end_le, line_end_ri = 0, 0  # init line end codes

      # read contents as created by MuPDF
      ap = self._getAP()
      ap_tab = ap.splitlines()  # split in single lines
      ap_updated = False  # assume we did nothing

      if annot_type == mupdf.PDF_ANNOT_REDACT:
          if cross_out:  # create crossed-out rect
              ap_updated = True
              ap_tab = ap_tab[:-1]
              # the remaining lines must unpack into exactly five items
              _, LL, LR, UR, UL = ap_tab
              ap_tab.append(LR)
              ap_tab.append(LL)
              ap_tab.append(UR)
              ap_tab.append(LL)
              ap_tab.append(UL)
              ap_tab.append(b"S")

          if bwidth > 0 or bstroke != b"":
              ap_updated = True
              ntab = [_format_g(bwidth).encode() + b" w"] if bwidth > 0 else []
              for line in ap_tab:
                  if line.endswith(b"w"):
                      continue  # drop existing line width commands
                  if line.endswith(b"RG") and bstroke != b"":
                      line = bstroke[:-1]  # replace stroke color, strip "\n"
                  ntab.append(line)
              ap_tab = ntab

          ap = b"\n".join(ap_tab)

      if annot_type in (mupdf.PDF_ANNOT_POLYGON, mupdf.PDF_ANNOT_POLY_LINE):
          # replace the last line of the stream by our own paint operator
          ap = b"\n".join(ap_tab[:-1]) + b"\n"
          ap_updated = True
          if bfill != b"":
              if annot_type == mupdf.PDF_ANNOT_POLYGON:
                  ap = ap + bfill + b"b"  # close, fill, and stroke
              elif annot_type == mupdf.PDF_ANNOT_POLY_LINE:
                  ap = ap + b"S"  # stroke
          else:
              if annot_type == mupdf.PDF_ANNOT_POLYGON:
                  ap = ap + b"s"  # close and stroke
              elif annot_type == mupdf.PDF_ANNOT_POLY_LINE:
                  ap = ap + b"S"  # stroke

      if dashes is not None:  # handle dashes
          ap = dashes + ap
          # reset dashing - only applies for LINE annots with line ends given
          ap = ap.replace(b"\nS\n", b"\nS\n[] 0 d\n", 1)
          ap_updated = True

      if opa_code:
          ap = opa_code.encode("utf-8") + ap
          ap_updated = True

      ap = b"q\n" + ap + b"\nQ\n"
      #----------------------------------------------------------------------
      # the following handles line end symbols for 'Polygon' and 'Polyline'
      #----------------------------------------------------------------------
      if line_end_le + line_end_ri > 0 and annot_type in (mupdf.PDF_ANNOT_POLYGON, mupdf.PDF_ANNOT_POLY_LINE):

          # index = line end code, value = function drawing that symbol
          le_funcs = (None, TOOLS._le_square, TOOLS._le_circle,
                      TOOLS._le_diamond, TOOLS._le_openarrow,
                      TOOLS._le_closedarrow, TOOLS._le_butt,
                      TOOLS._le_ropenarrow, TOOLS._le_rclosedarrow,
                      TOOLS._le_slash)
          le_funcs_range = range(1, len(le_funcs))
          d = 2 * max(1, self.border["width"])
          rect = self.rect + (-d, -d, d, d)  # enlarge rect by d on every side
          ap_updated = True
          points = self.vertices
          if line_end_le in le_funcs_range:
              p1 = Point(points[0]) * imat
              p2 = Point(points[1]) * imat
              left = le_funcs[line_end_le](self, p1, p2, False, fill_color)
              ap += left.encode()
          if line_end_ri in le_funcs_range:
              p1 = Point(points[-2]) * imat
              p2 = Point(points[-1]) * imat
              left = le_funcs[line_end_ri](self, p1, p2, True, fill_color)
              ap += left.encode()

      if ap_updated:
          if rect:  # rect modified here?
              self.set_rect(rect)
              self._setAP(ap, rect=1)
          else:
              self._setAP(ap, rect=0)

      #-------------------------------
      # handle annotation rotations
      #-------------------------------
      if annot_type not in (  # only these types are supported
              mupdf.PDF_ANNOT_CARET,
              mupdf.PDF_ANNOT_CIRCLE,
              mupdf.PDF_ANNOT_FILE_ATTACHMENT,
              mupdf.PDF_ANNOT_INK,
              mupdf.PDF_ANNOT_LINE,
              mupdf.PDF_ANNOT_POLY_LINE,
              mupdf.PDF_ANNOT_POLYGON,
              mupdf.PDF_ANNOT_SQUARE,
              mupdf.PDF_ANNOT_STAMP,
              mupdf.PDF_ANNOT_TEXT,
              ):
          return

      rot = self.rotation  # get value from annot object
      if rot == -1:  # nothing to change
          return

      M = (self.rect.tl + self.rect.br) / 2  # center of annot rect

      if rot == 0:  # undo rotations
          if abs(apnmat - Matrix(1, 1)) < 1e-5:
              return  # matrix already is a no-op
          quad = self.rect.morph(M, ~apnmat)  # derotate rect
          # use the same setter as every other rectangle update in this class
          self.set_rect(quad.rect)
          self.set_apn_matrix(Matrix(1, 1))  # appearance matrix = no-op
          return

      mat = Matrix(rot)
      quad = self.rect.morph(M, mat)
      self.set_rect(quad.rect)
      self.set_apn_matrix(apnmat * mat)
  def update_file(self, buffer_=None, filename=None, ufilename=None, desc=None):
      """Update the file attached to a 'FileAttachment' annotation.

      Args:
          buffer_: new file content. Nothing is written to the stream
              when no usable buffer results from it.
          filename: new file name; written to /F and /UF of both the
              stream and the file specification, and to the
              annotation's /Contents.
          ufilename: new value for /UF (applied after `filename`).
          desc: new value for /Desc.

      Raises:
          TypeError: the annotation is not a file attachment.
          ValueError: `buffer_` is truthy but could not be converted
              into a buffer.
          An error is raised through RAISEPY (JM_Exc_FileDataError) when
          the annotation has no /FS /EF /F object.
      """
      CheckParent(self)
      annot = self.this
      annot_obj = mupdf.pdf_annot_obj(annot)
      pdf = mupdf.pdf_get_bound_document(annot_obj)  # the owning PDF
      annot_type = mupdf.pdf_annot_type(annot)
      if annot_type != mupdf.PDF_ANNOT_FILE_ATTACHMENT:
          raise TypeError( MSG_BAD_ANNOT_TYPE)
      # the object for file content
      stream = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('FS'), PDF_NAME('EF'), PDF_NAME('F'))
      if not stream.m_internal:
          RAISEPY( "bad PDF: no /EF object", JM_Exc_FileDataError)

      fs = mupdf.pdf_dict_get(annot_obj, PDF_NAME('FS'))

      # file content given
      res = JM_BufferFromBytes(buffer_)
      if buffer_ and not res.m_internal:
          raise ValueError( MSG_BAD_BUFFER)
      # Only touch the stream when there really is a buffer behind the
      # wrapper - same test as the validity check above.
      if res and res.m_internal:
          JM_update_stream(pdf, stream, res, 1)
          # adjust /DL and /Size parameters
          size, _ = mupdf.fz_buffer_storage(res)
          size_obj = mupdf.pdf_new_int(size)
          mupdf.pdf_dict_put(stream, PDF_NAME('DL'), size_obj)
          mupdf.pdf_dict_putl(stream, size_obj, PDF_NAME('Params'), PDF_NAME('Size'))

      if filename:
          mupdf.pdf_dict_put_text_string(stream, PDF_NAME('F'), filename)
          mupdf.pdf_dict_put_text_string(fs, PDF_NAME('F'), filename)
          mupdf.pdf_dict_put_text_string(stream, PDF_NAME('UF'), filename)
          mupdf.pdf_dict_put_text_string(fs, PDF_NAME('UF'), filename)
          mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('Contents'), filename)

      if ufilename:
          mupdf.pdf_dict_put_text_string(stream, PDF_NAME('UF'), ufilename)
          mupdf.pdf_dict_put_text_string(fs, PDF_NAME('UF'), ufilename)

      if desc:
          mupdf.pdf_dict_put_text_string(stream, PDF_NAME('Desc'), desc)
          mupdf.pdf_dict_put_text_string(fs, PDF_NAME('Desc'), desc)
  1645. @staticmethod
  1646. def update_timing_test():
  1647. total = 0
  1648. for i in range( 30*1000):
  1649. total += i
  1650. return total
  @property
  def vertices(self):
      """Annotation vertex points.

      Returns:
          * a list of (x, y) tuples taken from the first entry found
            among /Vertices, /L, /QuadPoints and /CL, or
          * a list of such lists (one per sub-array) for /InkList, or
          * None if none of these entries exists.

          Every point is transformed with the page transformation
          matrix concatenated with the page's derotation matrix.
      """
      CheckParent(self)
      annot = self.this
      assert isinstance(annot, mupdf.PdfAnnot)
      annot_obj = mupdf.pdf_annot_obj(annot)
      page = _pdf_annot_page(annot)
      page_ctm = mupdf.FzMatrix()  # page transformation matrix
      unused_rect = mupdf.FzRect()  # out-param of pdf_page_transform(), not used here
      mupdf.pdf_page_transform(page, unused_rect, page_ctm)
      page_ctm = mupdf.fz_concat(page_ctm, JM_derotate_page_matrix(page))

      def transformed_points(coords):
          # Read consecutive (x, y) pairs from a PDF array and transform them.
          pairs = []
          for pos in range(0, mupdf.pdf_array_len(coords), 2):
              x = mupdf.pdf_to_real(mupdf.pdf_array_get(coords, pos))
              y = mupdf.pdf_to_real(mupdf.pdf_array_get(coords, pos + 1))
              moved = mupdf.fz_transform_point(mupdf.FzPoint(x, y), page_ctm)
              pairs.append( (moved.x, moved.y))
          return pairs

      # These keys are tried in this order; the first one present wins.
      flat = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Vertices'))
      if not flat.m_internal:
          flat = mupdf.pdf_dict_get(annot_obj, PDF_NAME('L'))
      if not flat.m_internal:
          flat = mupdf.pdf_dict_get(annot_obj, PDF_NAME('QuadPoints'))
      if not flat.m_internal:
          flat = mupdf.pdf_dict_gets(annot_obj, 'CL')
      if flat.m_internal:
          # list with 1-level depth
          return transformed_points(flat)

      ink_list = mupdf.pdf_dict_gets(annot_obj, 'InkList')
      if ink_list.m_internal:
          # InkList has 2-level lists
          return [
              transformed_points(mupdf.pdf_array_get(ink_list, idx))
              for idx in range(mupdf.pdf_array_len(ink_list))
          ]
      return None
  @property
  def xref(self):
      """Object number of the annotation's PDF object."""
      CheckParent(self)
      annot_obj = mupdf.pdf_annot_obj(self.this)
      return mupdf.pdf_to_num(annot_obj)
  1707. class Archive:
  def __init__( self, *args):
      '''
      Create a multi-archive; any arguments are forwarded to add().

      Archive(dirname [, path]) - from folder
      Archive(file [, path]) - from file name or object
      Archive(data, name) - from memory item
      Archive() - empty archive
      Archive(archive [, path]) - from archive
      '''
      self._subarchives = []
      self.this = mupdf.fz_new_multi_archive()
      if not args:
          return
      self.add( *args)
  1720. def __repr__( self):
  1721. return f'Archive, sub-archives: {len(self._subarchives)}'
  def _add_arch( self, subarch, path=None):
      """Mount the archive `subarch` into this multi-archive under `path`."""
      multi = self.this
      mupdf.fz_mount_multi_archive( multi, subarch, path)
  1724. def _add_dir( self, folder, path=None):
  1725. sub = mupdf.fz_open_directory( folder)
  1726. mupdf.fz_mount_multi_archive( self.this, sub, path)
  1727. def _add_treeitem( self, memory, name, path=None):
  1728. buff = JM_BufferFromBytes( memory)
  1729. sub = mupdf.fz_new_tree_archive( mupdf.FzTree())
  1730. mupdf.fz_tree_archive_add_buffer( sub, name, buff)
  1731. mupdf.fz_mount_multi_archive( self.this, sub, path)
  1732. def _add_ziptarfile( self, filepath, type_, path=None):
  1733. if type_ == 1:
  1734. sub = mupdf.fz_open_zip_archive( filepath)
  1735. else:
  1736. sub = mupdf.fz_open_tar_archive( filepath)
  1737. mupdf.fz_mount_multi_archive( self.this, sub, path)
  1738. def _add_ziptarmemory( self, memory, type_, path=None):
  1739. buff = JM_BufferFromBytes( memory)
  1740. stream = mupdf.fz_open_buffer( buff)
  1741. if type_==1:
  1742. sub = mupdf.fz_open_zip_archive_with_stream( stream)
  1743. else:
  1744. sub = mupdf.fz_open_tar_archive_with_stream( stream)
  1745. mupdf.fz_mount_multi_archive( self.this, sub, path)
  1746. def add( self, content, path=None):
  1747. '''
  1748. Add a sub-archive.
  1749. Args:
  1750. content:
  1751. The content to be added. May be one of:
  1752. `str` - must be path of directory or file.
  1753. `bytes`, `bytearray`, `io.BytesIO` - raw data.
  1754. `zipfile.Zipfile`.
  1755. `tarfile.TarFile`.
  1756. `pymupdf.Archive`.
  1757. A two-item tuple `(data, name)`.
  1758. List or tuple (but not tuple with length 2) of the above.
  1759. path: (str) a "virtual" path name, under which the elements
  1760. of content can be retrieved. Use it to e.g. cope with
  1761. duplicate element names.
  1762. '''
  1763. def is_binary_data(x):
  1764. return isinstance(x, (bytes, bytearray, io.BytesIO))
  1765. def make_subarch(entries, mount, fmt):
  1766. subarch = dict(fmt=fmt, entries=entries, path=mount)
  1767. if fmt != "tree" or self._subarchives == []:
  1768. self._subarchives.append(subarch)
  1769. else:
  1770. ltree = self._subarchives[-1]
  1771. if ltree["fmt"] != "tree" or ltree["path"] != subarch["path"]:
  1772. self._subarchives.append(subarch)
  1773. else:
  1774. ltree["entries"].extend(subarch["entries"])
  1775. self._subarchives[-1] = ltree
  1776. if isinstance(content, pathlib.Path):
  1777. content = str(content)
  1778. if isinstance(content, str):
  1779. if os.path.isdir(content):
  1780. self._add_dir(content, path)
  1781. return make_subarch(os.listdir(content), path, 'dir')
  1782. elif os.path.isfile(content):
  1783. assert isinstance(path, str) and path != '', \
  1784. f'Need name for binary content, but {path=}.'
  1785. with open(content) as f:
  1786. ff = f.read()
  1787. self._add_treeitem(ff, path)
  1788. return make_subarch([path], None, 'tree')
  1789. else:
  1790. raise ValueError(f'Not a file or directory: {content!r}')
  1791. elif is_binary_data(content):
  1792. assert isinstance(path, str) and path != '' \
  1793. f'Need name for binary content, but {path=}.'
  1794. self._add_treeitem(content, path)
  1795. return make_subarch([path], None, 'tree')
  1796. elif isinstance(content, zipfile.ZipFile):
  1797. filename = getattr(content, "filename", None)
  1798. if filename is None:
  1799. fp = content.fp.getvalue()
  1800. self._add_ziptarmemory(fp, 1, path)
  1801. else:
  1802. self._add_ziptarfile(filename, 1, path)
  1803. return make_subarch(content.namelist(), path, 'zip')
  1804. elif isinstance(content, tarfile.TarFile):
  1805. filename = getattr(content.fileobj, "name", None)
  1806. if filename is None:
  1807. fp = content.fileobj
  1808. if not isinstance(fp, io.BytesIO):
  1809. fp = fp.fileobj
  1810. self._add_ziptarmemory(fp.getvalue(), 0, path)
  1811. else:
  1812. self._add_ziptarfile(filename, 0, path)
  1813. return make_subarch(content.getnames(), path, 'tar')
  1814. elif isinstance(content, Archive):
  1815. self._add_arch(content, path)
  1816. return make_subarch([], path, 'multi')
  1817. if isinstance(content, tuple) and len(content) == 2:
  1818. # covers the tree item plus path
  1819. data, name = content
  1820. assert isinstance(name, str), f'Unexpected {type(name)=}'
  1821. if is_binary_data(data):
  1822. self._add_treeitem(data, name, path=path)
  1823. elif isinstance(data, str):
  1824. if os.path.isfile(data):
  1825. with open(data, 'rb') as f:
  1826. ff = f.read()
  1827. self._add_treeitem(ff, name, path=path)
  1828. else:
  1829. assert 0, f'Unexpected {type(data)=}.'
  1830. return make_subarch([name], path, 'tree')
  1831. elif hasattr(content, '__getitem__'):
  1832. # Deal with sequence of disparate items.
  1833. for item in content:
  1834. self.add(item, path)
  1835. return
  1836. else:
  1837. raise TypeError(f'Unrecognised type {type(content)}.')
  1838. assert 0
  1839. @property
  1840. def entry_list( self):
  1841. '''
  1842. List of sub archives.
  1843. '''
  1844. return self._subarchives
  1845. def has_entry( self, name):
  1846. return mupdf.fz_has_archive_entry( self.this, name)
  1847. def read_entry( self, name):
  1848. buff = mupdf.fz_read_archive_entry( self.this, name)
  1849. return JM_BinFromBuffer( buff)
  1850. class Xml:
  1851. def __enter__(self):
  1852. return self
  1853. def __exit__(self, *args):
  1854. pass
  1855. def __init__(self, rhs):
  1856. if isinstance(rhs, mupdf.FzXml):
  1857. self.this = rhs
  1858. elif isinstance(rhs, str):
  1859. buff = mupdf.fz_new_buffer_from_copied_data(rhs)
  1860. self.this = mupdf.fz_parse_xml_from_html5(buff)
  1861. else:
  1862. assert 0, f'Unsupported type for rhs: {type(rhs)}'
  1863. def _get_node_tree( self):
  1864. def show_node(node, items, shift):
  1865. while node is not None:
  1866. if node.is_text:
  1867. items.append((shift, f'"{node.text}"'))
  1868. node = node.next
  1869. continue
  1870. items.append((shift, f"({node.tagname}"))
  1871. for k, v in node.get_attributes().items():
  1872. items.append((shift, f"={k} '{v}'"))
  1873. child = node.first_child
  1874. if child:
  1875. items = show_node(child, items, shift + 1)
  1876. items.append((shift, f"){node.tagname}"))
  1877. node = node.next
  1878. return items
  1879. shift = 0
  1880. items = []
  1881. items = show_node(self, items, shift)
  1882. return items
  1883. def add_bullet_list(self):
  1884. """Add bulleted list ("ul" tag)"""
  1885. child = self.create_element("ul")
  1886. self.append_child(child)
  1887. return child
  1888. def add_class(self, text):
  1889. """Set some class via CSS. Replaces complete class spec."""
  1890. cls = self.get_attribute_value("class")
  1891. if cls is not None and text in cls:
  1892. return self
  1893. self.remove_attribute("class")
  1894. if cls is None:
  1895. cls = text
  1896. else:
  1897. cls += " " + text
  1898. self.set_attribute("class", cls)
  1899. return self
  1900. def add_code(self, text=None):
  1901. """Add a "code" tag"""
  1902. child = self.create_element("code")
  1903. if type(text) is str:
  1904. child.append_child(self.create_text_node(text))
  1905. prev = self.span_bottom()
  1906. if prev is None:
  1907. prev = self
  1908. prev.append_child(child)
  1909. return self
  1910. def add_codeblock(self):
  1911. """Add monospaced lines ("pre" node)"""
  1912. child = self.create_element("pre")
  1913. self.append_child(child)
  1914. return child
  1915. def add_description_list(self):
  1916. """Add description list ("dl" tag)"""
  1917. child = self.create_element("dl")
  1918. self.append_child(child)
  1919. return child
  1920. def add_division(self):
  1921. """Add "div" tag"""
  1922. child = self.create_element("div")
  1923. self.append_child(child)
  1924. return child
  1925. def add_header(self, level=1):
  1926. """Add header tag"""
  1927. if level not in range(1, 7):
  1928. raise ValueError("Header level must be in [1, 6]")
  1929. this_tag = self.tagname
  1930. new_tag = f"h{level}"
  1931. child = self.create_element(new_tag)
  1932. if this_tag not in ("h1", "h2", "h3", "h4", "h5", "h6", "p"):
  1933. self.append_child(child)
  1934. return child
  1935. self.parent.append_child(child)
  1936. return child
  1937. def add_horizontal_line(self):
  1938. """Add horizontal line ("hr" tag)"""
  1939. child = self.create_element("hr")
  1940. self.append_child(child)
  1941. return child
  1942. def add_image(self, name, width=None, height=None, imgfloat=None, align=None):
  1943. """Add image node (tag "img")."""
  1944. child = self.create_element("img")
  1945. if width is not None:
  1946. child.set_attribute("width", f"{width}")
  1947. if height is not None:
  1948. child.set_attribute("height", f"{height}")
  1949. if imgfloat is not None:
  1950. child.set_attribute("style", f"float: {imgfloat}")
  1951. if align is not None:
  1952. child.set_attribute("align", f"{align}")
  1953. child.set_attribute("src", f"{name}")
  1954. self.append_child(child)
  1955. return child
  1956. def add_link(self, href, text=None):
  1957. """Add a hyperlink ("a" tag)"""
  1958. child = self.create_element("a")
  1959. if not isinstance(text, str):
  1960. text = href
  1961. child.set_attribute("href", href)
  1962. child.append_child(self.create_text_node(text))
  1963. prev = self.span_bottom()
  1964. if prev is None:
  1965. prev = self
  1966. prev.append_child(child)
  1967. return self
  1968. def add_list_item(self):
  1969. """Add item ("li" tag) under a (numbered or bulleted) list."""
  1970. if self.tagname not in ("ol", "ul"):
  1971. raise ValueError("cannot add list item to", self.tagname)
  1972. child = self.create_element("li")
  1973. self.append_child(child)
  1974. return child
  1975. def add_number_list(self, start=1, numtype=None):
  1976. """Add numbered list ("ol" tag)"""
  1977. child = self.create_element("ol")
  1978. if start > 1:
  1979. child.set_attribute("start", str(start))
  1980. if numtype is not None:
  1981. child.set_attribute("type", numtype)
  1982. self.append_child(child)
  1983. return child
  1984. def add_paragraph(self):
  1985. """Add "p" tag"""
  1986. child = self.create_element("p")
  1987. if self.tagname != "p":
  1988. self.append_child(child)
  1989. else:
  1990. self.parent.append_child(child)
  1991. return child
  1992. def add_span(self):
  1993. child = self.create_element("span")
  1994. self.append_child(child)
  1995. return child
  1996. def add_style(self, text):
  1997. """Set some style via CSS style. Replaces complete style spec."""
  1998. style = self.get_attribute_value("style")
  1999. if style is not None and text in style:
  2000. return self
  2001. self.remove_attribute("style")
  2002. if style is None:
  2003. style = text
  2004. else:
  2005. style += ";" + text
  2006. self.set_attribute("style", style)
  2007. return self
  2008. def add_subscript(self, text=None):
  2009. """Add a subscript ("sub" tag)"""
  2010. child = self.create_element("sub")
  2011. if type(text) is str:
  2012. child.append_child(self.create_text_node(text))
  2013. prev = self.span_bottom()
  2014. if prev is None:
  2015. prev = self
  2016. prev.append_child(child)
  2017. return self
  2018. def add_superscript(self, text=None):
  2019. """Add a superscript ("sup" tag)"""
  2020. child = self.create_element("sup")
  2021. if type(text) is str:
  2022. child.append_child(self.create_text_node(text))
  2023. prev = self.span_bottom()
  2024. if prev is None:
  2025. prev = self
  2026. prev.append_child(child)
  2027. return self
  2028. def add_text(self, text):
  2029. """Add text. Line breaks are honored."""
  2030. lines = text.splitlines()
  2031. line_count = len(lines)
  2032. prev = self.span_bottom()
  2033. if prev is None:
  2034. prev = self
  2035. for i, line in enumerate(lines):
  2036. prev.append_child(self.create_text_node(line))
  2037. if i < line_count - 1:
  2038. prev.append_child(self.create_element("br"))
  2039. return self
  2040. def append_child( self, child):
  2041. mupdf.fz_dom_append_child( self.this, child.this)
  2042. def append_styled_span(self, style):
  2043. span = self.create_element("span")
  2044. span.add_style(style)
  2045. prev = self.span_bottom()
  2046. if prev is None:
  2047. prev = self
  2048. prev.append_child(span)
  2049. return prev
  2050. def bodytag( self):
  2051. return Xml( mupdf.fz_dom_body( self.this))
  2052. def clone( self):
  2053. ret = mupdf.fz_dom_clone( self.this)
  2054. return Xml( ret)
  2055. @staticmethod
  2056. def color_text(color):
  2057. if type(color) is str:
  2058. return color
  2059. if type(color) is int:
  2060. return f"rgb({sRGB_to_rgb(color)})"
  2061. if type(color) in (tuple, list):
  2062. return f"rgb{tuple(color)}"
  2063. return color
  2064. def create_element( self, tag):
  2065. return Xml( mupdf.fz_dom_create_element( self.this, tag))
  2066. def create_text_node( self, text):
  2067. return Xml( mupdf.fz_dom_create_text_node( self.this, text))
  2068. def debug(self):
  2069. """Print a list of the node tree below self."""
  2070. items = self._get_node_tree()
  2071. for item in items:
  2072. message(" " * item[0] + item[1].replace("\n", "\\n"))
  2073. def find( self, tag, att, match):
  2074. ret = mupdf.fz_dom_find( self.this, tag, att, match)
  2075. if ret.m_internal:
  2076. return Xml( ret)
  2077. def find_next( self, tag, att, match):
  2078. ret = mupdf.fz_dom_find_next( self.this, tag, att, match)
  2079. if ret.m_internal:
  2080. return Xml( ret)
  2081. @property
  2082. def first_child( self):
  2083. if mupdf.fz_xml_text( self.this):
  2084. # text node, has no child.
  2085. return
  2086. ret = mupdf.fz_dom_first_child( self)
  2087. if ret.m_internal:
  2088. return Xml( ret)
  2089. def get_attribute_value( self, key):
  2090. assert key
  2091. return mupdf.fz_dom_attribute( self.this, key)
  2092. def get_attributes( self):
  2093. if mupdf.fz_xml_text( self.this):
  2094. # text node, has no attributes.
  2095. return
  2096. result = dict()
  2097. i = 0
  2098. while 1:
  2099. val, key = mupdf.fz_dom_get_attribute( self.this, i)
  2100. if not val or not key:
  2101. break
  2102. result[ key] = val
  2103. i += 1
  2104. return result
  2105. def insert_after( self, node):
  2106. mupdf.fz_dom_insert_after( self.this, node.this)
  2107. def insert_before( self, node):
  2108. mupdf.fz_dom_insert_before( self.this, node.this)
  2109. def insert_text(self, text):
  2110. lines = text.splitlines()
  2111. line_count = len(lines)
  2112. for i, line in enumerate(lines):
  2113. self.append_child(self.create_text_node(line))
  2114. if i < line_count - 1:
  2115. self.append_child(self.create_element("br"))
  2116. return self
  2117. @property
  2118. def is_text(self):
  2119. """Check if this is a text node."""
  2120. return self.text is not None
  2121. @property
  2122. def last_child(self):
  2123. """Return last child node."""
  2124. child = self.first_child
  2125. if child is None:
  2126. return None
  2127. while True:
  2128. next = child.next
  2129. if not next:
  2130. return child
  2131. child = next
  2132. @property
  2133. def next( self):
  2134. ret = mupdf.fz_dom_next( self.this)
  2135. if ret.m_internal:
  2136. return Xml( ret)
  2137. @property
  2138. def parent( self):
  2139. ret = mupdf.fz_dom_parent( self.this)
  2140. if ret.m_internal:
  2141. return Xml( ret)
  2142. @property
  2143. def previous( self):
  2144. ret = mupdf.fz_dom_previous( self.this)
  2145. if ret.m_internal:
  2146. return Xml( ret)
  2147. def remove( self):
  2148. mupdf.fz_dom_remove( self.this)
  2149. def remove_attribute( self, key):
  2150. assert key
  2151. mupdf.fz_dom_remove_attribute( self.this, key)
  2152. @property
  2153. def root( self):
  2154. return Xml( mupdf.fz_xml_root( self.this))
  2155. def set_align(self, align):
  2156. """Set text alignment via CSS style"""
  2157. text = "text-align: %s"
  2158. if isinstance( align, str):
  2159. t = align
  2160. elif align == TEXT_ALIGN_LEFT:
  2161. t = "left"
  2162. elif align == TEXT_ALIGN_CENTER:
  2163. t = "center"
  2164. elif align == TEXT_ALIGN_RIGHT:
  2165. t = "right"
  2166. elif align == TEXT_ALIGN_JUSTIFY:
  2167. t = "justify"
  2168. else:
  2169. raise ValueError(f"Unrecognised {align=}")
  2170. text = text % t
  2171. self.add_style(text)
  2172. return self
  2173. def set_attribute( self, key, value):
  2174. assert key
  2175. mupdf.fz_dom_add_attribute( self.this, key, value)
  2176. def set_bgcolor(self, color):
  2177. """Set background color via CSS style"""
  2178. text = f"background-color: %s" % self.color_text(color)
  2179. self.add_style(text) # does not work on span level
  2180. return self
  2181. def set_bold(self, val=True):
  2182. """Set bold on / off via CSS style"""
  2183. if val:
  2184. val="bold"
  2185. else:
  2186. val="normal"
  2187. text = "font-weight: %s" % val
  2188. self.append_styled_span(text)
  2189. return self
  2190. def set_color(self, color):
  2191. """Set text color via CSS style"""
  2192. text = f"color: %s" % self.color_text(color)
  2193. self.append_styled_span(text)
  2194. return self
  2195. def set_columns(self, cols):
  2196. """Set number of text columns via CSS style"""
  2197. text = f"columns: {cols}"
  2198. self.append_styled_span(text)
  2199. return self
  2200. def set_font(self, font):
  2201. """Set font-family name via CSS style"""
  2202. text = "font-family: %s" % font
  2203. self.append_styled_span(text)
  2204. return self
  2205. def set_fontsize(self, fontsize):
  2206. """Set font size name via CSS style"""
  2207. if type(fontsize) is str:
  2208. px=""
  2209. else:
  2210. px="px"
  2211. text = f"font-size: {fontsize}{px}"
  2212. self.append_styled_span(text)
  2213. return self
  2214. def set_id(self, unique):
  2215. """Set a unique id."""
  2216. # check uniqueness
  2217. root = self.root
  2218. if root.find(None, "id", unique):
  2219. raise ValueError(f"id '{unique}' already exists")
  2220. self.set_attribute("id", unique)
  2221. return self
  2222. def set_italic(self, val=True):
  2223. """Set italic on / off via CSS style"""
  2224. if val:
  2225. val="italic"
  2226. else:
  2227. val="normal"
  2228. text = "font-style: %s" % val
  2229. self.append_styled_span(text)
  2230. return self
  2231. def set_leading(self, leading):
  2232. """Set inter-line spacing value via CSS style - block-level only."""
  2233. text = f"-mupdf-leading: {leading}"
  2234. self.add_style(text)
  2235. return self
  2236. def set_letter_spacing(self, spacing):
  2237. """Set inter-letter spacing value via CSS style"""
  2238. text = f"letter-spacing: {spacing}"
  2239. self.append_styled_span(text)
  2240. return self
  2241. def set_lineheight(self, lineheight):
  2242. """Set line height name via CSS style - block-level only."""
  2243. text = f"line-height: {lineheight}"
  2244. self.add_style(text)
  2245. return self
  2246. def set_margins(self, val):
  2247. """Set margin values via CSS style"""
  2248. text = "margins: %s" % val
  2249. self.append_styled_span(text)
  2250. return self
  2251. def set_opacity(self, opacity):
  2252. """Set opacity via CSS style"""
  2253. text = f"opacity: {opacity}"
  2254. self.append_styled_span(text)
  2255. return self
  2256. def set_pagebreak_after(self):
  2257. """Insert a page break after this node."""
  2258. text = "page-break-after: always"
  2259. self.add_style(text)
  2260. return self
  2261. def set_pagebreak_before(self):
  2262. """Insert a page break before this node."""
  2263. text = "page-break-before: always"
  2264. self.add_style(text)
  2265. return self
  2266. def set_properties(
  2267. self,
  2268. align=None,
  2269. bgcolor=None,
  2270. bold=None,
  2271. color=None,
  2272. columns=None,
  2273. font=None,
  2274. fontsize=None,
  2275. indent=None,
  2276. italic=None,
  2277. leading=None,
  2278. letter_spacing=None,
  2279. lineheight=None,
  2280. margins=None,
  2281. pagebreak_after=None,
  2282. pagebreak_before=None,
  2283. word_spacing=None,
  2284. unqid=None,
  2285. cls=None,
  2286. ):
  2287. """Set any or all properties of a node.
  2288. To be used for existing nodes preferably.
  2289. """
  2290. root = self.root
  2291. temp = root.add_division()
  2292. if align is not None:
  2293. temp.set_align(align)
  2294. if bgcolor is not None:
  2295. temp.set_bgcolor(bgcolor)
  2296. if bold is not None:
  2297. temp.set_bold(bold)
  2298. if color is not None:
  2299. temp.set_color(color)
  2300. if columns is not None:
  2301. temp.set_columns(columns)
  2302. if font is not None:
  2303. temp.set_font(font)
  2304. if fontsize is not None:
  2305. temp.set_fontsize(fontsize)
  2306. if indent is not None:
  2307. temp.set_text_indent(indent)
  2308. if italic is not None:
  2309. temp.set_italic(italic)
  2310. if leading is not None:
  2311. temp.set_leading(leading)
  2312. if letter_spacing is not None:
  2313. temp.set_letter_spacing(letter_spacing)
  2314. if lineheight is not None:
  2315. temp.set_lineheight(lineheight)
  2316. if margins is not None:
  2317. temp.set_margins(margins)
  2318. if pagebreak_after is not None:
  2319. temp.set_pagebreak_after()
  2320. if pagebreak_before is not None:
  2321. temp.set_pagebreak_before()
  2322. if word_spacing is not None:
  2323. temp.set_word_spacing(word_spacing)
  2324. if unqid is not None:
  2325. self.set_id(unqid)
  2326. if cls is not None:
  2327. self.add_class(cls)
  2328. styles = []
  2329. top_style = temp.get_attribute_value("style")
  2330. if top_style is not None:
  2331. styles.append(top_style)
  2332. child = temp.first_child
  2333. while child:
  2334. styles.append(child.get_attribute_value("style"))
  2335. child = child.first_child
  2336. self.set_attribute("style", ";".join(styles))
  2337. temp.remove()
  2338. return self
  2339. def set_text_indent(self, indent):
  2340. """Set text indentation name via CSS style - block-level only."""
  2341. text = f"text-indent: {indent}"
  2342. self.add_style(text)
  2343. return self
  2344. def set_underline(self, val="underline"):
  2345. text = "text-decoration: %s" % val
  2346. self.append_styled_span(text)
  2347. return self
  2348. def set_word_spacing(self, spacing):
  2349. """Set inter-word spacing value via CSS style"""
  2350. text = f"word-spacing: {spacing}"
  2351. self.append_styled_span(text)
  2352. return self
  2353. def span_bottom(self):
  2354. """Find deepest level in stacked spans."""
  2355. parent = self
  2356. child = self.last_child
  2357. if child is None:
  2358. return None
  2359. while child.is_text:
  2360. child = child.previous
  2361. if child is None:
  2362. break
  2363. if child is None or child.tagname != "span":
  2364. return None
  2365. while True:
  2366. if child is None:
  2367. return parent
  2368. if child.tagname in ("a", "sub","sup","body") or child.is_text:
  2369. child = child.next
  2370. continue
  2371. if child.tagname == "span":
  2372. parent = child
  2373. child = child.first_child
  2374. else:
  2375. return parent
  2376. @property
  2377. def tagname( self):
  2378. return mupdf.fz_xml_tag( self.this)
  2379. @property
  2380. def text( self):
  2381. return mupdf.fz_xml_text( self.this)
  2382. add_var = add_code
  2383. add_samp = add_code
  2384. add_kbd = add_code
  2385. class Colorspace:
  2386. def __init__(self, type_):
  2387. """Supported are GRAY, RGB and CMYK."""
  2388. if isinstance( type_, mupdf.FzColorspace):
  2389. self.this = type_
  2390. elif type_ == CS_GRAY:
  2391. self.this = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_GRAY)
  2392. elif type_ == CS_CMYK:
  2393. self.this = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_CMYK)
  2394. elif type_ == CS_RGB:
  2395. self.this = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB)
  2396. else:
  2397. self.this = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB)
  2398. def __repr__(self):
  2399. x = ("", "GRAY", "", "RGB", "CMYK")[self.n]
  2400. return "Colorspace(CS_%s) - %s" % (x, self.name)
  2401. def _name(self):
  2402. return mupdf.fz_colorspace_name(self.this)
  2403. @property
  2404. def n(self):
  2405. """Size of one pixel."""
  2406. return mupdf.fz_colorspace_n(self.this)
  2407. @property
  2408. def name(self):
  2409. """Name of the Colorspace."""
  2410. return self._name()
  2411. class DeviceWrapper:
  2412. def __init__(self, *args):
  2413. if args_match( args, mupdf.FzDevice):
  2414. device, = args
  2415. self.this = device
  2416. elif args_match( args, Pixmap, None):
  2417. pm, clip = args
  2418. bbox = JM_irect_from_py( clip)
  2419. if mupdf.fz_is_infinite_irect( bbox):
  2420. self.this = mupdf.fz_new_draw_device( mupdf.FzMatrix(), pm)
  2421. else:
  2422. self.this = mupdf.fz_new_draw_device_with_bbox( mupdf.FzMatrix(), pm, bbox)
  2423. elif args_match( args, mupdf.FzDisplayList):
  2424. dl, = args
  2425. self.this = mupdf.fz_new_list_device( dl)
  2426. elif args_match( args, mupdf.FzStextPage, None):
  2427. tp, flags = args
  2428. opts = mupdf.FzStextOptions( flags)
  2429. self.this = mupdf.fz_new_stext_device( tp, opts)
  2430. else:
  2431. raise Exception( f'Unrecognised args for DeviceWrapper: {args!r}')
  2432. class DisplayList:
  2433. def __del__(self):
  2434. if not type(self) is DisplayList: return
  2435. self.thisown = False
  2436. def __init__(self, *args):
  2437. if len(args) == 1 and isinstance(args[0], mupdf.FzRect):
  2438. self.this = mupdf.FzDisplayList(args[0])
  2439. elif len(args) == 1 and isinstance(args[0], mupdf.FzDisplayList):
  2440. self.this = args[0]
  2441. else:
  2442. assert 0, f'Unrecognised {args=}'
  2443. def get_pixmap(self, matrix=None, colorspace=None, alpha=0, clip=None):
  2444. if isinstance(colorspace, Colorspace):
  2445. colorspace = colorspace.this
  2446. else:
  2447. colorspace = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB)
  2448. val = JM_pixmap_from_display_list(self.this, matrix, colorspace, alpha, clip, None)
  2449. val.thisown = True
  2450. return val
  2451. def get_textpage(self, flags=3):
  2452. """Make a TextPage from a DisplayList."""
  2453. stext_options = mupdf.FzStextOptions()
  2454. stext_options.flags = flags
  2455. val = mupdf.FzStextPage(self.this, stext_options)
  2456. val.thisown = True
  2457. return val
  2458. @property
  2459. def rect(self):
  2460. val = JM_py_from_rect(mupdf.fz_bound_display_list(self.this))
  2461. val = Rect(val)
  2462. return val
  2463. def run(self, dw, m, area):
  2464. mupdf.fz_run_display_list(
  2465. self.this,
  2466. dw.device,
  2467. JM_matrix_from_py(m),
  2468. JM_rect_from_py(area),
  2469. mupdf.FzCookie(),
  2470. )
# Bind extra.FzDocument_insert_pdf to a module-level name, but only when
# g_use_extra is truthy.
if g_use_extra:
    extra_FzDocument_insert_pdf = extra.FzDocument_insert_pdf
  2473. class Document:
  2474. def __contains__(self, loc) -> bool:
  2475. if type(loc) is int:
  2476. if loc < self.page_count:
  2477. return True
  2478. return False
  2479. if type(loc) not in (tuple, list) or len(loc) != 2:
  2480. return False
  2481. chapter, pno = loc
  2482. if (0
  2483. or not isinstance(chapter, int)
  2484. or chapter < 0
  2485. or chapter >= self.chapter_count
  2486. ):
  2487. return False
  2488. if (0
  2489. or not isinstance(pno, int)
  2490. or pno < 0
  2491. or pno >= self.chapter_page_count(chapter)
  2492. ):
  2493. return False
  2494. return True
  2495. def __delitem__(self, i)->None:
  2496. if not self.is_pdf:
  2497. raise ValueError("is no PDF")
  2498. if type(i) is int:
  2499. return self.delete_page(i)
  2500. if type(i) in (list, tuple, range):
  2501. return self.delete_pages(i)
  2502. if type(i) is not slice:
  2503. raise ValueError("bad argument type")
  2504. pc = self.page_count
  2505. start = i.start if i.start else 0
  2506. stop = i.stop if i.stop else pc
  2507. step = i.step if i.step else 1
  2508. while start < 0:
  2509. start += pc
  2510. if start >= pc:
  2511. raise ValueError("bad page number(s)")
  2512. while stop < 0:
  2513. stop += pc
  2514. if stop > pc:
  2515. raise ValueError("bad page number(s)")
  2516. return self.delete_pages(range(start, stop, step))
  2517. def __enter__(self):
  2518. return self
  2519. def __exit__(self, *args):
  2520. self.close()
  2521. @typing.overload
  2522. def __getitem__(self, i: int = 0) -> Page:
  2523. ...
  2524. if sys.version_info >= (3, 9):
  2525. @typing.overload
  2526. def __getitem__(self, i: slice) -> list[Page]:
  2527. ...
  2528. @typing.overload
  2529. def __getitem__(self, i: tuple[int, int]) -> Page:
  2530. ...
  2531. def __getitem__(self, i=0):
  2532. if isinstance(i, slice):
  2533. return [self[j] for j in range(*i.indices(len(self)))]
  2534. assert isinstance(i, int) or (isinstance(i, tuple) and len(i) == 2 and all(isinstance(x, int) for x in i)), \
  2535. f'Invalid item number: {i=}.'
  2536. if i not in self:
  2537. raise IndexError(f"page {i} not in document")
  2538. return self.load_page(i)
  2539. def __init__(self, filename=None, stream=None, filetype=None, rect=None, width=0, height=0, fontsize=11):
  2540. """Creates a document. Use 'open' as a synonym.
  2541. Notes:
  2542. Basic usages:
  2543. open() - new PDF document
  2544. open(filename) - string or pathlib.Path, must have supported
  2545. file extension.
  2546. open(type, buffer) - type: valid extension, buffer: bytes object.
  2547. open(stream=buffer, filetype=type) - keyword version of previous.
  2548. open(filename, fileype=type) - filename with unrecognized extension.
  2549. rect, width, height, fontsize: layout reflowable document
  2550. on open (e.g. EPUB). Ignored if n/a.
  2551. """
  2552. # We temporarily set JM_mupdf_show_errors=0 while we are constructing,
  2553. # then restore its original value in a `finally:` block.
  2554. #
  2555. global JM_mupdf_show_errors
  2556. JM_mupdf_show_errors_old = JM_mupdf_show_errors
  2557. JM_mupdf_show_errors = 0
  2558. try:
  2559. self.is_closed = False
  2560. self.is_encrypted = False
  2561. self.is_encrypted = False
  2562. self.metadata = None
  2563. self.FontInfos = []
  2564. self.Graftmaps = {}
  2565. self.ShownPages = {}
  2566. self.InsertedImages = {}
  2567. self._page_refs = weakref.WeakValueDictionary()
  2568. if isinstance(filename, mupdf.PdfDocument):
  2569. pdf_document = filename
  2570. self.this = pdf_document
  2571. self.this_is_pdf = True
  2572. return
  2573. w = width
  2574. h = height
  2575. r = JM_rect_from_py(rect)
  2576. if not mupdf.fz_is_infinite_rect(r):
  2577. w = r.x1 - r.x0
  2578. h = r.y1 - r.y0
  2579. self._name = filename
  2580. self.stream = stream
  2581. if stream is not None:
  2582. if filename is not None and filetype is None:
  2583. # 2025-05-06: Use <filename> as the filetype. This is
  2584. # reversing precedence - we used to use <filename> if both
  2585. # were set.
  2586. filetype = filename
  2587. if isinstance(stream, (bytes, memoryview)):
  2588. pass
  2589. elif isinstance(stream, bytearray):
  2590. stream = bytes(stream)
  2591. elif isinstance(stream, io.BytesIO):
  2592. stream = stream.getvalue()
  2593. else:
  2594. raise TypeError(f"bad stream: {type(stream)=}.")
  2595. self.stream = stream
  2596. assert isinstance(stream, (bytes, memoryview))
  2597. if len(stream) == 0:
  2598. # MuPDF raise an exception for this but also generates
  2599. # warnings, which is not very helpful for us. So instead we
  2600. # raise a specific exception.
  2601. raise EmptyFileError('Cannot open empty stream.')
  2602. stream2 = mupdf.fz_open_memory(mupdf.python_buffer_data(stream), len(stream))
  2603. try:
  2604. doc = mupdf.fz_open_document_with_stream(filetype if filetype else '', stream2)
  2605. except Exception as e:
  2606. if g_exceptions_verbose > 1: exception_info()
  2607. raise FileDataError('Failed to open stream') from e
  2608. elif filename:
  2609. assert not stream
  2610. if isinstance(filename, str):
  2611. pass
  2612. elif hasattr(filename, "absolute"):
  2613. filename = str(filename)
  2614. elif hasattr(filename, "name"):
  2615. filename = filename.name
  2616. else:
  2617. raise TypeError(f"bad filename: {type(filename)=} {filename=}.")
  2618. self._name = filename
  2619. # Generate our own specific exceptions. This avoids MuPDF
  2620. # generating warnings etc.
  2621. if not os.path.exists(filename):
  2622. raise FileNotFoundError(f"no such file: '{filename}'")
  2623. elif not os.path.isfile(filename):
  2624. raise FileDataError(f"'{filename}' is no file")
  2625. elif os.path.getsize(filename) == 0:
  2626. raise EmptyFileError(f'Cannot open empty file: {filename=}.')
  2627. if filetype:
  2628. # Override the type implied by <filename>. MuPDF does not
  2629. # have a way to do this directly so we open via a stream.
  2630. try:
  2631. fz_stream = mupdf.fz_open_file(filename)
  2632. doc = mupdf.fz_open_document_with_stream(filetype, fz_stream)
  2633. except Exception as e:
  2634. if g_exceptions_verbose > 1: exception_info()
  2635. raise FileDataError(f'Failed to open file {filename!r} as type {filetype!r}.') from e
  2636. else:
  2637. try:
  2638. doc = mupdf.fz_open_document(filename)
  2639. except Exception as e:
  2640. if g_exceptions_verbose > 1: exception_info()
  2641. raise FileDataError(f'Failed to open file {filename!r}.') from e
  2642. else:
  2643. pdf = mupdf.PdfDocument()
  2644. doc = mupdf.FzDocument(pdf)
  2645. if w > 0 and h > 0:
  2646. mupdf.fz_layout_document(doc, w, h, fontsize)
  2647. elif mupdf.fz_is_document_reflowable(doc):
  2648. mupdf.fz_layout_document(doc, 400, 600, 11)
  2649. self.this = doc
  2650. # fixme: not sure where self.thisown gets initialised in PyMuPDF.
  2651. #
  2652. self.thisown = True
  2653. if self.thisown:
  2654. self._graft_id = TOOLS.gen_id()
  2655. if self.needs_pass:
  2656. self.is_encrypted = True
  2657. else: # we won't init until doc is decrypted
  2658. self.init_doc()
  2659. # the following hack detects invalid/empty SVG files, which else may lead
  2660. # to interpreter crashes
  2661. if filename and filename.lower().endswith("svg") or filetype and "svg" in filetype.lower():
  2662. try:
  2663. _ = self.convert_to_pdf() # this seems to always work
  2664. except Exception as e:
  2665. if g_exceptions_verbose > 1: exception_info()
  2666. raise FileDataError("cannot open broken document") from e
  2667. if g_use_extra:
  2668. self.this_is_pdf = isinstance( self.this, mupdf.PdfDocument)
  2669. if self.this_is_pdf:
  2670. self.page_count2 = extra.page_count_pdf
  2671. else:
  2672. self.page_count2 = extra.page_count_fz
  2673. finally:
  2674. JM_mupdf_show_errors = JM_mupdf_show_errors_old
  2675. def __len__(self) -> int:
  2676. return self.page_count
  2677. def __repr__(self) -> str:
  2678. m = "closed " if self.is_closed else ""
  2679. if self.stream is None:
  2680. if self.name == "":
  2681. return m + "Document(<new PDF, doc# %i>)" % self._graft_id
  2682. return m + "Document('%s')" % (self.name,)
  2683. return m + "Document('%s', <memory, doc# %i>)" % (self.name, self._graft_id)
  def _addFormFont(self, name, font):
      """Add a new form font to /Root/AcroForm/DR/Font.

      Args:
          name: name under which the font is stored in the /Font dictionary.
          font: string handed to JM_pdf_obj_from_str() to build the value.

      Raises:
          ValueError: the document is closed or encrypted.
          RuntimeError: the PDF has no /AcroForm/DR/Font dictionary.

      Returns None without doing anything if the document is not a PDF.
      """
      if self.is_closed or self.is_encrypted:
          raise ValueError("document closed or encrypted")
      pdf = _as_pdf_document(self, required=0)
      if not pdf.m_internal:
          return
      trailer = mupdf.pdf_trailer(pdf)
      font_path = (
          PDF_NAME('Root'),
          PDF_NAME('AcroForm'),
          PDF_NAME('DR'),
          PDF_NAME('Font'),
      )
      form_fonts = mupdf.pdf_dict_getl(trailer, *font_path)
      if not (form_fonts.m_internal and mupdf.pdf_is_dict(form_fonts)):
          raise RuntimeError( "PDF has no form fonts yet")
      font_key = mupdf.pdf_new_name(name)
      font_obj = JM_pdf_obj_from_str(pdf, font)
      mupdf.pdf_dict_put(form_fonts, font_key, font_obj)
  def _delToC(self):
      """Delete the TOC.

      Deletes the /Outlines root object, removes the /Outlines entry from the
      catalog, deletes every outline item found by JM_outline_xrefs() and then
      calls ``self.init_doc()``.

      Returns:
          List of the outline item xrefs followed by the outline root xref.
          An empty list if the document is not a PDF or has no /Outlines.

      Raises:
          ValueError: the document is closed or encrypted.
      """
      if self.is_closed or self.is_encrypted:
          raise ValueError("document closed or encrypted")
      deleted = []
      pdf = _as_pdf_document(self, required=0)
      if not pdf.m_internal:
          return deleted  # not a pdf
      catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
      outline_root = mupdf.pdf_dict_get(catalog, PDF_NAME('Outlines'))
      if not outline_root.m_internal:
          return deleted  # no outlines or some problem
      first_item = mupdf.pdf_dict_get(outline_root, PDF_NAME('First'))
      deleted = JM_outline_xrefs(first_item, deleted)
      item_count = len(deleted)
      # Remove the outline root first, then every item below it.
      root_xref = mupdf.pdf_to_num(outline_root)
      mupdf.pdf_delete_object(pdf, root_xref)
      mupdf.pdf_dict_del(catalog, PDF_NAME('Outlines'))
      for pos in range(item_count):
          _, item_xref = JM_INT_ITEM(deleted, pos)
          mupdf.pdf_delete_object(pdf, item_xref)
      deleted.append(root_xref)
      self.init_doc()
      return deleted
  def _delete_page(self, pno):
      """Delete page number `pno` from the PDF.

      If the underlying document has a ``rev_page_map``, the page tree is
      dropped afterwards via ``ll_pdf_drop_page_tree()``.
      """
      pdf = _as_pdf_document(self)
      mupdf.pdf_delete_page(pdf, pno)
      has_reverse_map = pdf.m_internal.rev_page_map
      if has_reverse_map:
          mupdf.ll_pdf_drop_page_tree(pdf.m_internal)
  def _deleteObject(self, xref):
      """Delete the PDF object with number `xref`.

      Raises:
          ValueError: `xref` is outside 1 .. (xref table length - 1), as
              checked by _INRANGE().
      """
      pdf = _as_pdf_document(self)
      highest_xref = mupdf.pdf_xref_len(pdf) - 1
      if not _INRANGE(xref, 1, highest_xref):
          raise ValueError( MSG_BAD_XREF)
      mupdf.pdf_delete_object(pdf, xref)
  def _embeddedFileGet(self, idx):
      """Return the content of embedded file number `idx`.

      The file specification is read from position ``2*idx+1`` of the
      /Root/Names/EmbeddedFiles/Names array; its /EF/F stream is loaded and
      converted with JM_BinFromBuffer().
      """
      pdf = _as_pdf_document(self)
      trailer = mupdf.pdf_trailer(pdf)
      names_path = (
          PDF_NAME('Root'),
          PDF_NAME('Names'),
          PDF_NAME('EmbeddedFiles'),
          PDF_NAME('Names'),
      )
      names = mupdf.pdf_dict_getl(trailer, *names_path)
      file_entry = mupdf.pdf_array_get(names, 2*idx+1)
      stream_obj = mupdf.pdf_dict_getl(file_entry, PDF_NAME('EF'), PDF_NAME('F'))
      return JM_BinFromBuffer(mupdf.pdf_load_stream(stream_obj))
  2755. def _embeddedFileIndex(self, item: typing.Union[int, str]) -> int:
  2756. filenames = self.embfile_names()
  2757. msg = "'%s' not in EmbeddedFiles array." % str(item)
  2758. if item in filenames:
  2759. idx = filenames.index(item)
  2760. elif item in range(len(filenames)):
  2761. idx = item
  2762. else:
  2763. raise ValueError(msg)
  2764. return idx
  def _embfile_add(self, name, buffer_, filename=None, ufilename=None, desc=None):
      """Append a new entry to the EmbeddedFiles name array.

      Args:
          name: entry name, stored as a text string before the file entry.
          buffer_: file content; converted with JM_BufferFromBytes().
          filename, ufilename, desc: passed on to JM_embed_file().

      Returns:
          xref number of the new entry's /EF/F object.

      Raises:
          TypeError: JM_BufferFromBytes() produced no buffer for `buffer_`.
      """
      pdf = _as_pdf_document(self)
      content = JM_BufferFromBytes(buffer_)
      if not content.m_internal:
          raise TypeError( MSG_BAD_BUFFER)
      trailer = mupdf.pdf_trailer(pdf)
      names = mupdf.pdf_dict_getl(
              trailer,
              PDF_NAME('Root'),
              PDF_NAME('Names'),
              PDF_NAME('EmbeddedFiles'),
              PDF_NAME('Names'),
              )
      if not mupdf.pdf_is_array(names):
          # No usable name array yet: create one below the catalog.
          catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
          names = mupdf.pdf_new_array(pdf, 6)  # an even number!
          mupdf.pdf_dict_putl(
                  catalog,
                  names,
                  PDF_NAME('Names'),
                  PDF_NAME('EmbeddedFiles'),
                  PDF_NAME('Names'),
                  )
      file_entry = JM_embed_file(pdf, content, filename, ufilename, desc, 1)
      stream_obj = mupdf.pdf_dict_getl(file_entry, PDF_NAME('EF'), PDF_NAME('F'))
      xref = mupdf.pdf_to_num(stream_obj)
      # The array holds (name, file entry) pairs.
      mupdf.pdf_array_push(names, mupdf.pdf_new_text_string(name))
      mupdf.pdf_array_push(names, file_entry)
      return xref
  def _embfile_del(self, idx):
      """Delete two consecutive items of the EmbeddedFiles name array.

      Removes array positions ``idx + 1`` and ``idx`` (in that order) from
      /Root/Names/EmbeddedFiles/Names.

      NOTE(review): sibling methods address an entry's file specification at
      position ``2*idx+1``, whereas this method uses `idx` directly - confirm
      what the caller passes as `idx`.
      """
      pdf = _as_pdf_document(self)
      trailer = mupdf.pdf_trailer(pdf)
      names = mupdf.pdf_dict_getl(
              trailer,
              PDF_NAME('Root'),
              PDF_NAME('Names'),
              PDF_NAME('EmbeddedFiles'),
              PDF_NAME('Names'),
              )
      # Delete the higher position first so the lower one keeps its index.
      for position in (idx + 1, idx):
          mupdf.pdf_array_delete(names, position)
  def _embfile_info(self, idx, infodict):
      """Fill `infodict` with information about embedded file number `idx`.

      Keys written: "collection" (xref of the entry's /CI object, or 0),
      file name, unicode file name, description, size and length. Size is
      taken from /DL, else from /Params/Size, else -1; length is taken from
      /Length, else -1.

      Returns:
          xref number of the entry's /EF/F object.
      """
      pdf = _as_pdf_document(self)
      trailer = mupdf.pdf_trailer(pdf)
      names = mupdf.pdf_dict_getl(
              trailer,
              PDF_NAME('Root'),
              PDF_NAME('Names'),
              PDF_NAME('EmbeddedFiles'),
              PDF_NAME('Names'),
              )
      file_entry = mupdf.pdf_array_get(names, 2*idx+1)

      collection_item = mupdf.pdf_dict_get(file_entry, PDF_NAME('CI'))
      if collection_item.m_internal:
          infodict["collection"] = mupdf.pdf_to_num(collection_item)
      else:
          infodict["collection"] = 0

      text = mupdf.pdf_to_text_string(mupdf.pdf_dict_get(file_entry, PDF_NAME('F')))
      infodict[dictkey_filename] = JM_EscapeStrFromStr(text)
      text = mupdf.pdf_to_text_string(mupdf.pdf_dict_get(file_entry, PDF_NAME('UF')))
      infodict[dictkey_ufilename] = JM_EscapeStrFromStr(text)
      text = mupdf.pdf_to_text_string(mupdf.pdf_dict_get(file_entry, PDF_NAME('Desc')))
      infodict[dictkey_descr] = JM_UnicodeFromStr(text)

      stream_obj = mupdf.pdf_dict_getl(file_entry, PDF_NAME('EF'), PDF_NAME('F'))
      xref = mupdf.pdf_to_num(stream_obj)

      length_obj = mupdf.pdf_dict_get(stream_obj, PDF_NAME('Length'))
      stored_length = mupdf.pdf_to_int(length_obj) if length_obj.m_internal else -1

      # Prefer /DL; fall back to /Params/Size.
      size_obj = mupdf.pdf_dict_get(stream_obj, PDF_NAME('DL'))
      if not size_obj.m_internal:
          size_obj = mupdf.pdf_dict_getl(stream_obj, PDF_NAME('Params'), PDF_NAME('Size'))
      full_size = mupdf.pdf_to_int(size_obj) if size_obj.m_internal else -1

      infodict[dictkey_size] = full_size
      infodict[dictkey_length] = stored_length
      return xref
  def _embfile_names(self, namelist):
      """Get list of embedded file names.

      Appends the converted text of every even-positioned item of the
      /Root/Names/EmbeddedFiles/Names array to `namelist`. Nothing is
      appended if that object is not an array.
      """
      pdf = _as_pdf_document(self)
      trailer = mupdf.pdf_trailer(pdf)
      names = mupdf.pdf_dict_getl(
              trailer,
              PDF_NAME('Root'),
              PDF_NAME('Names'),
              PDF_NAME('EmbeddedFiles'),
              PDF_NAME('Names'),
              )
      if not mupdf.pdf_is_array(names):
          return
      item_count = mupdf.pdf_array_len(names)
      for position in range(0, item_count, 2):
          name_obj = mupdf.pdf_array_get(names, position)
          raw_name = mupdf.pdf_to_text_string(name_obj)
          namelist.append(JM_EscapeStrFromStr(raw_name))
  def _embfile_upd(self, idx, buffer_=None, filename=None, ufilename=None, desc=None):
      """Update content and/or properties of embedded file number `idx`.

      Args:
          idx: entry number; the file specification is read from position
              ``2*idx+1`` of the /Root/Names/EmbeddedFiles/Names array.
          buffer_: new file content, converted with JM_BufferFromBytes().
              Falsy values leave the content unchanged.
          filename: new /F value, if truthy.
          ufilename: new /UF value, if truthy.
          desc: new /Desc value, if truthy.

      Returns:
          xref number of the entry's /EF/F object.

      Raises:
          TypeError: content was supplied but JM_BufferFromBytes() produced
              no buffer for it.
      """
      pdf = _as_pdf_document(self)
      trailer = mupdf.pdf_trailer(pdf)
      names = mupdf.pdf_dict_getl(
              trailer,
              PDF_NAME('Root'),
              PDF_NAME('Names'),
              PDF_NAME('EmbeddedFiles'),
              PDF_NAME('Names'),
              )
      entry = mupdf.pdf_array_get(names, 2*idx+1)

      filespec = mupdf.pdf_dict_getl(entry, PDF_NAME('EF'), PDF_NAME('F'))
      if not filespec.m_internal:
          RAISEPY( "bad PDF: no /EF object", JM_Exc_FileDataError)
      res = JM_BufferFromBytes(buffer_)
      # Plain Python buffers (bytes, bytearray, ...) have no `m_internal`
      # attribute, so reading it unconditionally raised AttributeError for
      # them. Only objects that do carry the attribute are judged by it.
      have_content = bool(buffer_) and bool(getattr(buffer_, "m_internal", True))
      if have_content and not res.m_internal:
          raise TypeError( MSG_BAD_BUFFER)
      if have_content and res.m_internal:
          JM_update_stream(pdf, filespec, res, 1)
          # adjust /DL and /Size parameters
          size, _ = mupdf.fz_buffer_storage(res)
          size_obj = mupdf.pdf_new_int(size)
          mupdf.pdf_dict_put(filespec, PDF_NAME('DL'), size_obj)
          mupdf.pdf_dict_putl(filespec, size_obj, PDF_NAME('Params'), PDF_NAME('Size'))
      xref = mupdf.pdf_to_num(filespec)
      if filename:
          mupdf.pdf_dict_put_text_string(entry, PDF_NAME('F'), filename)
      if ufilename:
          mupdf.pdf_dict_put_text_string(entry, PDF_NAME('UF'), ufilename)
      if desc:
          mupdf.pdf_dict_put_text_string(entry, PDF_NAME('Desc'), desc)
      return xref
  def _extend_toc_items(self, items):
      """Add color info to all items of an extended TOC list.

      For every entry of `items`, the dictionary at index 3 receives the
      outline item's xref plus, where applicable, "italic", "bold",
      "collapse", color and "zoom" values read from the PDF outline object.
      `items` is updated in place. When ``g_use_extra`` is set, the work is
      delegated to ``extra.Document_extend_toc_items()``.

      Raises:
          ValueError: the document is closed, or an entry's index-3 item is
              not a dict.
          IndexError: number of outline xrefs differs from ``len(items)``.
      """
      if self.is_closed:
          raise ValueError("document closed")
      if g_use_extra:
          return extra.Document_extend_toc_items( self.this, items)
      pdf = _as_pdf_document(self)
      catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
      if not catalog.m_internal:
          return
      outline_root = mupdf.pdf_dict_get(catalog, PDF_NAME('Outlines'))
      if not outline_root.m_internal:
          return
      first_item = mupdf.pdf_dict_get(outline_root, PDF_NAME('First'))
      if not first_item.m_internal:
          return
      xrefs = JM_outline_xrefs(first_item, [])
      xref_count = len(xrefs)
      item_count = len(items)
      if not xref_count:
          return
      if xref_count != item_count:
          raise IndexError( "internal error finding outline xrefs")

      # update all TOC item dictionaries
      for pos in range(xref_count):
          xref = int(xrefs[pos])
          item = items[pos]
          info = item[3]
          if not isinstance(info, dict):
              raise ValueError( "need non-simple TOC format")
          info[dictkey_xref] = xrefs[pos]
          bookmark = mupdf.pdf_load_object(pdf, xref)

          # /F: 1 = italic, 2 = bold, 3 = both.
          style_flags = mupdf.pdf_to_int(mupdf.pdf_dict_get(bookmark, PDF_NAME('F')))
          if style_flags in (1, 3):
              info["italic"] = True
          if style_flags in (2, 3):
              info["bold"] = True

          # /Count: a negative value is reported as collapsed, a positive
          # one as not collapsed; zero sets nothing.
          child_count = mupdf.pdf_to_int(mupdf.pdf_dict_get(bookmark, PDF_NAME('Count')))
          if child_count != 0:
              info["collapse"] = child_count < 0

          color_obj = mupdf.pdf_dict_get(bookmark, PDF_NAME('C'))
          if mupdf.pdf_is_array(color_obj) and mupdf.pdf_array_len(color_obj) == 3:
              info[dictkey_color] = tuple(
                      mupdf.pdf_to_real(mupdf.pdf_array_get(color_obj, k))
                      for k in range(3)
                      )

          # Zoom is the fifth item of a 5-element destination array, taken
          # from /Dest or, failing that, from /A/D.
          zoom_value = 0
          dest = mupdf.pdf_dict_get(bookmark, PDF_NAME('Dest'))
          if not (dest.m_internal and mupdf.pdf_is_array(dest)):
              dest = mupdf.pdf_dict_getl(bookmark, PDF_NAME('A'), PDF_NAME('D'))
          if mupdf.pdf_is_array(dest) and mupdf.pdf_array_len(dest) == 5:
              zoom_value = mupdf.pdf_to_real(mupdf.pdf_array_get(dest, 4))
          info["zoom"] = float(zoom_value)

          item[3] = info
          items[pos] = item
  def _forget_page(self, page: Page):
      """Remove a page from document page dict.

      The entry is keyed by ``id(page)``; nothing happens if it is absent.
      """
      page_key = id(page)
      registry = self._page_refs
      if page_key in registry:
          del registry[page_key]
  def _get_char_widths(self, xref: int, bfname: str, ext: str, ordering: int, limit: int, idx: int = 0):
      """Return a list of (glyph, advance width) pairs for a font.

      Font selection: a CJK font looked up by `ordering` when it is >= 0;
      otherwise the Base-14 font named `bfname` if one is found; otherwise
      the font buffer stored at `xref` (font index `idx`).

      Args:
          limit: number of character codes to evaluate; at least 256 are
              always evaluated.
          ext: not used by this method.

      Returns:
          One tuple per character code. The first item is the character
          code itself for CJK fonts, else the encoded glyph id. The width is
          0.0 whenever that first item is not positive.

      Raises:
          Exception: no font buffer is available for `xref`.
      """
      pdf = _as_pdf_document(self)
      code_count = max(limit, 256)
      is_cjk = ordering >= 0
      if is_cjk:
          data, size, index = mupdf.fz_lookup_cjk_font(ordering)
          font = mupdf.fz_new_font_from_memory(None, data, size, index, 0)
      else:
          data, size = mupdf.fz_lookup_base14_font(bfname)
          if data:
              font = mupdf.fz_new_font_from_memory(bfname, data, size, 0, 0)
          else:
              fontbuffer = JM_get_fontbuffer(pdf, xref)
              if not fontbuffer.m_internal:
                  raise Exception("font at xref %d is not supported" % xref)
              font = mupdf.fz_new_font_from_buffer(None, fontbuffer, idx, 0)
      widths = []
      for code in range(code_count):
          glyph = mupdf.fz_encode_character(font, code)
          advance = mupdf.fz_advance_glyph(font, glyph, 0)
          if is_cjk:
              glyph = code
          widths.append((glyph, advance if glyph > 0 else 0.0))
      return widths
  def _get_page_labels(self):
      """Collect the page label definitions of the PDF.

      Looks for /Root/PageLabels and feeds the first usable /Nums object to
      JM_get_page_labels(): /Nums directly, then /Kids/Nums, then the /Nums
      of every item of a /Kids array.

      Returns:
          The list filled by JM_get_page_labels(); empty if nothing is found.
      """
      pdf = _as_pdf_document(self)
      labels = []
      label_key = mupdf.pdf_new_name("PageLabels")
      label_root = mupdf.pdf_dict_getl(mupdf.pdf_trailer(pdf), PDF_NAME('Root'), label_key)
      if not label_root.m_internal:
          return labels

      # simple case: direct /Nums object
      direct = mupdf.pdf_resolve_indirect(mupdf.pdf_dict_get(label_root, PDF_NAME('Nums')))
      if direct.m_internal:
          JM_get_page_labels(labels, direct)
          return labels

      # case: /Kids/Nums
      nested = mupdf.pdf_resolve_indirect(
              mupdf.pdf_dict_getl(label_root, PDF_NAME('Kids'), PDF_NAME('Nums'))
              )
      if nested.m_internal:
          JM_get_page_labels(labels, nested)
          return labels

      # case: /Kids is an array of multiple /Nums
      kids = mupdf.pdf_resolve_indirect(mupdf.pdf_dict_get(label_root, PDF_NAME('Kids')))
      if not (kids.m_internal and mupdf.pdf_is_array(kids)):
          return labels
      for pos in range(mupdf.pdf_array_len(kids)):
          kid = mupdf.pdf_array_get(kids, pos)
          kid_nums = mupdf.pdf_resolve_indirect(mupdf.pdf_dict_get(kid, PDF_NAME('Nums')))
          JM_get_page_labels(labels, kid_nums)
      return labels
  def _getMetadata(self, key):
      """Get metadata.

      Returns the value looked up for `key`, or '' if the lookup raises.
      """
      try:
          value = mupdf.fz_lookup_metadata2( self.this, key)
      except Exception:
          if g_exceptions_verbose > 2:    exception_info()
          return ''
      else:
          return value
  def _getOLRootNumber(self):
      """Get xref of Outline Root, create it if missing.

      Raises:
          ValueError: the document is closed or encrypted.
      """
      if self.is_closed or self.is_encrypted:
          raise ValueError("document closed or encrypted")
      pdf = _as_pdf_document(self)
      catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
      outline_root = mupdf.pdf_dict_get(catalog, PDF_NAME('Outlines'))
      if outline_root.m_internal:
          return mupdf.pdf_to_num(outline_root)
      # No /Outlines yet: add a new dictionary of /Type /Outlines to the PDF
      # and reference it from the catalog.
      new_root = mupdf.pdf_new_dict(pdf, 4)
      mupdf.pdf_dict_put(new_root, PDF_NAME('Type'), PDF_NAME('Outlines'))
      reference = mupdf.pdf_add_object(pdf, new_root)
      mupdf.pdf_dict_put(catalog, PDF_NAME('Outlines'), reference)
      outline_root = mupdf.pdf_dict_get(catalog, PDF_NAME('Outlines'))
      return mupdf.pdf_to_num(outline_root)
  def _getPDFfileid(self):
      """Get PDF file id.

      Returns:
          None if the document is not a PDF. Otherwise a list with one
          hexlified ``bytes`` value per item of the trailer's /ID array; the
          list is empty if the trailer has no /ID.
      """
      pdf = _as_pdf_document(self, required=0)
      if not pdf.m_internal:
          return
      idlist = []
      identity = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('ID'))
      if identity.m_internal:
          for i in range(mupdf.pdf_array_len(identity)):
              text = mupdf.pdf_to_text_string(mupdf.pdf_array_get(identity, i))
              # binascii.hexlify() accepts bytes-like input only and raises
              # TypeError for str, so a str value is encoded first.
              # NOTE(review): this yields the hex form of the UTF-8 encoded
              # text, which may differ from the raw /ID bytes - confirm
              # whether the raw string buffer should be used instead.
              if isinstance(text, str):
                  text = text.encode("utf-8")
              idlist.append(binascii.hexlify(text))
      return idlist
  def _getPageInfo(self, pno, what):
      """List fonts, images, XObjects used on a page.

      Args:
          pno: page number; negative values count from the end.
          what: selector passed unchanged to JM_scan_resources().

      Returns:
          The list filled by JM_scan_resources(); empty if the page has no
          (inherited) /Resources.

      Raises:
          ValueError: the document is closed or encrypted, or `pno` does not
              address an existing page.
      """
      if self.is_closed or self.is_encrypted:
          raise ValueError("document closed or encrypted")
      doc = self.this
      if isinstance(doc, mupdf.PdfDocument):
          page_count = mupdf.pdf_count_pages(doc)
      else:
          page_count = mupdf.fz_count_pages(doc)
      # A document without pages has no valid page number. Without this
      # guard the loop below never terminates for a negative pno, because
      # adding a page count of 0 cannot make it non-negative.
      if page_count <= 0:
          raise ValueError( MSG_BAD_PAGENO)
      n = pno  # pno < 0 is allowed
      while n < 0:
          n += page_count  # make it non-negative
      if n >= page_count:
          raise ValueError( MSG_BAD_PAGENO)
      pdf = _as_pdf_document(self)
      pageref = mupdf.pdf_lookup_page_obj(pdf, n)
      rsrc = mupdf.pdf_dict_get_inheritable(pageref, mupdf.PDF_ENUM_NAME_Resources)
      liste = []
      tracer = []
      if rsrc.m_internal:
          JM_scan_resources(pdf, rsrc, liste, what, 0, tracer)
      return liste
  def _insert_font(self, fontfile=None, fontbuffer=None):
      '''
      Utility: insert font from file or binary.

      Returns whatever JM_insert_font() returns.

      Raises:
          ValueError: neither `fontfile` nor `fontbuffer` is given.
      '''
      pdf = _as_pdf_document(self)
      have_source = bool(fontfile) or bool(fontbuffer)
      if not have_source:
          raise ValueError( MSG_FILE_OR_BUFFER)
      return JM_insert_font(pdf, None, fontfile, fontbuffer, 0, 0, 0, 0, 0, -1)
  def _loadOutline(self):
      """Load first outline.

      Returns:
          An Outline wrapping the loaded outline, or None if loading raises.
      """
      fz_doc = self.this
      assert isinstance( fz_doc, mupdf.FzDocument)
      try:
          loaded = mupdf.fz_load_outline(fz_doc)
      except Exception:
          if g_exceptions_verbose > 1:    exception_info()
          return
      else:
          return Outline(loaded)
  3104. def _make_page_map(self):
  3105. """Make an array page number -> page object."""
  3106. if self.is_closed:
  3107. raise ValueError("document closed")
  3108. assert 0, f'_make_page_map() is no-op'
  def _move_copy_page(self, pno, nb, before, copy):
      """Move or copy a PDF page reference.

      Args:
          pno: number of the source page.
          nb: number of the target page.
          before: truthy to insert in front of the target page, else after.
          copy: truthy to keep the source reference, falsy to remove it.

      The /Count values along the /Parent chains are adjusted, the page tree
      is dropped if the document has a ``rev_page_map``, and all page
      references of this document are reset.
      """
      def shift_counts(node, delta):
          # Add `delta` to /Count of `node` and of each of its /Parent ancestors.
          while node.m_internal:
              current = mupdf.pdf_dict_get_int(node, PDF_NAME('Count'))
              mupdf.pdf_dict_put_int(node, PDF_NAME('Count'), current + delta)
              node = mupdf.pdf_dict_get(node, PDF_NAME('Parent'))

      pdf = _as_pdf_document(self)

      # locate both pages, their parents and their indices in /Kids
      src_page, src_parent, src_index = pdf_lookup_page_loc(pdf, pno)
      src_kids = mupdf.pdf_dict_get(src_parent, PDF_NAME('Kids'))
      _target_page, dst_parent, dst_index = pdf_lookup_page_loc(pdf, nb)
      dst_kids = mupdf.pdf_dict_get(dst_parent, PDF_NAME('Kids'))

      # index of the source page in the target /Kids
      insert_at = dst_index if before else dst_index + 1

      # a non-zero comparison result is treated as "different /Kids arrays"
      different_kids = mupdf.pdf_objcmp(src_kids, dst_kids) != 0

      if different_kids and not copy:
          # moved page gets a new parent
          mupdf.pdf_dict_put(src_page, PDF_NAME('Parent'), dst_parent)
      mupdf.pdf_array_insert(dst_kids, src_page, insert_at)

      if different_kids:
          shift_counts(dst_parent, 1)
          if not copy:
              # delete original item
              mupdf.pdf_array_delete(src_kids, src_index)
              shift_counts(src_parent, -1)
      elif copy:
          shift_counts(dst_parent, 1)
      elif src_index < insert_at:
          mupdf.pdf_array_delete(src_kids, src_index)
      else:
          # the insertion shifted the original item one position up
          mupdf.pdf_array_delete(src_kids, src_index + 1)

      if pdf.m_internal.rev_page_map:  # page map no longer valid: drop it
          mupdf.ll_pdf_drop_page_tree(pdf.m_internal)

      self._reset_page_refs()
  def _newPage(self, pno=-1, width=595, height=842):
      """Make a new PDF page.

      Args:
          pno: position passed to the page insertion call.
          width: x1 of the new page's media box.
          height: y1 of the new page's media box.

      Returns:
          ``self[pno]`` after resetting all page references.

      Raises:
          ValueError: the document is closed or encrypted, or (when
              ``g_use_extra`` is not set) `pno` is less than -1.
      """
      if self.is_closed or self.is_encrypted:
          raise ValueError("document closed or encrypted")
      if not g_use_extra:
          pdf = _as_pdf_document(self)
          page_box = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
          page_box.x1 = width
          page_box.y1 = height
          empty_contents = mupdf.FzBuffer()
          if pno < -1:
              raise ValueError( MSG_BAD_PAGENO)
          # create /Resources and /Contents objects
          page_resources = mupdf.pdf_add_new_dict(pdf, 1)
          new_page = mupdf.pdf_add_page(pdf, page_box, 0, page_resources, empty_contents)
          mupdf.pdf_insert_page(pdf, pno, new_page)
      else:
          extra._newPage( self.this, pno, width, height)
      # fixme: pdf->dirty = 1;

      self._reset_page_refs()
      return self[pno]
  def _remove_links_to(self, numbers):
      """Pass this PDF and `numbers` to ``_remove_dest_range()``."""
      _remove_dest_range(_as_pdf_document(self), numbers)
  def _remove_toc_item(self, xref):
      """ "Remove" the bookmark at `xref` by letting it point to nowhere.

      Its /Dest and /A entries are deleted and /C is set to an array of
      three 0.8 values.
      """
      pdf = _as_pdf_document(self)
      bookmark = mupdf.pdf_new_indirect(pdf, xref, 0)
      for target_key in ('Dest', 'A'):
          mupdf.pdf_dict_del(bookmark, PDF_NAME(target_key))
      dim_color = mupdf.pdf_new_array(pdf, 3)
      for _ in range(3):
          mupdf.pdf_array_push_real(dim_color, 0.8)
      mupdf.pdf_dict_put(bookmark, PDF_NAME('C'), dim_color)
  3192. def _reset_page_refs(self):
  3193. """Invalidate all pages in document dictionary."""
  3194. if getattr(self, "is_closed", True):
  3195. return
  3196. pages = [p for p in self._page_refs.values()]
  3197. for page in pages:
  3198. if page:
  3199. page._erase()
  3200. page = None
  3201. self._page_refs.clear()
  def _set_page_labels(self, labels):
      """Replace the PDF's page label definitions.

      The catalog's /PageLabels entry is recreated with an empty /Nums
      array; the catalog source is then rewritten with `labels` inserted
      between the brackets of "/Nums[]".
      """
      pdf = _as_pdf_document(self)
      label_key = mupdf.pdf_new_name("PageLabels")
      catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
      mupdf.pdf_dict_del(catalog, label_key)
      empty_nums = mupdf.pdf_new_array(pdf, 0)
      mupdf.pdf_dict_putl(catalog, empty_nums, label_key, PDF_NAME('Nums'))

      catalog_xref = self.pdf_catalog()
      source = self.xref_object(catalog_xref, compressed=True)
      updated = source.replace("/Nums[]", "/Nums[%s]" % labels)
      self.update_object(catalog_xref, updated)
  def _update_toc_item(self, xref, action=None, title=None, flags=0, collapse=None, color=None):
      '''
      Update properties of the bookmark at `xref`.

      Args:
          action: if truthy, /Dest is deleted and /A is set to the object
              built from this string.
          title: if truthy, new /Title text.
          flags: value always written to /F.
          collapse: if not None and the item has a /Count whose sign
              disagrees with the request, /Count is negated.
          color: if truthy, its first three items become /C; if falsy but
              not None, /C is deleted.
      '''
      pdf = _as_pdf_document(self)
      bookmark = mupdf.pdf_new_indirect(pdf, xref, 0)
      if title:
          mupdf.pdf_dict_put_text_string(bookmark, PDF_NAME('Title'), title)
      if action:
          mupdf.pdf_dict_del(bookmark, PDF_NAME('Dest'))
          action_obj = JM_pdf_obj_from_str(pdf, action)
          mupdf.pdf_dict_put(bookmark, PDF_NAME('A'), action_obj)
      mupdf.pdf_dict_put_int(bookmark, PDF_NAME('F'), flags)

      if color:
          components = mupdf.pdf_new_array(pdf, 3)
          for k in range(3):
              mupdf.pdf_array_push_real(components, color[k])
          mupdf.pdf_dict_put(bookmark, PDF_NAME('C'), components)
      elif color is not None:
          mupdf.pdf_dict_del(bookmark, PDF_NAME('C'))

      if collapse is None:
          return
      if not mupdf.pdf_dict_get(bookmark, PDF_NAME('Count')).m_internal:
          return
      child_count = mupdf.pdf_dict_get_int(bookmark, PDF_NAME('Count'))
      must_expand = child_count < 0 and collapse is False
      must_collapse = child_count > 0 and collapse is True
      if must_expand or must_collapse:
          mupdf.pdf_dict_put_int(bookmark, PDF_NAME('Count'), child_count * (-1))
  @property
  def FormFonts(self):
      """Get list of field font resource names.

      Returns:
          None if the document is not a PDF; otherwise the list of key names
          of /Root/AcroForm/DR/Font (empty if that dictionary is absent).
      """
      pdf = _as_pdf_document(self, required=0)
      if not pdf.m_internal:
          return
      trailer = mupdf.pdf_trailer(pdf)
      form_fonts = mupdf.pdf_dict_getl(
              trailer,
              PDF_NAME('Root'),
              PDF_NAME('AcroForm'),
              PDF_NAME('DR'),
              PDF_NAME('Font'),
              )
      if not (form_fonts.m_internal and mupdf.pdf_is_dict(form_fonts)):
          return []
      return [
              JM_UnicodeFromStr(mupdf.pdf_to_name(mupdf.pdf_dict_get_key(form_fonts, pos)))
              for pos in range(mupdf.pdf_dict_len(form_fonts))
              ]
  def add_layer(self, name, creator=None, on=None):
      """Add a new OC layer.

      The arguments are passed to ``JM_add_layer_config()``; afterwards the
      document's OCG information is re-read via ``ll_pdf_read_ocg()``.
      """
      pdf = _as_pdf_document(self)
      JM_add_layer_config(pdf, name, creator, on)
      internal_doc = pdf.m_internal
      mupdf.ll_pdf_read_ocg(internal_doc)
  def add_ocg(self, name, config=-1, on=1, intent=None, usage=None):
      """Add new optional content group.

      Args:
          name: text stored as the OCG's /Name.
          config: index into the /Configs array of the configuration that
              receives the OCG; any value below 0 selects the default
              configuration /D.
          on: if truthy the OCG is added to the configuration's /ON array,
              otherwise to its /OFF array.
          intent: a name string, or an iterable of name strings (items that
              are not strings are skipped). If empty or None, "View" is used.
          usage: /Subtype name stored in /Usage/CreatorInfo; "Artwork" if
              empty or None.

      Returns:
          xref number of the new OCG.

      Raises:
          ValueError: `config` does not address an existing configuration.
      """
      pdf = _as_pdf_document(self)

      # make the OCG
      ocg = mupdf.pdf_add_new_dict(pdf, 3)
      mupdf.pdf_dict_put(ocg, PDF_NAME('Type'), PDF_NAME('OCG'))
      mupdf.pdf_dict_put_text_string(ocg, PDF_NAME('Name'), name)
      intents = mupdf.pdf_dict_put_array(ocg, PDF_NAME('Intent'), 2)
      if not intent:
          mupdf.pdf_array_push(intents, PDF_NAME('View'))
      elif isinstance(intent, str):
          mupdf.pdf_array_push(intents, mupdf.pdf_new_name(intent))
      else:
          # Several intents: this used to stop at an unconditional assert.
          # Push every string item as a name and skip everything else.
          for item in intent:
              if isinstance(item, str):
                  mupdf.pdf_array_push(intents, mupdf.pdf_new_name(item))
      use_for = mupdf.pdf_dict_put_dict(ocg, PDF_NAME('Usage'), 3)
      ci_name = mupdf.pdf_new_name("CreatorInfo")
      cre_info = mupdf.pdf_dict_put_dict(use_for, ci_name, 2)
      mupdf.pdf_dict_put_text_string(cre_info, PDF_NAME('Creator'), "PyMuPDF")
      if usage:
          mupdf.pdf_dict_put_name(cre_info, PDF_NAME('Subtype'), usage)
      else:
          mupdf.pdf_dict_put_name(cre_info, PDF_NAME('Subtype'), "Artwork")
      indocg = mupdf.pdf_add_object(pdf, ocg)

      # Insert OCG in the right config
      ocp = JM_ensure_ocproperties(pdf)
      obj = mupdf.pdf_dict_get(ocp, PDF_NAME('OCGs'))
      mupdf.pdf_array_push(obj, indocg)

      if config > -1:
          obj = mupdf.pdf_dict_get(ocp, PDF_NAME('Configs'))
          if not mupdf.pdf_is_array(obj):
              raise ValueError( MSG_BAD_OC_CONFIG)
          cfg = mupdf.pdf_array_get(obj, config)
          if not cfg.m_internal:
              raise ValueError( MSG_BAD_OC_CONFIG)
      else:
          cfg = mupdf.pdf_dict_get(ocp, PDF_NAME('D'))

      # Register the OCG in /Order, creating the array if it is missing.
      obj = mupdf.pdf_dict_get(cfg, PDF_NAME('Order'))
      if not obj.m_internal:
          obj = mupdf.pdf_dict_put_array(cfg, PDF_NAME('Order'), 1)
      mupdf.pdf_array_push(obj, indocg)

      # Register the OCG in /ON or /OFF, creating the array if it is missing.
      state_key = PDF_NAME('ON') if on else PDF_NAME('OFF')
      obj = mupdf.pdf_dict_get(cfg, state_key)
      if not obj.m_internal:
          obj = mupdf.pdf_dict_put_array(cfg, state_key, 1)
      mupdf.pdf_array_push(obj, indocg)

      # let MuPDF take note: re-read OCProperties
      mupdf.ll_pdf_read_ocg(pdf.m_internal)

      return mupdf.pdf_to_num(indocg)
  def authenticate(self, password):
      """Decrypt document.

      Returns:
          The result of the password check. If it is truthy, the document
          is marked as not encrypted and ``init_doc()`` is called.

      Raises:
          ValueError: the document is closed.
      """
      if self.is_closed:
          raise ValueError("document closed")
      outcome = mupdf.fz_authenticate_password(self.this, password)
      if not outcome:
          return outcome
      # the doc is decrypted successfully and we init the outline
      self.is_encrypted = False
      self.init_doc()
      self.thisown = True
      return outcome
  def can_save_incrementally(self):
      """Check whether incremental saves are possible.

      Returns False if the document is not a PDF.
      """
      pdf = _as_pdf_document(self, required=0)
      if pdf.m_internal:
          return mupdf.pdf_can_be_saved_incrementally(pdf)
      return False
  def bake(self, *, annots: bool = True, widgets: bool = True) -> None:
      """Convert annotations or fields to permanent content.

      Notes:
          Converts annotations or widgets to permanent page content, like
          text and vector graphics, as appropriate.
          After execution, pages will still look the same, but no longer
          have annotations, respectively no fields.
          If widgets are selected the PDF will no longer be a Form PDF.

      Args:
          annots: convert annotations
          widgets: convert form fields

      """
      pdf = _as_pdf_document(self)
      # MuPDF receives both switches as integers.
      bake_annots = int(annots)
      bake_widgets = int(widgets)
      mupdf.pdf_bake_document(pdf, bake_annots, bake_widgets)
  @property
  def chapter_count(self):
      """Number of chapters.

      Raises:
          ValueError: the document is closed.
      """
      if self.is_closed:
          raise ValueError("document closed")
      fz_doc = self.this
      return mupdf.fz_count_chapters(fz_doc)
  def chapter_page_count(self, chapter):
      """Page count of chapter.

      Raises:
          ValueError: the document is closed, or `chapter` is negative or
              not below the number of chapters.
      """
      if self.is_closed:
          raise ValueError("document closed")
      fz_doc = self.this
      chapter_total = mupdf.fz_count_chapters(fz_doc)
      if chapter < 0 or chapter >= chapter_total:
          raise ValueError( "bad chapter number")
      return mupdf.fz_count_chapter_pages(self.this, chapter)
  def close(self):
      """Close document.

      Clears a truthy ``_outline``, resets all page references, marks the
      document as closed, empties ``Graftmaps`` and sets ``this`` to None.

      Raises:
          ValueError: ``is_closed`` is true or missing.
      """
      if getattr(self, "is_closed", True):
          raise ValueError("document closed")
      if getattr(self, "_outline", None):
          self._outline = None
      # Pages must be reset while the document still counts as open.
      self._reset_page_refs()
      self.is_closed = True
      self.Graftmaps = {} # Fixes test_3140().
      self.this = None
  def convert_to_pdf(self, from_page=0, to_page=-1, rotate=0):
      """Convert document to a PDF, selecting page range and optional rotation. Output bytes object.

      Args:
          from_page: first page; clamped into 0 .. last page.
          to_page: last page; a negative or too large value means the last
              page of the document.
          rotate: passed unchanged to JM_convert_to_pdf().

      Entries added to JM_mupdf_warnings_store during the conversion are
      reported through message().

      Raises:
          ValueError: the document is closed or encrypted.
      """
      if self.is_closed or self.is_encrypted:
          raise ValueError("document closed or encrypted")
      fz_doc = self.this
      last_page = mupdf.fz_count_pages(fz_doc) - 1
      first = min(max(from_page, 0), last_page)
      if to_page < 0 or to_page > last_page:
          last = last_page
      else:
          last = to_page
      warnings_before = len(JM_mupdf_warnings_store)
      converted = JM_convert_to_pdf(fz_doc, first, last, rotate)
      warnings_after = len(JM_mupdf_warnings_store)
      for position in range(warnings_before, warnings_after):
          message(f'{JM_mupdf_warnings_store[position]}')
      return converted
  3412. def copy_page(self, pno: int, to: int =-1):
  3413. """Copy a page within a PDF document.
  3414. This will only create another reference of the same page object.
  3415. Args:
  3416. pno: source page number
  3417. to: put before this page, '-1' means after last page.
  3418. """
  3419. if self.is_closed:
  3420. raise ValueError("document closed")
  3421. page_count = len(self)
  3422. if (
  3423. pno not in range(page_count)
  3424. or to not in range(-1, page_count)
  3425. ):
  3426. raise ValueError("bad page number(s)")
  3427. before = 1
  3428. copy = 1
  3429. if to == -1:
  3430. to = page_count - 1
  3431. before = 0
  3432. return self._move_copy_page(pno, to, before, copy)
  3433. def del_xml_metadata(self):
  3434. """Delete XML metadata."""
  3435. if self.is_closed or self.is_encrypted:
  3436. raise ValueError("document closed or encrypted")
  3437. pdf = _as_pdf_document(self)
  3438. root = mupdf.pdf_dict_get( mupdf.pdf_trailer( pdf), PDF_NAME('Root'))
  3439. if root.m_internal:
  3440. mupdf.pdf_dict_del( root, PDF_NAME('Metadata'))
  3441. def delete_page(self, pno: int =-1):
  3442. """ Delete one page from a PDF.
  3443. """
  3444. return self.delete_pages(pno)
  3445. def delete_pages(self, *args, **kw):
  3446. """Delete pages from a PDF.
  3447. Args:
  3448. Either keywords 'from_page'/'to_page', or two integers to
  3449. specify the first/last page to delete.
  3450. Or a list/tuple/range object, which can contain arbitrary
  3451. page numbers.
  3452. Or a single integer page number.
  3453. """
  3454. if not self.is_pdf:
  3455. raise ValueError("is no PDF")
  3456. if self.is_closed:
  3457. raise ValueError("document closed")
  3458. page_count = self.page_count # page count of document
  3459. f = t = -1
  3460. if kw: # check if keywords were used
  3461. if args: # then no positional args are allowed
  3462. raise ValueError("cannot mix keyword and positional argument")
  3463. f = kw.get("from_page", -1) # first page to delete
  3464. t = kw.get("to_page", -1) # last page to delete
  3465. while f < 0:
  3466. f += page_count
  3467. while t < 0:
  3468. t += page_count
  3469. if not f <= t < page_count:
  3470. raise ValueError("bad page number(s)")
  3471. numbers = tuple(range(f, t + 1))
  3472. else:
  3473. if len(args) > 2 or args == []:
  3474. raise ValueError("need 1 or 2 positional arguments")
  3475. if len(args) == 2:
  3476. f, t = args
  3477. if not (type(f) is int and type(t) is int):
  3478. raise ValueError("both arguments must be int")
  3479. if f > t:
  3480. f, t = t, f
  3481. if not f <= t < page_count:
  3482. raise ValueError("bad page number(s)")
  3483. numbers = tuple(range(f, t + 1))
  3484. elif isinstance(args[0], int):
  3485. pno = args[0]
  3486. while pno < 0:
  3487. pno += page_count
  3488. numbers = (pno,)
  3489. else:
  3490. numbers = tuple(args[0])
  3491. numbers = list(map(int, set(numbers))) # ensure unique integers
  3492. if numbers == []:
  3493. message("nothing to delete")
  3494. return
  3495. numbers.sort()
  3496. if numbers[0] < 0 or numbers[-1] >= page_count:
  3497. raise ValueError("bad page number(s)")
  3498. frozen_numbers = frozenset(numbers)
  3499. toc = self.get_toc()
  3500. for i, xref in enumerate(self.get_outline_xrefs()):
  3501. if toc[i][2] - 1 in frozen_numbers:
  3502. self._remove_toc_item(xref) # remove target in PDF object
  3503. self._remove_links_to(frozen_numbers)
  3504. for i in reversed(numbers): # delete pages, last to first
  3505. self._delete_page(i)
  3506. self._reset_page_refs()
  3507. def embfile_add(self,
  3508. name: str,
  3509. buffer_: ByteString,
  3510. filename: OptStr =None,
  3511. ufilename: OptStr =None,
  3512. desc: OptStr =None,
  3513. ) -> None:
  3514. """Add an item to the EmbeddedFiles array.
  3515. Args:
  3516. name: name of the new item, must not already exist.
  3517. buffer_: (binary data) the file content.
  3518. filename: (str) the file name, default: the name
  3519. ufilename: (unicode) the file name, default: filename
  3520. desc: (str) the description.
  3521. """
  3522. filenames = self.embfile_names()
  3523. msg = "Name '%s' already exists." % str(name)
  3524. if name in filenames:
  3525. raise ValueError(msg)
  3526. if filename is None:
  3527. filename = name
  3528. if ufilename is None:
  3529. ufilename = filename
  3530. if desc is None:
  3531. desc = name
  3532. xref = self._embfile_add(
  3533. name,
  3534. buffer_=buffer_,
  3535. filename=filename,
  3536. ufilename=ufilename,
  3537. desc=desc,
  3538. )
  3539. date = get_pdf_now()
  3540. self.xref_set_key(xref, "Type", "/EmbeddedFile")
  3541. self.xref_set_key(xref, "Params/CreationDate", get_pdf_str(date))
  3542. self.xref_set_key(xref, "Params/ModDate", get_pdf_str(date))
  3543. return xref
  3544. def embfile_count(self) -> int:
  3545. """Get number of EmbeddedFiles."""
  3546. return len(self.embfile_names())
  3547. def embfile_del(self, item: typing.Union[int, str]):
  3548. """Delete an entry from EmbeddedFiles.
  3549. Notes:
  3550. The argument must be name or index of an EmbeddedFiles item.
  3551. Physical deletion of data will happen on save to a new
  3552. file with appropriate garbage option.
  3553. Args:
  3554. item: name or number of item.
  3555. Returns:
  3556. None
  3557. """
  3558. idx = self._embeddedFileIndex(item)
  3559. return self._embfile_del(idx)
  3560. def embfile_get(self, item: typing.Union[int, str]) -> bytes:
  3561. """Get the content of an item in the EmbeddedFiles array.
  3562. Args:
  3563. item: number or name of item.
  3564. Returns:
  3565. (bytes) The file content.
  3566. """
  3567. idx = self._embeddedFileIndex(item)
  3568. return self._embeddedFileGet(idx)
  3569. def embfile_info(self, item: typing.Union[int, str]) -> dict:
  3570. """Get information of an item in the EmbeddedFiles array.
  3571. Args:
  3572. item: number or name of item.
  3573. Returns:
  3574. Information dictionary.
  3575. """
  3576. idx = self._embeddedFileIndex(item)
  3577. infodict = {"name": self.embfile_names()[idx]}
  3578. xref = self._embfile_info(idx, infodict)
  3579. t, date = self.xref_get_key(xref, "Params/CreationDate")
  3580. if t != "null":
  3581. infodict["creationDate"] = date
  3582. t, date = self.xref_get_key(xref, "Params/ModDate")
  3583. if t != "null":
  3584. infodict["modDate"] = date
  3585. t, md5 = self.xref_get_key(xref, "Params/CheckSum")
  3586. if t != "null":
  3587. infodict["checksum"] = binascii.hexlify(md5.encode()).decode()
  3588. return infodict
  3589. def embfile_names(self) -> list:
  3590. """Get list of names of EmbeddedFiles."""
  3591. filenames = []
  3592. self._embfile_names(filenames)
  3593. return filenames
  3594. def embfile_upd(self,
  3595. item: typing.Union[int, str],
  3596. buffer_: OptBytes =None,
  3597. filename: OptStr =None,
  3598. ufilename: OptStr =None,
  3599. desc: OptStr =None,
  3600. ) -> None:
  3601. """Change an item of the EmbeddedFiles array.
  3602. Notes:
  3603. Only provided parameters are changed. If all are omitted,
  3604. the method is a no-op.
  3605. Args:
  3606. item: number or name of item.
  3607. buffer_: (binary data) the new file content.
  3608. filename: (str) the new file name.
  3609. ufilename: (unicode) the new filen ame.
  3610. desc: (str) the new description.
  3611. """
  3612. idx = self._embeddedFileIndex(item)
  3613. xref = self._embfile_upd(
  3614. idx,
  3615. buffer_=buffer_,
  3616. filename=filename,
  3617. ufilename=ufilename,
  3618. desc=desc,
  3619. )
  3620. date = get_pdf_now()
  3621. self.xref_set_key(xref, "Params/ModDate", get_pdf_str(date))
  3622. return xref
  3623. def extract_font(self, xref=0, info_only=0, named=None):
  3624. '''
  3625. Get a font by xref. Returns a tuple or dictionary.
  3626. '''
  3627. #log( '{=xref info_only}')
  3628. pdf = _as_pdf_document(self)
  3629. obj = mupdf.pdf_load_object(pdf, xref)
  3630. type_ = mupdf.pdf_dict_get(obj, PDF_NAME('Type'))
  3631. subtype = mupdf.pdf_dict_get(obj, PDF_NAME('Subtype'))
  3632. if (mupdf.pdf_name_eq(type_, PDF_NAME('Font'))
  3633. and not mupdf.pdf_to_name( subtype).startswith('CIDFontType')
  3634. ):
  3635. basefont = mupdf.pdf_dict_get(obj, PDF_NAME('BaseFont'))
  3636. if not basefont.m_internal or mupdf.pdf_is_null(basefont):
  3637. bname = mupdf.pdf_dict_get(obj, PDF_NAME('Name'))
  3638. else:
  3639. bname = basefont
  3640. ext = JM_get_fontextension(pdf, xref)
  3641. if ext != 'n/a' and not info_only:
  3642. buffer_ = JM_get_fontbuffer(pdf, xref)
  3643. bytes_ = JM_BinFromBuffer(buffer_)
  3644. else:
  3645. bytes_ = b''
  3646. if not named:
  3647. rc = (
  3648. JM_EscapeStrFromStr(mupdf.pdf_to_name(bname)),
  3649. JM_UnicodeFromStr(ext),
  3650. JM_UnicodeFromStr(mupdf.pdf_to_name(subtype)),
  3651. bytes_,
  3652. )
  3653. else:
  3654. rc = {
  3655. dictkey_name: JM_EscapeStrFromStr(mupdf.pdf_to_name(bname)),
  3656. dictkey_ext: JM_UnicodeFromStr(ext),
  3657. dictkey_type: JM_UnicodeFromStr(mupdf.pdf_to_name(subtype)),
  3658. dictkey_content: bytes_,
  3659. }
  3660. else:
  3661. if not named:
  3662. rc = '', '', '', b''
  3663. else:
  3664. rc = {
  3665. dictkey_name: '',
  3666. dictkey_ext: '',
  3667. dictkey_type: '',
  3668. dictkey_content: b'',
  3669. }
  3670. return rc
  3671. def extract_image(self, xref):
  3672. """Get image by xref. Returns a dictionary."""
  3673. if self.is_closed or self.is_encrypted:
  3674. raise ValueError("document closed or encrypted")
  3675. pdf = _as_pdf_document(self)
  3676. if not _INRANGE(xref, 1, mupdf.pdf_xref_len(pdf)-1):
  3677. raise ValueError( MSG_BAD_XREF)
  3678. obj = mupdf.pdf_new_indirect(pdf, xref, 0)
  3679. subtype = mupdf.pdf_dict_get(obj, PDF_NAME('Subtype'))
  3680. if not mupdf.pdf_name_eq(subtype, PDF_NAME('Image')):
  3681. raise ValueError( "not an image")
  3682. o = mupdf.pdf_dict_geta(obj, PDF_NAME('SMask'), PDF_NAME('Mask'))
  3683. if o.m_internal:
  3684. smask = mupdf.pdf_to_num(o)
  3685. else:
  3686. smask = 0
  3687. # load the image
  3688. img = mupdf.pdf_load_image(pdf, obj)
  3689. rc = dict()
  3690. _make_image_dict(img, rc)
  3691. rc[dictkey_smask] = smask
  3692. rc[dictkey_cs_name] = mupdf.fz_colorspace_name(img.colorspace())
  3693. return rc
  3694. def ez_save(
  3695. self,
  3696. filename,
  3697. garbage=3,
  3698. clean=False,
  3699. deflate=True,
  3700. deflate_images=True,
  3701. deflate_fonts=True,
  3702. incremental=False,
  3703. ascii=False,
  3704. expand=False,
  3705. linear=False,
  3706. pretty=False,
  3707. encryption=1,
  3708. permissions=4095,
  3709. owner_pw=None,
  3710. user_pw=None,
  3711. no_new_id=True,
  3712. preserve_metadata=1,
  3713. use_objstms=1,
  3714. compression_effort=0,
  3715. ):
  3716. '''
  3717. Save PDF using some different defaults
  3718. '''
  3719. return self.save(
  3720. filename,
  3721. garbage=garbage,
  3722. clean=clean,
  3723. deflate=deflate,
  3724. deflate_images=deflate_images,
  3725. deflate_fonts=deflate_fonts,
  3726. incremental=incremental,
  3727. ascii=ascii,
  3728. expand=expand,
  3729. linear=linear,
  3730. pretty=pretty,
  3731. encryption=encryption,
  3732. permissions=permissions,
  3733. owner_pw=owner_pw,
  3734. user_pw=user_pw,
  3735. no_new_id=no_new_id,
  3736. preserve_metadata=preserve_metadata,
  3737. use_objstms=use_objstms,
  3738. compression_effort=compression_effort,
  3739. )
  3740. def find_bookmark(self, bm):
  3741. """Find new location after layouting a document."""
  3742. if self.is_closed or self.is_encrypted:
  3743. raise ValueError("document closed or encrypted")
  3744. location = mupdf.fz_lookup_bookmark2( self.this, bm)
  3745. return location.chapter, location.page
def fullcopy_page(self, pno, to=-1):
    """Make a full page duplicate.

    The page object is deep-copied, its annotations (except 'Popup'
    annotations and those having an 'IRT' entry) are copied to new
    objects, and the copy receives a new /Contents stream.

    Args:
        pno: number of the page to duplicate.
        to: position handed to the page insertion; the range check
            accepts -1 up to the last page number.

    Raises:
        ValueError: if 'pno' or 'to' are outside the accepted range.
    """
    pdf = _as_pdf_document(self)
    page_count = mupdf.pdf_count_pages( pdf)
    try:
        if (not _INRANGE(pno, 0, page_count - 1)
                or not _INRANGE(to, -1, page_count - 1)
                ):
            raise ValueError( MSG_BAD_PAGENO)

        page1 = mupdf.pdf_resolve_indirect( mupdf.pdf_lookup_page_obj( pdf, pno))

        page2 = mupdf.pdf_deep_copy_obj( page1)
        old_annots = mupdf.pdf_dict_get( page2, PDF_NAME('Annots'))

        # copy annotations, but remove Popup and IRT types
        if old_annots.m_internal:
            n = mupdf.pdf_array_len( old_annots)
            new_annots = mupdf.pdf_new_array( pdf, n)
            for i in range(n):
                o = mupdf.pdf_array_get( old_annots, i)
                subtype = mupdf.pdf_dict_get( o, PDF_NAME('Subtype'))
                if mupdf.pdf_name_eq( subtype, PDF_NAME('Popup')):
                    continue
                if mupdf.pdf_dict_gets( o, "IRT").m_internal:
                    continue
                # store the copied annotation as a new object and continue
                # with an indirect reference to it
                copy_o = mupdf.pdf_deep_copy_obj( mupdf.pdf_resolve_indirect( o))
                xref = mupdf.pdf_create_object( pdf)
                mupdf.pdf_update_object( pdf, xref, copy_o)
                copy_o = mupdf.pdf_new_indirect( pdf, xref, 0)
                # the copy must not keep its /Popup and /P entries
                mupdf.pdf_dict_del( copy_o, PDF_NAME('Popup'))
                mupdf.pdf_dict_del( copy_o, PDF_NAME('P'))
                mupdf.pdf_array_push( new_annots, copy_o)
            mupdf.pdf_dict_put( page2, PDF_NAME('Annots'), new_annots)

        # copy the old contents stream(s)
        res = JM_read_contents( page1)

        # create new /Contents object for page2
        if res and res.m_internal:
            #contents = mupdf.pdf_add_stream( pdf, mupdf.fz_new_buffer_from_copied_data( b" ", 1), NULL, 0)
            contents = mupdf.pdf_add_stream( pdf, mupdf.fz_new_buffer_from_copied_data( b" "), mupdf.PdfObj(), 0)
            JM_update_stream( pdf, contents, res, 1)
            mupdf.pdf_dict_put( page2, PDF_NAME('Contents'), contents)

        # now insert target page, making sure it is an indirect object
        xref = mupdf.pdf_create_object( pdf)  # get new xref
        mupdf.pdf_update_object( pdf, xref, page2)  # store new page

        page2 = mupdf.pdf_new_indirect( pdf, xref, 0)  # reread object
        mupdf.pdf_insert_page( pdf, to, page2)  # and store the page
    finally:
        # executed on success and on failure alike
        mupdf.ll_pdf_drop_page_tree( pdf.m_internal)

    self._reset_page_refs()
  3793. def get_layer(self, config=-1):
  3794. """Content of ON, OFF, RBGroups of an OC layer."""
  3795. pdf = _as_pdf_document(self)
  3796. ocp = mupdf.pdf_dict_getl(
  3797. mupdf.pdf_trailer( pdf),
  3798. PDF_NAME('Root'),
  3799. PDF_NAME('OCProperties'),
  3800. )
  3801. if not ocp.m_internal:
  3802. return
  3803. if config == -1:
  3804. obj = mupdf.pdf_dict_get( ocp, PDF_NAME('D'))
  3805. else:
  3806. obj = mupdf.pdf_array_get(
  3807. mupdf.pdf_dict_get( ocp, PDF_NAME('Configs')),
  3808. config,
  3809. )
  3810. if not obj.m_internal:
  3811. raise ValueError( MSG_BAD_OC_CONFIG)
  3812. rc = JM_get_ocg_arrays( obj)
  3813. return rc
  3814. def get_layers(self):
  3815. """Show optional OC layers."""
  3816. pdf = _as_pdf_document(self)
  3817. n = mupdf.pdf_count_layer_configs( pdf)
  3818. if n == 1:
  3819. obj = mupdf.pdf_dict_getl(
  3820. mupdf.pdf_trailer( pdf),
  3821. PDF_NAME('Root'),
  3822. PDF_NAME('OCProperties'),
  3823. PDF_NAME('Configs'),
  3824. )
  3825. if not mupdf.pdf_is_array( obj):
  3826. n = 0
  3827. rc = []
  3828. info = mupdf.PdfLayerConfig()
  3829. for i in range(n):
  3830. mupdf.pdf_layer_config_info( pdf, i, info)
  3831. item = {
  3832. "number": i,
  3833. "name": info.name,
  3834. "creator": info.creator,
  3835. }
  3836. rc.append( item)
  3837. return rc
  3838. def get_new_xref(self):
  3839. """Make new xref."""
  3840. if self.is_closed or self.is_encrypted:
  3841. raise ValueError("document closed or encrypted")
  3842. pdf = _as_pdf_document(self)
  3843. xref = 0
  3844. ENSURE_OPERATION(pdf)
  3845. xref = mupdf.pdf_create_object(pdf)
  3846. return xref
  3847. def get_ocgs(self):
  3848. """Show existing optional content groups."""
  3849. ci = mupdf.pdf_new_name( "CreatorInfo")
  3850. pdf = _as_pdf_document(self)
  3851. ocgs = mupdf.pdf_dict_getl(
  3852. mupdf.pdf_dict_get( mupdf.pdf_trailer( pdf), PDF_NAME('Root')),
  3853. PDF_NAME('OCProperties'),
  3854. PDF_NAME('OCGs'),
  3855. )
  3856. rc = dict()
  3857. if not mupdf.pdf_is_array( ocgs):
  3858. return rc
  3859. n = mupdf.pdf_array_len( ocgs)
  3860. for i in range(n):
  3861. ocg = mupdf.pdf_array_get( ocgs, i)
  3862. xref = mupdf.pdf_to_num( ocg)
  3863. name = mupdf.pdf_to_text_string( mupdf.pdf_dict_get( ocg, PDF_NAME('Name')))
  3864. obj = mupdf.pdf_dict_getl( ocg, PDF_NAME('Usage'), ci, PDF_NAME('Subtype'))
  3865. usage = None
  3866. if obj.m_internal:
  3867. usage = mupdf.pdf_to_name( obj)
  3868. intents = list()
  3869. intent = mupdf.pdf_dict_get( ocg, PDF_NAME('Intent'))
  3870. if intent.m_internal:
  3871. if mupdf.pdf_is_name( intent):
  3872. intents.append( mupdf.pdf_to_name( intent))
  3873. elif mupdf.pdf_is_array( intent):
  3874. m = mupdf.pdf_array_len( intent)
  3875. for j in range(m):
  3876. o = mupdf.pdf_array_get( intent, j)
  3877. if mupdf.pdf_is_name( o):
  3878. intents.append( mupdf.pdf_to_name( o))
  3879. hidden = mupdf.pdf_is_ocg_hidden( pdf, mupdf.PdfObj(), usage, ocg)
  3880. item = {
  3881. "name": name,
  3882. "intent": intents,
  3883. "on": not hidden,
  3884. "usage": usage,
  3885. }
  3886. temp = xref
  3887. rc[ temp] = item
  3888. return rc
  3889. def get_outline_xrefs(self):
  3890. """Get list of outline xref numbers."""
  3891. xrefs = []
  3892. pdf = _as_pdf_document(self, required=0)
  3893. if not pdf.m_internal:
  3894. return xrefs
  3895. root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
  3896. if not root.m_internal:
  3897. return xrefs
  3898. olroot = mupdf.pdf_dict_get(root, PDF_NAME('Outlines'))
  3899. if not olroot.m_internal:
  3900. return xrefs
  3901. first = mupdf.pdf_dict_get(olroot, PDF_NAME('First'))
  3902. if not first.m_internal:
  3903. return xrefs
  3904. xrefs = JM_outline_xrefs(first, xrefs)
  3905. return xrefs
  3906. def get_page_fonts(self, pno: int, full: bool =False) -> list:
  3907. """Retrieve a list of fonts used on a page.
  3908. """
  3909. if self.is_closed or self.is_encrypted:
  3910. raise ValueError("document closed or encrypted")
  3911. if not self.is_pdf:
  3912. return ()
  3913. if type(pno) is not int:
  3914. try:
  3915. pno = pno.number
  3916. except Exception:
  3917. exception_info()
  3918. raise ValueError("need a Page or page number")
  3919. val = self._getPageInfo(pno, 1)
  3920. if not full:
  3921. return [v[:-1] for v in val]
  3922. return val
  3923. def get_page_images(self, pno: int, full: bool =False) -> list:
  3924. """Retrieve a list of images used on a page.
  3925. """
  3926. if self.is_closed or self.is_encrypted:
  3927. raise ValueError("document closed or encrypted")
  3928. if not self.is_pdf:
  3929. return ()
  3930. val = self._getPageInfo(pno, 2)
  3931. if not full:
  3932. return [v[:-1] for v in val]
  3933. return val
  3934. def get_page_xobjects(self, pno: int) -> list:
  3935. """Retrieve a list of XObjects used on a page.
  3936. """
  3937. if self.is_closed or self.is_encrypted:
  3938. raise ValueError("document closed or encrypted")
  3939. if not self.is_pdf:
  3940. return ()
  3941. val = self._getPageInfo(pno, 3)
  3942. return val
  3943. def get_sigflags(self):
  3944. """Get the /SigFlags value."""
  3945. pdf = _as_pdf_document(self, required=0)
  3946. if not pdf.m_internal:
  3947. return -1 # not a PDF
  3948. sigflags = mupdf.pdf_dict_getl(
  3949. mupdf.pdf_trailer(pdf),
  3950. PDF_NAME('Root'),
  3951. PDF_NAME('AcroForm'),
  3952. PDF_NAME('SigFlags'),
  3953. )
  3954. sigflag = -1
  3955. if sigflags.m_internal:
  3956. sigflag = mupdf.pdf_to_int(sigflags)
  3957. return sigflag
  3958. def get_xml_metadata(self):
  3959. """Get document XML metadata."""
  3960. xml = None
  3961. pdf = _as_pdf_document(self, required=0)
  3962. if pdf.m_internal:
  3963. xml = mupdf.pdf_dict_getl(
  3964. mupdf.pdf_trailer(pdf),
  3965. PDF_NAME('Root'),
  3966. PDF_NAME('Metadata'),
  3967. )
  3968. if xml is not None and xml.m_internal:
  3969. buff = mupdf.pdf_load_stream(xml)
  3970. rc = JM_UnicodeFromBuffer(buff)
  3971. else:
  3972. rc = ''
  3973. return rc
  3974. def init_doc(self):
  3975. if self.is_encrypted:
  3976. raise ValueError("cannot initialize - document still encrypted")
  3977. self._outline = self._loadOutline()
  3978. self.metadata = dict(
  3979. [
  3980. (k,self._getMetadata(v)) for k,v in {
  3981. 'format':'format',
  3982. 'title':'info:Title',
  3983. 'author':'info:Author',
  3984. 'subject':'info:Subject',
  3985. 'keywords':'info:Keywords',
  3986. 'creator':'info:Creator',
  3987. 'producer':'info:Producer',
  3988. 'creationDate':'info:CreationDate',
  3989. 'modDate':'info:ModDate',
  3990. 'trapped':'info:Trapped'
  3991. }.items()
  3992. ]
  3993. )
  3994. self.metadata['encryption'] = None if self._getMetadata('encryption')=='None' else self._getMetadata('encryption')
  3995. def insert_file(self,
  3996. infile,
  3997. from_page=-1,
  3998. to_page=-1,
  3999. start_at=-1,
  4000. rotate=-1,
  4001. links=True,
  4002. annots=True,
  4003. show_progress=0,
  4004. final=1,
  4005. ):
  4006. '''
  4007. Insert an arbitrary supported document to an existing PDF.
  4008. The infile may be given as a filename, a Document or a Pixmap. Other
  4009. parameters - where applicable - equal those of insert_pdf().
  4010. '''
  4011. src = None
  4012. if isinstance(infile, Pixmap):
  4013. if infile.colorspace.n > 3:
  4014. infile = Pixmap(csRGB, infile)
  4015. src = Document("png", infile.tobytes())
  4016. elif isinstance(infile, Document):
  4017. src = infile
  4018. else:
  4019. src = Document(infile)
  4020. if not src:
  4021. raise ValueError("bad infile parameter")
  4022. if not src.is_pdf:
  4023. pdfbytes = src.convert_to_pdf()
  4024. src = Document("pdf", pdfbytes)
  4025. return self.insert_pdf(
  4026. src,
  4027. from_page=from_page,
  4028. to_page=to_page,
  4029. start_at=start_at,
  4030. rotate=rotate,
  4031. links=links,
  4032. annots=annots,
  4033. show_progress=show_progress,
  4034. final=final,
  4035. )
  4036. def insert_pdf(
  4037. self,
  4038. docsrc,
  4039. *,
  4040. from_page=-1,
  4041. to_page=-1,
  4042. start_at=-1,
  4043. rotate=-1,
  4044. links=1,
  4045. annots=1,
  4046. widgets=1,
  4047. join_duplicates=0,
  4048. show_progress=0,
  4049. final=1,
  4050. _gmap=None,
  4051. ):
  4052. """Insert a page range from another PDF.
  4053. Args:
  4054. docsrc: PDF to copy from. Must be different object, but may be same file.
  4055. from_page: (int) first source page to copy, 0-based, default 0.
  4056. to_page: (int) last source page to copy, 0-based, default last page.
  4057. start_at: (int) from_page will become this page number in target.
  4058. rotate: (int) rotate copied pages, default -1 is no change.
  4059. links: (int/bool) whether to also copy links.
  4060. annots: (int/bool) whether to also copy annotations.
  4061. widgets: (int/bool) whether to also copy form fields.
  4062. join_duplicates: (int/bool) join or rename duplicate widget names.
  4063. show_progress: (int) progress message interval, 0 is no messages.
  4064. final: (bool) indicates last insertion from this source PDF.
  4065. _gmap: internal use only
  4066. Copy sequence reversed if from_page > to_page."""
  4067. # Insert pages from a source PDF into this PDF.
  4068. # For reconstructing the links (_do_links method), we must save the
  4069. # insertion point (start_at) if it was specified as -1.
  4070. #log( 'insert_pdf(): start')
  4071. if self.is_closed or self.is_encrypted:
  4072. raise ValueError("document closed or encrypted")
  4073. if self._graft_id == docsrc._graft_id:
  4074. raise ValueError("source and target cannot be same object")
  4075. sa = start_at
  4076. if sa < 0:
  4077. sa = self.page_count
  4078. outCount = self.page_count
  4079. srcCount = docsrc.page_count
  4080. # local copies of page numbers
  4081. fp = from_page
  4082. tp = to_page
  4083. sa = start_at
  4084. # normalize page numbers
  4085. fp = max(fp, 0) # -1 = first page
  4086. fp = min(fp, srcCount - 1) # but do not exceed last page
  4087. if tp < 0:
  4088. tp = srcCount - 1 # -1 = last page
  4089. tp = min(tp, srcCount - 1) # but do not exceed last page
  4090. if sa < 0:
  4091. sa = outCount # -1 = behind last page
  4092. sa = min(sa, outCount) # but that is also the limit
  4093. if len(docsrc) > show_progress > 0:
  4094. inname = os.path.basename(docsrc.name)
  4095. if not inname:
  4096. inname = "memory PDF"
  4097. outname = os.path.basename(self.name)
  4098. if not outname:
  4099. outname = "memory PDF"
  4100. message("Inserting '%s' at '%s'" % (inname, outname))
  4101. # retrieve / make a Graftmap to avoid duplicate objects
  4102. #log( 'insert_pdf(): Graftmaps')
  4103. isrt = docsrc._graft_id
  4104. _gmap = self.Graftmaps.get(isrt, None)
  4105. if _gmap is None:
  4106. #log( 'insert_pdf(): Graftmaps2')
  4107. _gmap = Graftmap(self)
  4108. self.Graftmaps[isrt] = _gmap
  4109. if g_use_extra:
  4110. #log( 'insert_pdf(): calling extra_FzDocument_insert_pdf()')
  4111. extra_FzDocument_insert_pdf(
  4112. self.this,
  4113. docsrc.this,
  4114. from_page,
  4115. to_page,
  4116. start_at,
  4117. rotate,
  4118. links,
  4119. annots,
  4120. show_progress,
  4121. final,
  4122. _gmap,
  4123. )
  4124. #log( 'insert_pdf(): extra_FzDocument_insert_pdf() returned.')
  4125. else:
  4126. pdfout = _as_pdf_document(self)
  4127. pdfsrc = _as_pdf_document(docsrc)
  4128. if not pdfout.m_internal or not pdfsrc.m_internal:
  4129. raise TypeError( "source or target not a PDF")
  4130. ENSURE_OPERATION(pdfout)
  4131. JM_merge_range(pdfout, pdfsrc, fp, tp, sa, rotate, links, annots, show_progress, _gmap)
  4132. #log( 'insert_pdf(): calling self._reset_page_refs()')
  4133. self._reset_page_refs()
  4134. if links:
  4135. #log( 'insert_pdf(): calling self._do_links()')
  4136. self._do_links(docsrc, from_page=fp, to_page=tp, start_at=sa)
  4137. if widgets:
  4138. self._do_widgets(docsrc, _gmap, from_page=fp, to_page=tp, start_at=sa, join_duplicates=join_duplicates)
  4139. if final == 1:
  4140. self.Graftmaps[isrt] = None
  4141. #log( 'insert_pdf(): returning')
  4142. @property
  4143. def is_dirty(self):
  4144. pdf = _as_pdf_document(self, required=0)
  4145. if not pdf.m_internal:
  4146. return False
  4147. r = mupdf.pdf_has_unsaved_changes(pdf)
  4148. return True if r else False
  4149. @property
  4150. def is_fast_webaccess(self):
  4151. '''
  4152. Check whether we have a linearized PDF.
  4153. '''
  4154. pdf = _as_pdf_document(self, required=0)
  4155. if pdf.m_internal:
  4156. return mupdf.pdf_doc_was_linearized(pdf)
  4157. return False # gracefully handle non-PDF
  4158. @property
  4159. def is_form_pdf(self):
  4160. """Either False or PDF field count."""
  4161. pdf = _as_pdf_document(self, required=0)
  4162. if not pdf.m_internal:
  4163. return False
  4164. count = -1
  4165. try:
  4166. fields = mupdf.pdf_dict_getl(
  4167. mupdf.pdf_trailer(pdf),
  4168. mupdf.PDF_ENUM_NAME_Root,
  4169. mupdf.PDF_ENUM_NAME_AcroForm,
  4170. mupdf.PDF_ENUM_NAME_Fields,
  4171. )
  4172. if mupdf.pdf_is_array(fields):
  4173. count = mupdf.pdf_array_len(fields)
  4174. except Exception:
  4175. if g_exceptions_verbose: exception_info()
  4176. return False
  4177. if count >= 0:
  4178. return count
  4179. return False
  4180. @property
  4181. def is_pdf(self):
  4182. """Check for PDF."""
  4183. if isinstance(self.this, mupdf.PdfDocument):
  4184. return True
  4185. # Avoid calling smupdf.pdf_specifics because it will end up creating
  4186. # a new PdfDocument which will call pdf_create_document(), which is ok
  4187. # but a little unnecessary.
  4188. #
  4189. if mupdf.ll_pdf_specifics(self.this.m_internal):
  4190. ret = True
  4191. else:
  4192. ret = False
  4193. return ret
  4194. @property
  4195. def is_reflowable(self):
  4196. """Check if document is layoutable."""
  4197. if self.is_closed:
  4198. raise ValueError("document closed")
  4199. return bool(mupdf.fz_is_document_reflowable(self))
  4200. @property
  4201. def is_repaired(self):
  4202. """Check whether PDF was repaired."""
  4203. pdf = _as_pdf_document(self, required=0)
  4204. if not pdf.m_internal:
  4205. return False
  4206. r = mupdf.pdf_was_repaired(pdf)
  4207. if r:
  4208. return True
  4209. return False
  4210. def journal_can_do(self):
  4211. """Show if undo and / or redo are possible."""
  4212. if self.is_closed or self.is_encrypted:
  4213. raise ValueError("document closed or encrypted")
  4214. undo=0
  4215. redo=0
  4216. pdf = _as_pdf_document(self)
  4217. undo = mupdf.pdf_can_undo(pdf)
  4218. redo = mupdf.pdf_can_redo(pdf)
  4219. return {'undo': bool(undo), 'redo': bool(redo)}
  4220. def journal_enable(self):
  4221. """Activate document journalling."""
  4222. if self.is_closed or self.is_encrypted:
  4223. raise ValueError("document closed or encrypted")
  4224. pdf = _as_pdf_document(self)
  4225. mupdf.pdf_enable_journal(pdf)
  4226. def journal_is_enabled(self):
  4227. """Check if journalling is enabled."""
  4228. if self.is_closed or self.is_encrypted:
  4229. raise ValueError("document closed or encrypted")
  4230. pdf = _as_pdf_document(self)
  4231. enabled = pdf.m_internal and pdf.m_internal.journal
  4232. return enabled
  4233. def journal_load(self, filename):
  4234. """Load a journal from a file."""
  4235. if self.is_closed or self.is_encrypted:
  4236. raise ValueError("document closed or encrypted")
  4237. pdf = _as_pdf_document(self)
  4238. if isinstance(filename, str):
  4239. mupdf.pdf_load_journal(pdf, filename)
  4240. else:
  4241. res = JM_BufferFromBytes(filename)
  4242. stm = mupdf.fz_open_buffer(res)
  4243. mupdf.pdf_deserialise_journal(pdf, stm)
  4244. if not pdf.m_internal.journal:
  4245. RAISEPY( "Journal and document do not match", JM_Exc_FileDataError)
  4246. def journal_op_name(self, step):
  4247. """Show operation name for given step."""
  4248. if self.is_closed or self.is_encrypted:
  4249. raise ValueError("document closed or encrypted")
  4250. pdf = _as_pdf_document(self)
  4251. name = mupdf.pdf_undoredo_step(pdf, step)
  4252. return name
  4253. def journal_position(self):
  4254. """Show journalling state."""
  4255. if self.is_closed or self.is_encrypted:
  4256. raise ValueError("document closed or encrypted")
  4257. steps=0
  4258. pdf = _as_pdf_document(self)
  4259. rc, steps = mupdf.pdf_undoredo_state(pdf)
  4260. return rc, steps
  4261. def journal_redo(self):
  4262. """Move forward in the journal."""
  4263. if self.is_closed or self.is_encrypted:
  4264. raise ValueError("document closed or encrypted")
  4265. pdf = _as_pdf_document(self)
  4266. mupdf.pdf_redo(pdf)
  4267. return True
  4268. def journal_save(self, filename):
  4269. """Save journal to a file."""
  4270. if self.is_closed or self.is_encrypted:
  4271. raise ValueError("document closed or encrypted")
  4272. pdf = _as_pdf_document(self)
  4273. if isinstance(filename, str):
  4274. mupdf.pdf_save_journal(pdf, filename)
  4275. else:
  4276. out = JM_new_output_fileptr(filename)
  4277. mupdf.pdf_write_journal(pdf, out)
  4278. out.fz_close_output()
  4279. def journal_start_op(self, name=None):
  4280. """Begin a journalling operation."""
  4281. if self.is_closed or self.is_encrypted:
  4282. raise ValueError("document closed or encrypted")
  4283. pdf = _as_pdf_document(self)
  4284. if not pdf.m_internal.journal:
  4285. raise RuntimeError( "Journalling not enabled")
  4286. if name:
  4287. mupdf.pdf_begin_operation(pdf, name)
  4288. else:
  4289. mupdf.pdf_begin_implicit_operation(pdf)
  4290. def journal_stop_op(self):
  4291. """End a journalling operation."""
  4292. if self.is_closed or self.is_encrypted:
  4293. raise ValueError("document closed or encrypted")
  4294. pdf = _as_pdf_document(self)
  4295. mupdf.pdf_end_operation(pdf)
  4296. def journal_undo(self):
  4297. """Move backwards in the journal."""
  4298. if self.is_closed or self.is_encrypted:
  4299. raise ValueError("document closed or encrypted")
  4300. pdf = _as_pdf_document(self)
  4301. mupdf.pdf_undo(pdf)
  4302. return True
  4303. @property
  4304. def language(self):
  4305. """Document language."""
  4306. pdf = _as_pdf_document(self, required=0)
  4307. if not pdf.m_internal:
  4308. return
  4309. lang = mupdf.pdf_document_language(pdf)
  4310. if lang == mupdf.FZ_LANG_UNSET:
  4311. return
  4312. return mupdf.fz_string_from_text_language2(lang)
  4313. @property
  4314. def last_location(self):
  4315. """Id (chapter, page) of last page."""
  4316. if self.is_closed:
  4317. raise ValueError("document closed")
  4318. last_loc = mupdf.fz_last_page(self.this)
  4319. return last_loc.chapter, last_loc.page
  4320. def layer_ui_configs(self):
  4321. """Show OC visibility status modifiable by user."""
  4322. pdf = _as_pdf_document(self)
  4323. info = mupdf.PdfLayerConfigUi()
  4324. n = mupdf.pdf_count_layer_config_ui( pdf)
  4325. rc = []
  4326. for i in range(n):
  4327. mupdf.pdf_layer_config_ui_info( pdf, i, info)
  4328. if info.type == 1:
  4329. type_ = "checkbox"
  4330. elif info.type == 2:
  4331. type_ = "radiobox"
  4332. else:
  4333. type_ = "label"
  4334. item = {
  4335. "number": i,
  4336. "text": info.text,
  4337. "depth": info.depth,
  4338. "type": type_,
  4339. "on": info.selected,
  4340. "locked": info.locked,
  4341. }
  4342. rc.append(item)
  4343. return rc
  4344. def layout(self, rect=None, width=0, height=0, fontsize=11):
  4345. """Re-layout a reflowable document."""
  4346. if self.is_closed or self.is_encrypted:
  4347. raise ValueError("document closed or encrypted")
  4348. doc = self.this
  4349. if not mupdf.fz_is_document_reflowable( doc):
  4350. return
  4351. w = width
  4352. h = height
  4353. r = JM_rect_from_py(rect)
  4354. if not mupdf.fz_is_infinite_rect(r):
  4355. w = r.x1 - r.x0
  4356. h = r.y1 - r.y0
  4357. if w <= 0.0 or h <= 0.0:
  4358. raise ValueError( "bad page size")
  4359. mupdf.fz_layout_document( doc, w, h, fontsize)
  4360. self._reset_page_refs()
  4361. self.init_doc()
  4362. def load_page(self, page_id):
  4363. """Load a page.
  4364. 'page_id' is either a 0-based page number or a tuple (chapter, pno),
  4365. with chapter number and page number within that chapter.
  4366. """
  4367. if self.is_closed or self.is_encrypted:
  4368. raise ValueError("document closed or encrypted")
  4369. if page_id is None:
  4370. page_id = 0
  4371. if page_id not in self:
  4372. raise ValueError("page not in document")
  4373. if type(page_id) is int and page_id < 0:
  4374. np = self.page_count
  4375. while page_id < 0:
  4376. page_id += np
  4377. if isinstance(page_id, int):
  4378. page = mupdf.fz_load_page(self.this, page_id)
  4379. else:
  4380. chapter, pagenum = page_id
  4381. page = mupdf.fz_load_chapter_page(self.this, chapter, pagenum)
  4382. val = Page(page, self)
  4383. val.thisown = True
  4384. val.parent = self
  4385. self._page_refs[id(val)] = val
  4386. val._annot_refs = weakref.WeakValueDictionary()
  4387. val.number = page_id
  4388. return val
  4389. def location_from_page_number(self, pno):
  4390. """Convert pno to (chapter, page)."""
  4391. if self.is_closed:
  4392. raise ValueError("document closed")
  4393. this_doc = self.this
  4394. loc = mupdf.fz_make_location(-1, -1)
  4395. page_count = mupdf.fz_count_pages(this_doc)
  4396. while pno < 0:
  4397. pno += page_count
  4398. if pno >= page_count:
  4399. raise ValueError( MSG_BAD_PAGENO)
  4400. loc = mupdf.fz_location_from_page_number(this_doc, pno)
  4401. return loc.chapter, loc.page
  4402. def make_bookmark(self, loc):
  4403. """Make a page pointer before layouting document."""
  4404. if self.is_closed or self.is_encrypted:
  4405. raise ValueError("document closed or encrypted")
  4406. loc = mupdf.FzLocation(*loc)
  4407. mark = mupdf.ll_fz_make_bookmark2( self.this.m_internal, loc.internal())
  4408. return mark
  4409. @property
  4410. def markinfo(self) -> dict:
  4411. """Return the PDF MarkInfo value."""
  4412. xref = self.pdf_catalog()
  4413. if xref == 0:
  4414. return None
  4415. rc = self.xref_get_key(xref, "MarkInfo")
  4416. if rc[0] == "null":
  4417. return {}
  4418. if rc[0] == "xref":
  4419. xref = int(rc[1].split()[0])
  4420. val = self.xref_object(xref, compressed=True)
  4421. elif rc[0] == "dict":
  4422. val = rc[1]
  4423. else:
  4424. val = None
  4425. if val is None or not (val[:2] == "<<" and val[-2:] == ">>"):
  4426. return {}
  4427. valid = {"Marked": False, "UserProperties": False, "Suspects": False}
  4428. val = val[2:-2].split("/")
  4429. for v in val[1:]:
  4430. try:
  4431. key, value = v.split()
  4432. except Exception:
  4433. if g_exceptions_verbose > 1: exception_info()
  4434. return valid
  4435. if value == "true":
  4436. valid[key] = True
  4437. return valid
  4438. def move_page(self, pno: int, to: int =-1):
  4439. """Move a page within a PDF document.
  4440. Args:
  4441. pno: source page number.
  4442. to: put before this page, '-1' means after last page.
  4443. """
  4444. if self.is_closed:
  4445. raise ValueError("document closed")
  4446. page_count = len(self)
  4447. if (pno not in range(page_count) or to not in range(-1, page_count)):
  4448. raise ValueError("bad page number(s)")
  4449. before = 1
  4450. copy = 0
  4451. if to == -1:
  4452. to = page_count - 1
  4453. before = 0
  4454. return self._move_copy_page(pno, to, before, copy)
  4455. @property
  4456. def name(self):
  4457. return self._name
  4458. def need_appearances(self, value=None):
  4459. """Get/set the NeedAppearances value."""
  4460. if not self.is_form_pdf:
  4461. return None
  4462. pdf = _as_pdf_document(self)
  4463. oldval = -1
  4464. appkey = "NeedAppearances"
  4465. form = mupdf.pdf_dict_getp(
  4466. mupdf.pdf_trailer(pdf),
  4467. "Root/AcroForm",
  4468. )
  4469. app = mupdf.pdf_dict_gets(form, appkey)
  4470. if mupdf.pdf_is_bool(app):
  4471. oldval = mupdf.pdf_to_bool(app)
  4472. if value:
  4473. mupdf.pdf_dict_puts(form, appkey, mupdf.PDF_TRUE)
  4474. else:
  4475. mupdf.pdf_dict_puts(form, appkey, mupdf.PDF_FALSE)
  4476. if value is None:
  4477. return oldval >= 0
  4478. return value
  4479. @property
  4480. def needs_pass(self):
  4481. """Indicate password required."""
  4482. if self.is_closed:
  4483. raise ValueError("document closed")
  4484. document = self.this if isinstance(self.this, mupdf.FzDocument) else self.this.super()
  4485. ret = mupdf.fz_needs_password( document)
  4486. return ret
  4487. def next_location(self, page_id):
  4488. """Get (chapter, page) of next page."""
  4489. if self.is_closed or self.is_encrypted:
  4490. raise ValueError("document closed or encrypted")
  4491. if type(page_id) is int:
  4492. page_id = (0, page_id)
  4493. if page_id not in self:
  4494. raise ValueError("page id not in document")
  4495. if tuple(page_id) == self.last_location:
  4496. return ()
  4497. this_doc = _as_fz_document(self)
  4498. val = page_id[ 0]
  4499. if not isinstance(val, int):
  4500. RAISEPY(MSG_BAD_PAGEID, PyExc_ValueError)
  4501. chapter = val
  4502. val = page_id[ 1]
  4503. pno = val
  4504. loc = mupdf.fz_make_location(chapter, pno)
  4505. next_loc = mupdf.fz_next_page( this_doc, loc)
  4506. return next_loc.chapter, next_loc.page
  4507. def page_annot_xrefs(self, n):
  4508. if g_use_extra:
  4509. return extra.page_annot_xrefs( self.this, n)
  4510. if isinstance(self.this, mupdf.PdfDocument):
  4511. page_count = mupdf.pdf_count_pages(self.this)
  4512. pdf_document = self.this
  4513. else:
  4514. page_count = mupdf.fz_count_pages(self.this)
  4515. pdf_document = _as_pdf_document(self)
  4516. while n < 0:
  4517. n += page_count
  4518. if n > page_count:
  4519. raise ValueError( MSG_BAD_PAGENO)
  4520. page_obj = mupdf.pdf_lookup_page_obj(pdf_document, n)
  4521. annots = JM_get_annot_xref_list(page_obj)
  4522. return annots
  4523. @property
  4524. def page_count(self):
  4525. """Number of pages."""
  4526. if self.is_closed:
  4527. raise ValueError('document closed')
  4528. if g_use_extra:
  4529. return self.page_count2(self)
  4530. if isinstance( self.this, mupdf.FzDocument):
  4531. return mupdf.fz_count_pages( self.this)
  4532. else:
  4533. return mupdf.pdf_count_pages( self.this)
  4534. def page_cropbox(self, pno):
  4535. """Get CropBox of page number (without loading page)."""
  4536. if self.is_closed:
  4537. raise ValueError("document closed")
  4538. this_doc = self.this
  4539. page_count = mupdf.fz_count_pages( this_doc)
  4540. n = pno
  4541. while n < 0:
  4542. n += page_count
  4543. pdf = _as_pdf_document(self)
  4544. if n >= page_count:
  4545. raise ValueError( MSG_BAD_PAGENO)
  4546. pageref = mupdf.pdf_lookup_page_obj( pdf, n)
  4547. cropbox = JM_cropbox(pageref)
  4548. val = JM_py_from_rect(cropbox)
  4549. val = Rect(val)
  4550. return val
  4551. def page_number_from_location(self, page_id):
  4552. """Convert (chapter, pno) to page number."""
  4553. if type(page_id) is int:
  4554. np = self.page_count
  4555. while page_id < 0:
  4556. page_id += np
  4557. page_id = (0, page_id)
  4558. if page_id not in self:
  4559. raise ValueError("page id not in document")
  4560. chapter, pno = page_id
  4561. loc = mupdf.fz_make_location( chapter, pno)
  4562. page_n = mupdf.fz_page_number_from_location( self.this, loc)
  4563. return page_n
  4564. def page_xref(self, pno):
  4565. """Get xref of page number."""
  4566. if g_use_extra:
  4567. return extra.page_xref( self.this, pno)
  4568. if self.is_closed:
  4569. raise ValueError("document closed")
  4570. page_count = mupdf.fz_count_pages(self.this)
  4571. n = pno
  4572. while n < 0:
  4573. n += page_count
  4574. pdf = _as_pdf_document(self)
  4575. xref = 0
  4576. if n >= page_count:
  4577. raise ValueError( MSG_BAD_PAGENO)
  4578. xref = mupdf.pdf_to_num(mupdf.pdf_lookup_page_obj(pdf, n))
  4579. return xref
  4580. @property
  4581. def pagelayout(self) -> str:
  4582. """Return the PDF PageLayout value.
  4583. """
  4584. xref = self.pdf_catalog()
  4585. if xref == 0:
  4586. return None
  4587. rc = self.xref_get_key(xref, "PageLayout")
  4588. if rc[0] == "null":
  4589. return "SinglePage"
  4590. if rc[0] == "name":
  4591. return rc[1][1:]
  4592. return "SinglePage"
  4593. @property
  4594. def pagemode(self) -> str:
  4595. """Return the PDF PageMode value.
  4596. """
  4597. xref = self.pdf_catalog()
  4598. if xref == 0:
  4599. return None
  4600. rc = self.xref_get_key(xref, "PageMode")
  4601. if rc[0] == "null":
  4602. return "UseNone"
  4603. if rc[0] == "name":
  4604. return rc[1][1:]
  4605. return "UseNone"
  4606. if sys.implementation.version < (3, 9):
  4607. # Appending `[Page]` causes `TypeError: 'ABCMeta' object is not subscriptable`.
  4608. _pages_ret = collections.abc.Iterable
  4609. else:
  4610. _pages_ret = collections.abc.Iterable[Page]
  4611. def pages(self, start: OptInt =None, stop: OptInt =None, step: OptInt =None) -> _pages_ret:
  4612. """Return a generator iterator over a page range.
  4613. Arguments have the same meaning as for the range() built-in.
  4614. """
  4615. if not self.page_count:
  4616. return
  4617. # set the start value
  4618. start = start or 0
  4619. while start < 0:
  4620. start += self.page_count
  4621. if start not in range(self.page_count):
  4622. raise ValueError("bad start page number")
  4623. # set the stop value
  4624. stop = stop if stop is not None and stop <= self.page_count else self.page_count
  4625. # set the step value
  4626. if step == 0:
  4627. raise ValueError("arg 3 must not be zero")
  4628. if step is None:
  4629. if start > stop:
  4630. step = -1
  4631. else:
  4632. step = 1
  4633. for pno in range(start, stop, step):
  4634. yield (self.load_page(pno))
  4635. def pdf_catalog(self):
  4636. """Get xref of PDF catalog."""
  4637. pdf = _as_pdf_document(self, required=0)
  4638. xref = 0
  4639. if not pdf.m_internal:
  4640. return xref
  4641. root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
  4642. xref = mupdf.pdf_to_num(root)
  4643. return xref
  4644. def pdf_trailer(self, compressed=0, ascii=0):
  4645. """Get PDF trailer as a string."""
  4646. return self.xref_object(-1, compressed=compressed, ascii=ascii)
  4647. @property
  4648. def permissions(self):
  4649. """Document permissions."""
  4650. if self.is_encrypted:
  4651. return 0
  4652. doc =self.this
  4653. pdf = mupdf.pdf_document_from_fz_document(doc)
  4654. # for PDF return result of standard function
  4655. if pdf.m_internal:
  4656. return mupdf.pdf_document_permissions(pdf)
  4657. # otherwise simulate the PDF return value
  4658. perm = 0xFFFFFFFC # all permissions granted
  4659. # now switch off where needed
  4660. if not mupdf.fz_has_permission(doc, mupdf.FZ_PERMISSION_PRINT):
  4661. perm = perm ^ mupdf.PDF_PERM_PRINT
  4662. if not mupdf.fz_has_permission(doc, mupdf.FZ_PERMISSION_EDIT):
  4663. perm = perm ^ mupdf.PDF_PERM_MODIFY
  4664. if not mupdf.fz_has_permission(doc, mupdf.FZ_PERMISSION_COPY):
  4665. perm = perm ^ mupdf.PDF_PERM_COPY
  4666. if not mupdf.fz_has_permission(doc, mupdf.FZ_PERMISSION_ANNOTATE):
  4667. perm = perm ^ mupdf.PDF_PERM_ANNOTATE
  4668. return perm
  4669. def prev_location(self, page_id):
  4670. """Get (chapter, page) of previous page."""
  4671. if self.is_closed or self.is_encrypted:
  4672. raise ValueError("document closed or encrypted")
  4673. if type(page_id) is int:
  4674. page_id = (0, page_id)
  4675. if page_id not in self:
  4676. raise ValueError("page id not in document")
  4677. if page_id == (0, 0):
  4678. return ()
  4679. chapter, pno = page_id
  4680. loc = mupdf.fz_make_location(chapter, pno)
  4681. prev_loc = mupdf.fz_previous_page(self.this, loc)
  4682. return prev_loc.chapter, prev_loc.page
def reload_page(self, page: Page) -> Page:
    """Make a fresh copy of a page.

    The page's annotation references and page number are saved, the old
    page object is released (`page.this = None`, `page._erase()`), the
    store is shrunk via `TOOLS.store_shrink(100)`, and the page is loaded
    again with `self.load_page()`. The saved annotation references are
    then copied into the new page's `_annot_refs`.

    Args:
        page: the Page to be reloaded. It must not be used afterwards.

    Returns:
        The newly loaded Page with the same page number.

    Raises:
        AssertionError: if other references to the old `fz_page` existed
            and the reloaded page has the same internal pointer value.
    """
    old_annots = {}  # copy annot references to here
    pno = page.number  # save the page number
    for k, v in page._annot_refs.items():  # save the annot dictionary
        old_annots[k] = v
    # When we call `self.load_page()` below, it will end up in
    # fz_load_chapter_page(), which will return any matching page in the
    # document's list of non-ref-counted loaded pages, instead of actually
    # reloading the page.
    #
    # We want to assert that we have actually reloaded the fz_page, and not
    # simply returned the same `fz_page*` pointer from the document's list
    # of non-ref-counted loaded pages.
    #
    # So we first remove our reference to the `fz_page*`. This will
    # decrement .refs, and if .refs was 1, this is guaranteed to free the
    # `fz_page*` and remove it from the document's list if it was there. So
    # we are guaranteed that our returned `fz_page*` is from a genuine
    # reload, even if it happens to reuse the original block of memory.
    #
    # However if the original .refs is greater than one, there must be
    # other references to the `fz_page` somewhere, and we require that
    # these other references are not keeping the page in the document's
    # list. We check that we are returning a newly loaded page by
    # asserting that our returned `fz_page*` is different from the original
    # `fz_page*` - the original was not freed, so a new `fz_page` cannot
    # reuse the same block of memory.
    #
    # Both values must be read before the reference is dropped below.
    refs_old = page.this.m_internal.refs
    m_internal_old = page.this.m_internal_value()
    page.this = None
    page._erase()  # remove the page
    page = None
    TOOLS.store_shrink(100)
    page = self.load_page(pno)  # reload the page
    # copy annot refs over to the new dictionary
    #page_proxy = weakref.proxy(page)
    for k, v in old_annots.items():
        annot = old_annots[k]
        #annot.parent = page_proxy  # refresh parent to new page
        page._annot_refs[k] = annot
    if refs_old == 1:
        # We know that `page.this = None` will have decremented the ref
        # count to zero so we are guaranteed that the new `fz_page` is a
        # new page even if it happens to have reused the same block of
        # memory.
        pass
    else:
        # Check that the new `fz_page*` is different from the original.
        m_internal_new = page.this.m_internal_value()
        assert m_internal_new != m_internal_old, \
                f'{refs_old=} {m_internal_old=:#x} {m_internal_new=:#x}'
    return page
  4737. def resolve_link(self, uri=None, chapters=0):
  4738. """Calculate internal link destination.
  4739. Args:
  4740. uri: (str) some Link.uri
  4741. chapters: (bool) whether to use (chapter, page) format
  4742. Returns:
  4743. (page_id, x, y) where x, y are point coordinates on the page.
  4744. page_id is either page number (if chapters=0), or (chapter, pno).
  4745. """
  4746. if not uri:
  4747. if chapters:
  4748. return (-1, -1), 0, 0
  4749. return -1, 0, 0
  4750. try:
  4751. loc, xp, yp = mupdf.fz_resolve_link(self.this, uri)
  4752. except Exception:
  4753. if g_exceptions_verbose: exception_info()
  4754. if chapters:
  4755. return (-1, -1), 0, 0
  4756. return -1, 0, 0
  4757. if chapters:
  4758. return (loc.chapter, loc.page), xp, yp
  4759. pno = mupdf.fz_page_number_from_location(self.this, loc)
  4760. return pno, xp, yp
  4761. def rewrite_images(
  4762. self,
  4763. dpi_threshold=None,
  4764. dpi_target=0,
  4765. quality=0,
  4766. lossy=True,
  4767. lossless=True,
  4768. bitonal=True,
  4769. color=True,
  4770. gray=True,
  4771. set_to_gray=False,
  4772. options=None,
  4773. ):
  4774. """Rewrite images in a PDF document.
  4775. The typical use case is to reduce the size of the PDF by recompressing
  4776. images. Default parameters will convert all images to JPEG where
  4777. possible, using the specified resolutions and quality. Exclude
  4778. undesired images by setting parameters to False.
  4779. Args:
  4780. dpi_threshold: look at images with a larger DPI only.
  4781. dpi_target: change eligible images to this DPI.
  4782. quality: Quality of the recompressed images (0-100).
  4783. lossy: process lossy image types (e.g. JPEG).
  4784. lossless: process lossless image types (e.g. PNG).
  4785. bitonal: process black-and-white images (e.g. FAX)
  4786. color: process colored images.
  4787. gray: process gray images.
  4788. set_to_gray: whether to change the PDF to gray at process start.
  4789. options: (PdfImageRewriterOptions) Custom options for image
  4790. rewriting (optional). Expert use only. If provided, other
  4791. parameters are ignored, except set_to_gray.
  4792. """
  4793. quality_str = str(quality)
  4794. if not dpi_threshold:
  4795. dpi_threshold = dpi_target = 0
  4796. if dpi_target > 0 and dpi_target >= dpi_threshold:
  4797. raise ValueError("{dpi_target=} must be less than {dpi_threshold=}")
  4798. template_opts = mupdf.PdfImageRewriterOptions()
  4799. dir1 = set(dir(template_opts)) # for checking that only existing options are set
  4800. if not options:
  4801. opts = mupdf.PdfImageRewriterOptions()
  4802. if bitonal:
  4803. opts.bitonal_image_recompress_method = mupdf.FZ_RECOMPRESS_FAX
  4804. opts.bitonal_image_subsample_method = mupdf.FZ_SUBSAMPLE_AVERAGE
  4805. opts.bitonal_image_subsample_to = dpi_target
  4806. opts.bitonal_image_recompress_quality = quality_str
  4807. opts.bitonal_image_subsample_threshold = dpi_threshold
  4808. if color:
  4809. if lossless:
  4810. opts.color_lossless_image_recompress_method = mupdf.FZ_RECOMPRESS_JPEG
  4811. opts.color_lossless_image_subsample_method = mupdf.FZ_SUBSAMPLE_AVERAGE
  4812. opts.color_lossless_image_subsample_to = dpi_target
  4813. opts.color_lossless_image_subsample_threshold = dpi_threshold
  4814. opts.color_lossless_image_recompress_quality = quality_str
  4815. if lossy:
  4816. opts.color_lossy_image_recompress_method = mupdf.FZ_RECOMPRESS_JPEG
  4817. opts.color_lossy_image_subsample_method = mupdf.FZ_SUBSAMPLE_AVERAGE
  4818. opts.color_lossy_image_subsample_threshold = dpi_threshold
  4819. opts.color_lossy_image_subsample_to = dpi_target
  4820. opts.color_lossy_image_recompress_quality = quality_str
  4821. if gray:
  4822. if lossless:
  4823. opts.gray_lossless_image_recompress_method = mupdf.FZ_RECOMPRESS_JPEG
  4824. opts.gray_lossless_image_subsample_method = mupdf.FZ_SUBSAMPLE_AVERAGE
  4825. opts.gray_lossless_image_subsample_to = dpi_target
  4826. opts.gray_lossless_image_subsample_threshold = dpi_threshold
  4827. opts.gray_lossless_image_recompress_quality = quality_str
  4828. if lossy:
  4829. opts.gray_lossy_image_recompress_method = mupdf.FZ_RECOMPRESS_JPEG
  4830. opts.gray_lossy_image_subsample_method = mupdf.FZ_SUBSAMPLE_AVERAGE
  4831. opts.gray_lossy_image_subsample_threshold = dpi_threshold
  4832. opts.gray_lossy_image_subsample_to = dpi_target
  4833. opts.gray_lossy_image_recompress_quality = quality_str
  4834. else:
  4835. opts = options
  4836. dir2 = set(dir(opts)) # checking that only possible options were used
  4837. invalid_options = dir2 - dir1
  4838. if invalid_options:
  4839. raise ValueError(f"Invalid options: {invalid_options}")
  4840. if set_to_gray:
  4841. self.recolor(1)
  4842. pdf = _as_pdf_document(self)
  4843. mupdf.pdf_rewrite_images(pdf, opts)
  4844. def recolor(self, components=1):
  4845. """Change the color component count on all pages.
  4846. Args:
  4847. components: (int) desired color component count, one of 1, 3, 4.
  4848. Invokes the same-named method for all pages.
  4849. """
  4850. if not self.is_pdf:
  4851. raise ValueError("is no PDF")
  4852. for i in range(self.page_count):
  4853. self.load_page(i).recolor(components)
def resolve_names(self):
    """Convert the PDF's destination names into a Python dict.

    The only parameter is the pymupdf.Document.
    All names found in the catalog under keys "/Dests" and "/Names/Dests" are
    being included.

    The result is stored in `self._resolved_names` and returned from there
    on subsequent calls.

    Returns:
        A dictionary with the following layout:
        - key: (str) the name
        - value: (dict) with the following layout:
            * "page":  target page number (0-based). If no page number found -1.
            * "to": (x, y) target point on page - currently in PDF coordinates,
                    i.e. point (0,0) is the bottom-left of the page.
            * "zoom": (float) the zoom factor
            * "dest": (str) only occurs if the target location on the page has
                    not been provided as "/XYZ" or if no page number was found.
        Examples:
        {'__bookmark_1': {'page': 0, 'to': (0.0, 541.0), 'zoom': 0.0},
        '__bookmark_2': {'page': 0, 'to': (0.0, 481.45), 'zoom': 0.0}}

        or

        '21154a7c20684ceb91f9c9adc3b677c40': {'page': -1, 'dest': '/XYZ 15.75 1486 0'}, ...
    """
    if hasattr(self, "_resolved_names"):  # do not execute multiple times!
        return self._resolved_names
    # this is a backward listing of page xref to page number
    page_xrefs = {self.page_xref(i): i for i in range(self.page_count)}

    def obj_string(obj):
        """Return string version of a PDF object definition."""
        buffer = mupdf.fz_new_buffer(512)
        output = mupdf.FzOutput(buffer)
        mupdf.pdf_print_obj(output, obj, 1, 0)
        output.fz_close_output()
        return JM_UnicodeFromBuffer(buffer)

    def get_array(val):
        """Generate value of one item of the names dictionary."""
        templ_dict = {"page": -1, "dest": ""}  # value template
        if val.pdf_is_indirect():
            val = mupdf.pdf_resolve_indirect(val)
        if val.pdf_is_array():
            array = obj_string(val)
        elif val.pdf_is_dict():
            # for a dictionary, the destination array is under key "D"
            array = obj_string(mupdf.pdf_dict_gets(val, "D"))
        else:  # if all fails return the empty template
            return templ_dict
        # replace PDF "null" by zero, omit the square brackets
        array = array.replace("null", "0")[1:-1]
        # find stuff before first "/"
        idx = array.find("/")
        if idx < 1:  # this has no target page spec
            templ_dict["dest"] = array  # return the orig. string
            return templ_dict
        subval = array[:idx].strip()  # stuff before "/"
        array = array[idx:]  # stuff from "/" onwards
        templ_dict["dest"] = array
        # if we start with /XYZ: extract x, y, zoom
        # 1, 2 or 3 of these values may actually be supplied
        if array.startswith("/XYZ"):
            del templ_dict["dest"]  # don't return orig string in this case
            # make a list of the 3 tokens following "/XYZ"
            array_list = array.split()[1:4]  # omit "/XYZ"
            # fill up missing tokens with "0" strings
            while len(array_list) < 3:  # fill up if too short
                array_list.append("0")  # add missing values
            # make list of 3 floats: x, y and zoom
            t = list(map(float, array_list))  # the resulting x, y, z values
            templ_dict["to"] = (t[0], t[1])
            templ_dict["zoom"] = t[2]
        # extract page number
        if subval.endswith("0 R"):  # page xref given?
            # unknown page xrefs are reported as page -1
            templ_dict["page"] = page_xrefs.get(int(subval.split()[0]),-1)
        else:  # naked page number given
            templ_dict["page"] = int(subval)
        return templ_dict

    def fill_dict(dest_dict, pdf_dict):
        """Generate name resolution items for pdf_dict.

        This may be either "/Names/Dests" or just "/Dests"
        """
        # length of the PDF dictionary
        name_count = mupdf.pdf_dict_len(pdf_dict)
        # extract key-val of each dict item
        for i in range(name_count):
            key = mupdf.pdf_dict_get_key(pdf_dict, i)
            val = mupdf.pdf_dict_get_val(pdf_dict, i)
            if key.pdf_is_name():  # this should always be true!
                dict_key = key.pdf_to_name()
            else:
                message(f"key {i} is no /Name")
                dict_key = None
            if dict_key:
                dest_dict[dict_key] = get_array(val)  # store key/value in dict

    # access underlying PDF document of fz Document
    pdf = mupdf.pdf_document_from_fz_document(self)
    # access PDF catalog
    catalog = mupdf.pdf_dict_gets(mupdf.pdf_trailer(pdf), "Root")
    dest_dict = {}
    # make PDF_NAME(Dests)
    dests = mupdf.pdf_new_name("Dests")
    # extract destinations old style (PDF 1.1)
    old_dests = mupdf.pdf_dict_get(catalog, dests)
    if old_dests.pdf_is_dict():
        fill_dict(dest_dict, old_dests)
    # extract destinations new style (PDF 1.2+)
    # entries found here overwrite same-named old style entries
    tree = mupdf.pdf_load_name_tree(pdf, dests)
    if tree.pdf_is_dict():
        fill_dict(dest_dict, tree)
    self._resolved_names = dest_dict  # store result or reuse
    return dest_dict
  4960. def save(
  4961. self,
  4962. filename,
  4963. garbage=0,
  4964. clean=0,
  4965. deflate=0,
  4966. deflate_images=0,
  4967. deflate_fonts=0,
  4968. incremental=0,
  4969. ascii=0,
  4970. expand=0,
  4971. linear=0,
  4972. no_new_id=0,
  4973. appearance=0,
  4974. pretty=0,
  4975. encryption=1,
  4976. permissions=4095,
  4977. owner_pw=None,
  4978. user_pw=None,
  4979. preserve_metadata=1,
  4980. use_objstms=0,
  4981. compression_effort=0,
  4982. ):
  4983. # From %pythonprepend save
  4984. #
  4985. """Save PDF to file, pathlib.Path or file pointer."""
  4986. if self.is_closed or self.is_encrypted:
  4987. raise ValueError("document closed or encrypted")
  4988. if type(filename) is str:
  4989. pass
  4990. elif hasattr(filename, "open"): # assume: pathlib.Path
  4991. filename = str(filename)
  4992. elif hasattr(filename, "name"): # assume: file object
  4993. filename = filename.name
  4994. elif not hasattr(filename, "seek"): # assume file object
  4995. raise ValueError("filename must be str, Path or file object")
  4996. if filename == self.name and not incremental:
  4997. raise ValueError("save to original must be incremental")
  4998. if linear and use_objstms:
  4999. raise ValueError("'linear' and 'use_objstms' cannot both be requested")
  5000. if self.page_count < 1:
  5001. raise ValueError("cannot save with zero pages")
  5002. if incremental:
  5003. if self.name != filename or self.stream:
  5004. raise ValueError("incremental needs original file")
  5005. if user_pw and len(user_pw) > 40 or owner_pw and len(owner_pw) > 40:
  5006. raise ValueError("password length must not exceed 40")
  5007. pdf = _as_pdf_document(self)
  5008. opts = mupdf.PdfWriteOptions()
  5009. opts.do_incremental = incremental
  5010. opts.do_ascii = ascii
  5011. opts.do_compress = deflate
  5012. opts.do_compress_images = deflate_images
  5013. opts.do_compress_fonts = deflate_fonts
  5014. opts.do_decompress = expand
  5015. opts.do_garbage = garbage
  5016. opts.do_pretty = pretty
  5017. opts.do_linear = linear
  5018. opts.do_clean = clean
  5019. opts.do_sanitize = clean
  5020. opts.dont_regenerate_id = no_new_id
  5021. opts.do_appearance = appearance
  5022. opts.do_encrypt = encryption
  5023. opts.permissions = permissions
  5024. if owner_pw is not None:
  5025. opts.opwd_utf8_set_value(owner_pw)
  5026. elif user_pw is not None:
  5027. opts.opwd_utf8_set_value(user_pw)
  5028. if user_pw is not None:
  5029. opts.upwd_utf8_set_value(user_pw)
  5030. opts.do_preserve_metadata = preserve_metadata
  5031. opts.do_use_objstms = use_objstms
  5032. opts.compression_effort = compression_effort
  5033. out = None
  5034. pdf.m_internal.resynth_required = 0
  5035. JM_embedded_clean(pdf)
  5036. if no_new_id == 0:
  5037. JM_ensure_identity(pdf)
  5038. if isinstance(filename, str):
  5039. #log( 'calling mupdf.pdf_save_document()')
  5040. mupdf.pdf_save_document(pdf, filename, opts)
  5041. else:
  5042. out = JM_new_output_fileptr(filename)
  5043. #log( f'{type(out)=} {type(out.this)=}')
  5044. mupdf.pdf_write_document(pdf, out, opts)
  5045. out.fz_close_output()
  5046. def save_snapshot(self, filename):
  5047. """Save a file snapshot suitable for journalling."""
  5048. if self.is_closed:
  5049. raise ValueError("doc is closed")
  5050. if type(filename) is str:
  5051. pass
  5052. elif hasattr(filename, "open"): # assume: pathlib.Path
  5053. filename = str(filename)
  5054. elif hasattr(filename, "name"): # assume: file object
  5055. filename = filename.name
  5056. else:
  5057. raise ValueError("filename must be str, Path or file object")
  5058. if filename == self.name:
  5059. raise ValueError("cannot snapshot to original")
  5060. pdf = _as_pdf_document(self)
  5061. mupdf.pdf_save_snapshot(pdf, filename)
  5062. def saveIncr(self):
  5063. """ Save PDF incrementally"""
  5064. return self.save(self.name, incremental=True, encryption=mupdf.PDF_ENCRYPT_KEEP)
  def select(self, pyliste):
      """Build sub-pdf with page numbers in the list.

      Args:
          pyliste: non-empty sequence of page numbers, each of which must
              lie in ``range(len(self))``.

      Raises:
          ValueError: if the document is closed, encrypted or not a PDF,
              if ``pyliste`` has no ``__getitem__``, or if it is empty or
              contains an out-of-range page number.
      """
      if self.is_closed or self.is_encrypted:
          raise ValueError("document closed or encrypted")
      if not self.is_pdf:
          raise ValueError("is no PDF")
      if not hasattr(pyliste, "__getitem__"):
          raise ValueError("sequence required")

      # Checking the smallest and the largest entry is enough to know that
      # every entry is a valid page number.
      pages = range(len(self))
      numbers_ok = (
          len(pyliste) != 0
          and min(pyliste) in pages
          and max(pyliste) in pages
      )
      if not numbers_ok:
          raise ValueError("bad page number(s)")

      # get underlying pdf document
      pdf = _as_pdf_document(self)

      # create page sub-pdf via pdf_rearrange_pages2().
      extra_args = ()
      if mupdf_version_tuple >= (1, 25, 3):
          # We use PDF_CLEAN_STRUCTURE_KEEP otherwise we lose structure tree
          # which, for example, breaks test_3705.
          extra_args = (mupdf.PDF_CLEAN_STRUCTURE_KEEP,)
      mupdf.pdf_rearrange_pages2(pdf, pyliste, *extra_args)

      # remove any existing pages with their kids
      self._reset_page_refs()
  def set_language(self, language=None):
      """Set the document language.

      A falsy ``language`` selects ``mupdf.FZ_LANG_UNSET``; any other value
      is converted with ``mupdf.fz_text_language_from_string``.  Always
      returns True.
      """
      pdf = _as_pdf_document(self)
      if language:
          lang = mupdf.fz_text_language_from_string(language)
      else:
          lang = mupdf.FZ_LANG_UNSET
      mupdf.pdf_set_document_language(pdf, lang)
      return True
  def set_layer(self, config, basestate=None, on=None, off=None, rbgroups=None, locked=None):
      """Set the PDF keys /ON, /OFF, /RBGroups of an OC layer.

      Args:
          config: index into the /Configs array, or -1 for the /D entry.
          basestate: optional; upper-cased and then required to be one of
              "ON", "OFF" or "Unchanged".
          on, off, locked: optional list/tuple of OCG identifiers; each
              entry must be a key of ``self.get_ocgs()``.
          rbgroups: optional list/tuple of lists/tuples of OCG identifiers.

      Raises:
          ValueError: if the document is closed, has no optional content,
              an argument has a bad type or names unknown OCGs, or the
              selected configuration does not exist.
      """
      if self.is_closed:
          raise ValueError("document closed")
      known = set(self.get_ocgs().keys())
      if not known:
          raise ValueError("document has no optional content")

      # 'on', 'off' and 'locked' follow identical validation rules.
      simple_checks = (
          (on, "bad type: 'on'", "bad OCGs in 'on': %s"),
          (off, "bad type: 'off'", "bad OCGs in 'off': %s"),
          (locked, "bad type: 'locked'", "bad OCGs in 'locked': %s"),
      )
      for values, type_msg, ocg_msg in simple_checks:
          if not values:
              continue
          if type(values) not in (list, tuple):
              raise ValueError(type_msg)
          unknown = set(values).difference(known)
          if unknown:
              raise ValueError(ocg_msg % unknown)

      if rbgroups:
          if type(rbgroups) not in (list, tuple):
              raise ValueError("bad type: 'rbgroups'")
          for group in rbgroups:
              if type(group) not in (list, tuple):
                  raise ValueError("bad RBGroup '%s'" % group)
              unknown = set(group).difference(known)
              if unknown:
                  raise ValueError("bad OCGs in RBGroup: %s" % unknown)

      if basestate:
          basestate = str(basestate).upper()
          if basestate == "UNCHANGED":
              basestate = "Unchanged"
          if basestate not in ("ON", "OFF", "Unchanged"):
              raise ValueError("bad 'basestate'")

      pdf = _as_pdf_document(self)
      ocp = mupdf.pdf_dict_getl(
          mupdf.pdf_trailer(pdf),
          PDF_NAME('Root'),
          PDF_NAME('OCProperties'),
      )
      if not ocp.m_internal:
          return  # no /OCProperties entry: nothing to update

      if config == -1:
          target = mupdf.pdf_dict_get(ocp, PDF_NAME('D'))
      else:
          configs = mupdf.pdf_dict_get(ocp, PDF_NAME('Configs'))
          target = mupdf.pdf_array_get(configs, config)
      if not target.m_internal:
          raise ValueError(MSG_BAD_OC_CONFIG)

      JM_set_ocg_arrays(target, basestate, on, off, rbgroups, locked)
      mupdf.ll_pdf_read_ocg(pdf.m_internal)
  def set_layer_ui_config(self, number, action=0):
      """Set / unset OC intent configuration.

      Args:
          number: sequence number of the UI configuration, or its "text"
              as reported by ``self.layer_ui_configs()``.
          action: 1 toggles, 2 deselects, any other value selects.

      Raises:
          ValueError: if ``number`` is a str matching no configuration.
      """
      if isinstance(number, str):
          # The user gave the name instead of the sequence number, so look
          # the number up by that name and continue with it.
          matches = [
              entry["number"]
              for entry in self.layer_ui_configs()
              if entry["text"] == number
          ]
          if matches == []:
              raise ValueError(f"bad OCG '{number}'.")
          number = matches[0]  # this is the number for the name

      pdf = _as_pdf_document(self)
      if action == 1:
          operation = mupdf.pdf_toggle_layer_config_ui
      elif action == 2:
          operation = mupdf.pdf_deselect_layer_config_ui
      else:
          operation = mupdf.pdf_select_layer_config_ui
      operation(pdf, number)
  def set_markinfo(self, markinfo: dict) -> bool:
      """Set the PDF MarkInfo values.

      Args:
          markinfo: dict whose keys are a subset of "Marked",
              "UserProperties" and "Suspects"; keys not given default to
              False.  Each value must stringify (case-insensitively) to
              "true" or "false".

      Returns:
          False if ``markinfo`` is empty or not a dict, else True after
          the catalog's /MarkInfo key has been written.

      Raises:
          ValueError: if the document has no PDF catalog, or on an unknown
              key or a non-boolean value.
      """
      catalog = self.pdf_catalog()
      if catalog == 0:
          raise ValueError("not a PDF")
      if not markinfo or not isinstance(markinfo, dict):
          return False

      settings = {"Marked": False, "UserProperties": False, "Suspects": False}
      unknown = set(markinfo.keys()).difference(settings.keys())
      if unknown:
          raise ValueError(f"bad MarkInfo key(s): {unknown}")

      settings.update(markinfo)
      entries = []
      for key, value in settings.items():
          text = str(value).lower()
          if text not in ("true", "false"):
              raise ValueError(f"bad key value '{key}': '{text}'")
          entries.append(f"/{key} {text}")
      self.xref_set_key(catalog, "MarkInfo", "<<" + "".join(entries) + ">>")
      return True
  def set_pagelayout(self, pagelayout: str):
      """Set the PDF PageLayout value.

      Args:
          pagelayout: one of the names in ``valid`` below, compared
              case-insensitively; one leading "/" is ignored.

      Returns:
          True once the catalog's /PageLayout key has been written.

      Raises:
          ValueError: if the document has no PDF catalog or the value is
              empty or not one of the valid names.
      """
      valid = ("SinglePage", "OneColumn", "TwoColumnLeft", "TwoColumnRight", "TwoPageLeft", "TwoPageRight")
      catalog = self.pdf_catalog()
      if catalog == 0:
          raise ValueError("not a PDF")
      if not pagelayout:
          raise ValueError("bad PageLayout value")
      if pagelayout[0] == "/":
          pagelayout = pagelayout[1:]

      # Find the canonical spelling of the requested name.
      canonical = next(
          (name for name in valid if pagelayout.lower() == name.lower()),
          None,
      )
      if canonical is None:
          raise ValueError("bad PageLayout value")
      self.xref_set_key(catalog, "PageLayout", f"/{canonical}")
      return True
  def set_pagemode(self, pagemode: str):
      """Set the PDF PageMode value.

      Args:
          pagemode: one of the names in ``valid`` below, compared
              case-insensitively; one leading "/" is ignored.

      Returns:
          True once the catalog's /PageMode key has been written.

      Raises:
          ValueError: if the document has no PDF catalog or the value is
              empty or not one of the valid names.
      """
      valid = ("UseNone", "UseOutlines", "UseThumbs", "FullScreen", "UseOC", "UseAttachments")
      catalog = self.pdf_catalog()
      if catalog == 0:
          raise ValueError("not a PDF")
      if not pagemode:
          raise ValueError("bad PageMode value")
      if pagemode[0] == "/":
          pagemode = pagemode[1:]

      # Find the canonical spelling of the requested name.
      canonical = next(
          (name for name in valid if pagemode.lower() == name.lower()),
          None,
      )
      if canonical is None:
          raise ValueError("bad PageMode value")
      self.xref_set_key(catalog, "PageMode", f"/{canonical}")
      return True
  def set_xml_metadata(self, metadata):
      """Store XML document level metadata.

      ``metadata`` is encoded as UTF-8.  An existing /Metadata stream of
      the catalog is updated in place; otherwise a new stream object with
      /Type /Metadata and /Subtype /XML is added and linked from the
      catalog.

      Raises:
          ValueError: if the document is closed or encrypted.
      """
      if self.is_closed or self.is_encrypted:
          raise ValueError("document closed or encrypted")
      pdf = _as_pdf_document(self)
      root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
      if not root.m_internal:
          RAISEPY(MSG_BAD_PDFROOT, JM_Exc_FileDataError)
      payload = mupdf.fz_new_buffer_from_copied_data(metadata.encode('utf-8'))
      existing = mupdf.pdf_dict_get(root, PDF_NAME('Metadata'))
      if existing.m_internal:
          JM_update_stream(pdf, existing, payload, 0)
          return
      stream = mupdf.pdf_add_stream(pdf, payload, mupdf.PdfObj(), 0)
      mupdf.pdf_dict_put(stream, PDF_NAME('Type'), PDF_NAME('Metadata'))
      mupdf.pdf_dict_put(stream, PDF_NAME('Subtype'), PDF_NAME('XML'))
      mupdf.pdf_dict_put(root, PDF_NAME('Metadata'), stream)
  def switch_layer(self, config, as_default=0):
      """Activate an OC layer.

      Args:
          config: index of the layer configuration to select.  Negative
              values are a no-op.
          as_default: if truthy, additionally make the selected
              configuration the default and re-read the OCG data.

      Raises:
          ValueError: if the document has no non-empty /Configs array and
              ``config`` is 1 or larger.
      """
      pdf = _as_pdf_document(self)
      cfgs = mupdf.pdf_dict_getl(
          mupdf.pdf_trailer(pdf),
          PDF_NAME('Root'),
          PDF_NAME('OCProperties'),
          PDF_NAME('Configs'),
      )
      has_configs = mupdf.pdf_is_array(cfgs) and mupdf.pdf_array_len(cfgs)
      if not has_configs:
          # Without optional configurations only config < 1 is acceptable,
          # and there is nothing to do for it.
          if config >= 1:
              raise ValueError(MSG_BAD_OC_LAYER)
          return
      if config < 0:
          return
      mupdf.pdf_select_layer_config(pdf, config)
      if as_default:
          mupdf.pdf_set_layer_config_as_default(pdf)
          mupdf.ll_pdf_read_ocg(pdf.m_internal)
  def update_object(self, xref, text, page=None):
      """Replace object definition source.

      Args:
          xref: number of the object to replace; must be in
              ``1 .. xref_length - 1``.
          text: PDF source of the new object definition.
          page: optional page whose links are refreshed afterwards.

      Raises:
          ValueError: if the document is closed or encrypted, or ``xref``
              is out of range.
      """
      if self.is_closed or self.is_encrypted:
          raise ValueError("document closed or encrypted")
      pdf = _as_pdf_document(self)
      xreflen = mupdf.pdf_xref_len(pdf)
      if not _INRANGE(xref, 1, xreflen-1):
          # Raise the same exception as the other xref-checking methods
          # instead of passing a message constant as the exception type.
          raise ValueError(MSG_BAD_XREF)
      ENSURE_OPERATION(pdf)
      # create new object with passed-in string
      new_obj = JM_pdf_obj_from_str(pdf, text)
      mupdf.pdf_update_object(pdf, xref, new_obj)
      if page:
          JM_refresh_links(_as_pdf_page(page))
  def update_stream(self, xref=0, stream=None, new=1, compress=1):
      """Replace xref stream part.

      Args:
          xref: number of the object whose stream is replaced; must be in
              ``1 .. xref_length - 1`` and refer to a PDF dictionary.
          stream: the new stream content (bytes-like).
          new: not used by this implementation; kept so existing callers
              continue to work.
          compress: passed on to ``JM_update_stream``.

      Raises:
          ValueError: if the document is closed or encrypted, ``xref`` is
              out of range, or the object is no dictionary.
          TypeError: if ``stream`` cannot be converted to a buffer.
      """
      if self.is_closed or self.is_encrypted:
          raise ValueError("document closed or encrypted")
      pdf = _as_pdf_document(self)
      xreflen = mupdf.pdf_xref_len(pdf)
      # Valid object numbers end at xreflen - 1, the same upper bound the
      # other xref_* methods of this class enforce.
      if xref < 1 or xref >= xreflen:
          raise ValueError(MSG_BAD_XREF)
      # get the object
      obj = mupdf.pdf_new_indirect(pdf, xref, 0)
      if not mupdf.pdf_is_dict(obj):
          raise ValueError(MSG_IS_NO_DICT)
      res = JM_BufferFromBytes(stream)
      if not res.m_internal:
          raise TypeError(MSG_BAD_BUFFER)
      JM_update_stream(pdf, obj, res, compress)
      pdf.dirty = 1
  @property
  def version_count(self):
      '''
      Count versions of PDF document.

      Returns 0 when the document has no underlying PDF object.
      '''
      pdf = _as_pdf_document(self, required=0)
      if not pdf.m_internal:
          return 0
      return mupdf.pdf_count_versions(pdf)
  def write(
          self,
          garbage=False,
          clean=False,
          deflate=False,
          deflate_images=False,
          deflate_fonts=False,
          incremental=False,
          ascii=False,
          expand=False,
          linear=False,
          no_new_id=False,
          appearance=False,
          pretty=False,
          encryption=1,
          permissions=4095,
          owner_pw=None,
          user_pw=None,
          preserve_metadata=1,
          use_objstms=0,
          compression_effort=0,
          ):
      """Save the document to memory and return the result as bytes.

      All parameters are forwarded unchanged, as keyword arguments, to
      ``self.save``, which is given an in-memory ``io.BytesIO`` object as
      its output target.
      """
      from io import BytesIO

      save_options = dict(
          garbage=garbage,
          clean=clean,
          no_new_id=no_new_id,
          appearance=appearance,
          deflate=deflate,
          deflate_images=deflate_images,
          deflate_fonts=deflate_fonts,
          incremental=incremental,
          ascii=ascii,
          expand=expand,
          linear=linear,
          pretty=pretty,
          encryption=encryption,
          permissions=permissions,
          owner_pw=owner_pw,
          user_pw=user_pw,
          preserve_metadata=preserve_metadata,
          use_objstms=use_objstms,
          compression_effort=compression_effort,
      )
      memory_file = BytesIO()
      self.save(memory_file, **save_options)
      return memory_file.getvalue()
  @property
  def xref(self):
      """PDF xref number of page.

      Obtained from ``self.parent.page_xref`` for ``self.number``, after
      ``CheckParent`` has validated this object.
      """
      CheckParent(self)
      owner = self.parent
      return owner.page_xref(self.number)
  def xref_get_key(self, xref, key):
      """Get PDF dict key value of object at 'xref'.

      Args:
          xref: object number in ``1 .. xref_length - 1``, or -1 to read
              from the PDF trailer.
          key: key (path) handed to ``mupdf.pdf_dict_getp``.

      Returns:
          A tuple ``(kind, text)``.  ``kind`` is one of "xref", "array",
          "dict", "int", "float", "null", "bool", "name", "string" or
          "unknown"; ``("null", "null")`` is returned if the object or
          the key does not exist.

      Raises:
          ValueError: if ``xref`` is out of range.
      """
      pdf = _as_pdf_document(self)
      xreflen = mupdf.pdf_xref_len(pdf)
      if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
          raise ValueError(MSG_BAD_XREF)
      if xref > 0:
          container = mupdf.pdf_load_object(pdf, xref)
      else:
          container = mupdf.pdf_trailer(pdf)
      if not container.m_internal:
          return ("null", "null")
      item = mupdf.pdf_dict_getp(container, key)
      if not item.m_internal:
          return ("null", "null")

      # 'rendered' stays None for kinds that are serialized generically
      # further down (arrays, dicts, floats and unknown objects).
      rendered = None
      if mupdf.pdf_is_indirect(item):
          kind = "xref"
          rendered = "%i 0 R" % mupdf.pdf_to_num(item)
      elif mupdf.pdf_is_array(item):
          kind = "array"
      elif mupdf.pdf_is_dict(item):
          kind = "dict"
      elif mupdf.pdf_is_int(item):
          kind = "int"
          rendered = "%i" % mupdf.pdf_to_int(item)
      elif mupdf.pdf_is_real(item):
          kind = "float"
      elif mupdf.pdf_is_null(item):
          kind = "null"
          rendered = "null"
      elif mupdf.pdf_is_bool(item):
          kind = "bool"
          rendered = "true" if mupdf.pdf_to_bool(item) else "false"
      elif mupdf.pdf_is_name(item):
          kind = "name"
          rendered = "/%s" % mupdf.pdf_to_name(item)
      elif mupdf.pdf_is_string(item):
          kind = "string"
          rendered = JM_UnicodeFromStr(mupdf.pdf_to_text_string(item))
      else:
          kind = "unknown"

      if rendered is None:
          rendered = JM_UnicodeFromBuffer(JM_object_to_buffer(item, 1, 0))
      return (kind, rendered)
  def xref_get_keys(self, xref):
      """Get the keys of PDF dict object at 'xref'. Use -1 for the PDF trailer.

      Returns:
          A list with the key names; empty if the dictionary length
          reported by MuPDF is 0.

      Raises:
          ValueError: if ``xref`` is out of range.
      """
      pdf = _as_pdf_document(self)
      xreflen = mupdf.pdf_xref_len(pdf)
      if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
          raise ValueError(MSG_BAD_XREF)
      if xref > 0:
          obj = mupdf.pdf_load_object(pdf, xref)
      else:
          obj = mupdf.pdf_trailer(pdf)
      count = mupdf.pdf_dict_len(obj)
      return [
          mupdf.pdf_to_name(mupdf.pdf_dict_get_key(obj, index))
          for index in range(count)
      ]
  def xref_is_font(self, xref):
      """Check if xref is a font object.

      True if the object's "Type" key has the value "/Font".

      Raises:
          ValueError: if the document is closed or encrypted.
      """
      if self.is_closed or self.is_encrypted:
          raise ValueError("document closed or encrypted")
      type_value = self.xref_get_key(xref, "Type")[1]
      return type_value == "/Font"
  def xref_is_image(self, xref):
      """Check if xref is an image object.

      True if the object's "Subtype" key has the value "/Image".

      Raises:
          ValueError: if the document is closed or encrypted.
      """
      if self.is_closed or self.is_encrypted:
          raise ValueError("document closed or encrypted")
      subtype = self.xref_get_key(xref, "Subtype")[1]
      return subtype == "/Image"
  def xref_is_stream(self, xref=0):
      """Check if xref is a stream object.

      Returns False when the document has no underlying PDF object.
      """
      pdf = _as_pdf_document(self, required=0)
      if pdf.m_internal:
          return bool(mupdf.pdf_obj_num_is_stream(pdf, xref))
      return False  # not a PDF
  def xref_is_xobject(self, xref):
      """Check if xref is a form xobject.

      True if the object's "Subtype" key has the value "/Form".

      Raises:
          ValueError: if the document is closed or encrypted.
      """
      if self.is_closed or self.is_encrypted:
          raise ValueError("document closed or encrypted")
      subtype = self.xref_get_key(xref, "Subtype")[1]
      return subtype == "/Form"
  def xref_length(self):
      """Get length of xref table.

      Returns 0 when the document has no underlying PDF object.
      """
      pdf = _as_pdf_document(self, required=0)
      if not pdf.m_internal:
          return 0
      return mupdf.pdf_xref_len(pdf)
  def xref_object(self, xref, compressed=0, ascii=0):
      """Get xref object source as a string.

      Args:
          xref: object number in ``1 .. xref_length - 1``, or -1 for the
              PDF trailer.
          compressed, ascii: passed on to the object serializer.

      Raises:
          ValueError: if the document is closed or ``xref`` is out of
              range.
      """
      if self.is_closed:
          raise ValueError("document closed")
      if g_use_extra:
          # Delegate to the implementation in the 'extra' module.
          return extra.xref_object(self.this, xref, compressed, ascii)
      pdf = _as_pdf_document(self)
      xreflen = mupdf.pdf_xref_len(pdf)
      if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
          raise ValueError(MSG_BAD_XREF)
      source = mupdf.pdf_load_object(pdf, xref) if xref > 0 else mupdf.pdf_trailer(pdf)
      resolved = mupdf.pdf_resolve_indirect(source)
      return JM_EscapeStrFromBuffer(JM_object_to_buffer(resolved, compressed, ascii))
  def xref_set_key(self, xref, key, value):
      """Set the value of a PDF dictionary key.

      Args:
          xref: object number in ``1 .. xref_length - 1``, or -1 for the
              PDF trailer.
          key: non-empty str; of the characters in INVALID_NAME_CHARS it
              may contain at most "/".
          value: non-empty str; if it starts with "/", the remainder must
              not contain any character of INVALID_NAME_CHARS.

      Raises:
          ValueError: if the document is closed, ``key`` or ``value`` is
              invalid, or ``xref`` is out of range.
      """
      if self.is_closed:
          raise ValueError("document closed")

      key_ok = (
          bool(key)
          and isinstance(key, str)
          and INVALID_NAME_CHARS.intersection(key) in (set(), {"/"})
      )
      if not key_ok:
          raise ValueError("bad 'key'")

      if not isinstance(value, str) or not value:
          raise ValueError("bad 'value'")
      if value[0] == "/" and INVALID_NAME_CHARS.intersection(value[1:]):
          raise ValueError("bad 'value'")

      pdf = _as_pdf_document(self)
      xreflen = mupdf.pdf_xref_len(pdf)
      if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
          raise ValueError(MSG_BAD_XREF)

      use_trailer = xref == -1
      if use_trailer:
          obj = mupdf.pdf_trailer(pdf)
      else:
          obj = mupdf.pdf_load_object(pdf, xref)
      new_obj = JM_set_object_value(obj, key, value)
      if not new_obj.m_internal:
          return  # did not work: skip update

      if not use_trailer:
          mupdf.pdf_update_object(pdf, xref, new_obj)
          return
      # The trailer is not replaced as a whole; instead every entry of the
      # new dictionary is copied into it.
      for index in range(mupdf.pdf_dict_len(new_obj)):
          mupdf.pdf_dict_put(
              obj,
              mupdf.pdf_dict_get_key(new_obj, index),
              mupdf.pdf_dict_get_val(new_obj, index),
          )
  def xref_stream(self, xref):
      """Get decompressed xref stream.

      Returns:
          The stream content, or None if the object is no stream.

      Raises:
          ValueError: if the document is closed or encrypted, or ``xref``
              is out of range.
      """
      if self.is_closed or self.is_encrypted:
          raise ValueError("document closed or encrypted")
      pdf = _as_pdf_document(self)
      xreflen = mupdf.pdf_xref_len(pdf)
      if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
          raise ValueError(MSG_BAD_XREF)
      if xref >= 0:
          obj = mupdf.pdf_new_indirect(pdf, xref, 0)
      else:
          obj = mupdf.pdf_trailer(pdf)
      if not mupdf.pdf_is_stream(obj):
          return None
      return JM_BinFromBuffer(mupdf.pdf_load_stream_number(pdf, xref))
  def xref_stream_raw(self, xref):
      """Get xref stream without decompression.

      Returns:
          The raw stream content, or None if the object is no stream.

      Raises:
          ValueError: if the document is closed or encrypted, or ``xref``
              is out of range.
      """
      if self.is_closed or self.is_encrypted:
          raise ValueError("document closed or encrypted")
      pdf = _as_pdf_document(self)
      xreflen = mupdf.pdf_xref_len(pdf)
      if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
          raise ValueError(MSG_BAD_XREF)
      if xref >= 0:
          obj = mupdf.pdf_new_indirect(pdf, xref, 0)
      else:
          obj = mupdf.pdf_trailer(pdf)
      if not mupdf.pdf_is_stream(obj):
          return None
      return JM_BinFromBuffer(mupdf.pdf_load_raw_stream_number(pdf, xref))
  def xref_xml_metadata(self):
      """Get xref of document XML metadata.

      Returns 0 if the catalog has no /Metadata entry.
      """
      pdf = _as_pdf_document(self)
      root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
      if not root.m_internal:
          RAISEPY(MSG_BAD_PDFROOT, JM_Exc_FileDataError)
      xml = mupdf.pdf_dict_get(root, PDF_NAME('Metadata'))
      if not xml.m_internal:
          return 0
      return mupdf.pdf_to_num(xml)
  # '__dict__' is listed among the slots, so instances still accept
  # arbitrary additional attributes.
  __slots__ = ('this', 'page_count2', 'this_is_pdf', '__dict__')
  # Read-only property returning self._outline.
  outline = property(lambda self: self._outline)
  # Alternative names for methods defined above.
  tobytes = write
  is_stream = xref_is_stream
  # Alias: calling open(...) constructs a Document.
  # NOTE(review): if this sits at module level, it shadows the builtin
  # open() for all code in this module - confirm that is intended.
  open = Document
  class DocumentWriter:
      """Writes pages to a PDF output through a ``mupdf.FzDocumentWriter``.

      Usable as a context manager; leaving the ``with`` block calls
      ``close()``.
      """

      def __init__(self, path, options=''):
          """Create the writer.

          Args:
              path: output target.  A str is used as file name.  An object
                  with an ``absolute`` attribute is converted with
                  ``str()``; an object with a ``name`` attribute
                  contributes that name.  Anything that is still not a str
                  afterwards is wrapped by ``JM_new_output_fileptr``.
              options: option string passed to ``mupdf.FzDocumentWriter``.
          """
          if not isinstance(path, str):
              if hasattr(path, 'absolute'):
                  path = str(path)
              elif hasattr(path, 'name'):
                  path = path.name

          if isinstance(path, str):
              self.this = mupdf.FzDocumentWriter(path, options, mupdf.FzDocumentWriter.PathType_PDF)
              return

          # The Python output object must stay alive for the lifetime of
          # this DocumentWriter, otherwise calls to virtual methods
          # implemented in Python fail.  The second assert checks that the
          # writer wrapper carries an '_out' attribute (presumably the
          # reference that keeps it alive - confirm in the binding).
          #
          # Unrelated to this, ownership of the underlying output is
          # passed to the writer; the first assert checks that 'out' no
          # longer holds it.
          out = JM_new_output_fileptr(path)
          self.this = mupdf.FzDocumentWriter(out, options, mupdf.FzDocumentWriter.OutputType_PDF)
          assert out.m_internal_value() == 0
          assert hasattr(self.this, '_out')

      def __enter__(self):
          return self

      def __exit__(self, *args):
          self.close()

      def begin_page(self, mediabox):
          """Start a new page of size ``mediabox``; returns a DeviceWrapper."""
          page_rect = JM_rect_from_py(mediabox)
          return DeviceWrapper(mupdf.fz_begin_page(self.this, page_rect))

      def end_page(self):
          """Finish the page started with ``begin_page``."""
          mupdf.fz_end_page(self.this)

      def close(self):
          """Close the underlying document writer."""
          mupdf.fz_close_document_writer(self.this)
  class Font:
      """Python wrapper around a MuPDF font, kept in ``self.this``.

      NOTE(review): in the ``small_caps`` branches below, the local
      ``font`` is only assigned when the encoded glyph id is >= 0.  This
      presumably always holds - confirm against
      ``fz_encode_character_sc``.
      """

      def __del__(self):
          if type(self) is not Font:
              return None

      def __init__(
              self,
              fontname=None,
              fontfile=None,
              fontbuffer=None,
              script=0,
              language=None,
              ordering=-1,
              is_bold=0,
              is_italic=0,
              is_serif=0,
              embed=1,
              ):
          """Create a font from a name, a file or a memory buffer.

          Args:
              fontname: font name.  Some names select a CJK ordering, names
                  found in ``fitz_fontdescriptors`` are loaded from the
                  optional ``pymupdf_fonts`` package, other names are
                  mapped through ``Base14_fontdict`` when ``ordering`` is
                  negative.
              fontfile: path of a font file.
              fontbuffer: font data as bytes, bytearray, or an object with
                  a ``getvalue()`` method.
              script, language, ordering, is_bold, is_italic, is_serif,
              embed: passed on to ``JM_get_font``.

          Raises:
              ValueError: if ``fontbuffer`` is given but is not bytes after
                  conversion.
          """
          if fontbuffer:
              if hasattr(fontbuffer, "getvalue"):
                  fontbuffer = fontbuffer.getvalue()
              elif isinstance(fontbuffer, bytearray):
                  fontbuffer = bytes(fontbuffer)
              if not isinstance(fontbuffer, bytes):
                  raise ValueError("bad type: 'fontbuffer'")

          if isinstance(fontname, str):
              fname_lower = fontname.lower()
              if "/" in fname_lower or "\\" in fname_lower or "." in fname_lower:
                  message("Warning: did you mean a fontfile?")

              if fname_lower in ("cjk", "china-t", "china-ts"):
                  ordering = 0
              elif fname_lower.startswith("china-s"):
                  ordering = 1
              elif fname_lower.startswith("korea"):
                  ordering = 3
              elif fname_lower.startswith("japan"):
                  ordering = 2
              elif fname_lower in fitz_fontdescriptors.keys():
                  import pymupdf_fonts  # optional fonts
                  fontbuffer = pymupdf_fonts.myfont(fname_lower)  # make a copy
                  fontname = None  # ensure using fontbuffer only
                  del pymupdf_fonts  # remove package again
              elif ordering < 0:
                  fontname = Base14_fontdict.get(fontname, fontname)

          lang = mupdf.fz_text_language_from_string(language)
          font = JM_get_font(fontname, fontfile,
                  fontbuffer, script, lang, ordering,
                  is_bold, is_italic, is_serif, embed)
          self.this = font

      def __repr__(self):
          return "Font('%s')" % self.name

      @property
      def ascender(self):
          """Return the glyph ascender value."""
          return mupdf.fz_font_ascender(self.this)

      @property
      def bbox(self):
          """Return the font bbox as reported by ``fz_font_bbox``."""
          return self.this.fz_font_bbox()

      @property
      def buffer(self):
          """Return a copy of the font's data buffer."""
          buffer_ = mupdf.FzBuffer(mupdf.ll_fz_keep_buffer(self.this.m_internal.buffer))
          return mupdf.fz_buffer_extract_copy(buffer_)

      def char_lengths(self, text, fontsize=11, language=None, script=0, wmode=0, small_caps=0):
          """Return list of char lengths of unicode 'text' under a fontsize."""
          lang = mupdf.fz_text_language_from_string(language)
          rc = []
          for ch in text:
              c = ord(ch)
              if small_caps:
                  gid = mupdf.fz_encode_character_sc(self.this, c)
                  if gid >= 0:
                      font = self.this
              else:
                  gid, font = mupdf.fz_encode_character_with_fallback(self.this, c, script, lang)
              rc.append(fontsize * mupdf.fz_advance_glyph(font, gid, wmode))
          return rc

      @property
      def descender(self):
          """Return the glyph descender value."""
          return mupdf.fz_font_descender(self.this)

      @property
      def flags(self):
          """Return the font flags as a dict, or None if unavailable.

          Keys: "mono", "serif", "bold", "italic", "substitute",
          "stretch", "fake-bold", "fake-italic", "opentype",
          "invalid-bbox", "cjk", "cjk-lang", "embed", "never-embed".
          """
          f = mupdf.ll_fz_font_flags(self.this.m_internal)
          if not f:
              return
          assert isinstance(f, mupdf.fz_font_flags_t)

          # (dict key, fz_font_flags_t member, bit width) in declaration
          # order of the C bitfield.  'cjk' is its own 1-bit member and
          # 'cjk_lang' is 2 bits wide; both must be consumed when
          # unpacking, or every following flag is misaligned.
          layout = (
              ("mono", "is_mono", 1),
              ("serif", "is_serif", 1),
              ("bold", "is_bold", 1),
              ("italic", "is_italic", 1),
              ("substitute", "ft_substitute", 1),
              ("stretch", "ft_stretch", 1),
              ("fake-bold", "fake_bold", 1),
              ("fake-italic", "fake_italic", 1),
              ("opentype", "has_opentype", 1),
              ("invalid-bbox", "invalid_bbox", 1),
              ("cjk", "cjk", 1),
              ("cjk-lang", "cjk_lang", 2),
              ("embed", "embed", 1),
              ("never-embed", "never_embed", 1),
          )
          if mupdf_cppyy:
              # cppyy includes remaining higher bits: the first member
              # yields the whole packed word, so peel the fields off it.
              packed = f.is_mono
              result = {}
              for key, _member, width in layout:
                  result[key] = packed & ((1 << width) - 1)
                  packed >>= width
              return result
          return {key: getattr(f, member) for key, member, _width in layout}

      def glyph_advance(self, chr_, language=None, script=0, wmode=0, small_caps=0):
          """Return the glyph width of a unicode (font size 1)."""
          lang = mupdf.fz_text_language_from_string(language)
          if small_caps:
              gid = mupdf.fz_encode_character_sc(self.this, chr_)
              if gid >= 0:
                  font = self.this
          else:
              gid, font = mupdf.fz_encode_character_with_fallback(self.this, chr_, script, lang)
          return mupdf.fz_advance_glyph(font, gid, wmode)

      def glyph_bbox(self, char, language=None, script=0, small_caps=0):
          """Return the glyph bbox of a unicode (font size 1)."""
          lang = mupdf.fz_text_language_from_string(language)
          if small_caps:
              gid = mupdf.fz_encode_character_sc(self.this, char)
              if gid >= 0:
                  font = self.this
          else:
              gid, font = mupdf.fz_encode_character_with_fallback(self.this, char, script, lang)
          return Rect(mupdf.fz_bound_glyph(font, gid, mupdf.FzMatrix()))

      @property
      def glyph_count(self):
          """Return the glyph count stored in the underlying font."""
          return self.this.m_internal.glyph_count

      def glyph_name_to_unicode(self, name):
          """Return the unicode for a glyph name."""
          return glyph_name_to_unicode(name)

      def has_glyph(self, chr, language=None, script=0, fallback=0, small_caps=0):
          """Check whether font has a glyph for this unicode.

          Returns the glyph id produced by the encoder that was selected
          via ``fallback`` / ``small_caps``.
          """
          if fallback:
              lang = mupdf.fz_text_language_from_string(language)
              gid, font = mupdf.fz_encode_character_with_fallback(self.this, chr, script, lang)
          else:
              if small_caps:
                  gid = mupdf.fz_encode_character_sc(self.this, chr)
              else:
                  gid = mupdf.fz_encode_character(self.this, chr)
          return gid

      @property
      def is_bold(self):
          return mupdf.fz_font_is_bold(self.this)

      @property
      def is_italic(self):
          return mupdf.fz_font_is_italic(self.this)

      @property
      def is_monospaced(self):
          return mupdf.fz_font_is_monospaced(self.this)

      @property
      def is_serif(self):
          return mupdf.fz_font_is_serif(self.this)

      @property
      def is_writable(self):
          """Always True.

          The former checks (Type 3 procs, substitute font, font writing
          support) were disabled; see pymupdf commit ef4056ee4da2.
          """
          return True

      @property
      def name(self):
          """Return the font name as reported by ``fz_font_name``."""
          ret = mupdf.fz_font_name(self.this)
          return ret

      def text_length(self, text, fontsize=11, language=None, script=0, wmode=0, small_caps=0):
          """Return length of unicode 'text' under a fontsize.

          Raises:
              TypeError: if ``text`` is not a str.
          """
          thisfont = self.this
          lang = mupdf.fz_text_language_from_string(language)
          rc = 0
          if not isinstance(text, str):
              raise TypeError(MSG_BAD_TEXT)
          for ch in text:
              c = ord(ch)
              if small_caps:
                  gid = mupdf.fz_encode_character_sc(thisfont, c)
                  if gid >= 0:
                      font = thisfont
              else:
                  gid, font = mupdf.fz_encode_character_with_fallback(thisfont, c, script, lang)
              rc += mupdf.fz_advance_glyph(font, gid, wmode)
          rc *= fontsize
          return rc

      def unicode_to_glyph_name(self, ch):
          """Return the glyph name for a unicode."""
          return unicode_to_glyph_name(ch)

      def valid_codepoints(self):
          '''
          Returns sorted list of valid unicodes of a fz_font.
          '''
          ucs_gids = mupdf.fz_enumerate_font_cmap2(self.this)
          return sorted({i.ucs for i in ucs_gids})
  class Graftmap:
      """Holds a MuPDF graft map created for a target PDF document."""

      def __init__(self, doc):
          """Create a graft map for ``doc`` and store it in ``self.this``."""
          target_pdf = _as_pdf_document(doc)
          self.this = mupdf.pdf_new_graft_map(target_pdf)
          self.thisown = True

      def __del__(self):
          # Only exact Graftmap instances (not subclass instances) give up
          # ownership here.
          if type(self) is Graftmap:
              self.thisown = False
  5826. class Link:
  5827. def __del__(self):
  5828. self._erase()
  def __init__(self, this):
      """Wrap ``this``, which must be a ``mupdf.FzLink``."""
      assert isinstance(this, mupdf.FzLink)
      self.this = this
  def __repr__(self):
      """Return "link on " followed by ``str()`` of the parent."""
      CheckParent(self)
      owner_text = str(self.parent)
      return "link on " + owner_text
  def __str__(self):
      """Return "link on " followed by ``str()`` of the parent."""
      CheckParent(self)
      owner_text = str(self.parent)
      return "link on " + owner_text
  def _border(self, doc, xref):
      """Return the border info of the link object at ``xref`` in ``doc``.

      Returns None if ``doc`` has no PDF object or the indirect reference
      could not be created.
      """
      pdf = _as_pdf_document(doc, required=0)
      if pdf.m_internal:
          link_obj = mupdf.pdf_new_indirect(pdf, xref, 0)
          if link_obj.m_internal:
              return JM_annot_border(link_obj)
      return None
  def _colors(self, doc, xref):
      """Return the color info of the link object at ``xref`` in ``doc``.

      Returns None if ``doc`` has no PDF object.

      Raises:
          ValueError: if the indirect reference could not be created.
      """
      pdf = _as_pdf_document(doc, required=0)
      if not pdf.m_internal:
          return None
      link_obj = mupdf.pdf_new_indirect(pdf, xref, 0)
      if link_obj.m_internal:
          return JM_annot_colors(link_obj)
      raise ValueError(MSG_BAD_XREF)
  5856. def _erase(self):
  5857. self.parent = None
  5858. self.thisown = False
  def _setBorder(self, border, doc, xref):
      """Apply ``border`` to the link object at ``xref`` in ``doc``.

      Returns the result of ``JM_annot_set_border``, or None if ``doc``
      has no PDF object or the indirect reference could not be created.
      """
      pdf = _as_pdf_document(doc, required=0)
      if pdf.m_internal:
          link_obj = mupdf.pdf_new_indirect(pdf, xref, 0)
          if link_obj.m_internal:
              return JM_annot_set_border(border, pdf, link_obj)
      return None
  5868. @property
  5869. def border(self):
  5870. return self._border(self.parent.parent.this, self.xref)
  5871. @property
  5872. def colors(self):
  5873. return self._colors(self.parent.parent.this, self.xref)
  5874. @property
  5875. def dest(self):
  5876. """Create link destination details."""
  5877. if hasattr(self, "parent") and self.parent is None:
  5878. raise ValueError("orphaned object: parent is None")
  5879. if self.parent.parent.is_closed or self.parent.parent.is_encrypted:
  5880. raise ValueError("document closed or encrypted")
  5881. doc = self.parent.parent
  5882. if self.is_external or self.uri.startswith("#"):
  5883. uri = None
  5884. else:
  5885. uri = doc.resolve_link(self.uri)
  5886. return linkDest(self, uri, doc)
  @property
  def flags(self) -> int:
      """Value of the link's "F" key as int.

      Returns 0 if the owning document is no PDF or the key's value is
      reported as "null".
      """
      CheckParent(self)
      document = self.parent.parent
      if document.is_pdf:
          value = document.xref_get_key(self.xref, "F")[1]
          if value != "null":
              return int(value)
      return 0
  @property
  def is_external(self):
      """Flag the link as external.

      Returns False if the link has no underlying object or no URI.
      """
      CheckParent(self)
      if g_use_extra:
          return extra.Link_is_external(self.this)
      link = self.this
      if link.m_internal and link.m_internal.uri:
          return bool(mupdf.fz_is_external_link(link.m_internal.uri))
      return False
  5907. @property
  5908. def next(self):
  5909. """Next link."""
  5910. if not self.this.m_internal:
  5911. return None
  5912. CheckParent(self)
  5913. if 0 and g_use_extra:
  5914. val = extra.Link_next( self.this)
  5915. else:
  5916. val = self.this.next()
  5917. if not val.m_internal:
  5918. return None
  5919. val = Link( val)
  5920. if val:
  5921. val.thisown = True
  5922. val.parent = self.parent # copy owning page from prev link
  5923. val.parent._annot_refs[id(val)] = val
  5924. if self.xref > 0: # prev link has an xref
  5925. link_xrefs = [x[0] for x in self.parent.annot_xrefs() if x[1] == mupdf.PDF_ANNOT_LINK]
  5926. link_ids = [x[2] for x in self.parent.annot_xrefs() if x[1] == mupdf.PDF_ANNOT_LINK]
  5927. idx = link_xrefs.index(self.xref)
  5928. val.xref = link_xrefs[idx + 1]
  5929. val.id = link_ids[idx + 1]
  5930. else:
  5931. val.xref = 0
  5932. val.id = ""
  5933. return val
  5934. @property
  5935. def rect(self):
  5936. """Rectangle ('hot area')."""
  5937. CheckParent(self)
  5938. # utils.py:getLinkDict() appears to expect exceptions from us, so we
  5939. # ensure that we raise on error.
  5940. if self.this is None or not self.this.m_internal:
  5941. raise Exception( 'self.this.m_internal not available')
  5942. val = JM_py_from_rect( self.this.rect())
  5943. val = Rect(val)
  5944. return val
  5945. def set_border(self, border=None, width=0, dashes=None, style=None):
  5946. if type(border) is not dict:
  5947. border = {"width": width, "style": style, "dashes": dashes}
  5948. return self._setBorder(border, self.parent.parent.this, self.xref)
  5949. def set_colors(self, colors=None, stroke=None, fill=None):
  5950. """Set border colors."""
  5951. CheckParent(self)
  5952. doc = self.parent.parent
  5953. if type(colors) is not dict:
  5954. colors = {"fill": fill, "stroke": stroke}
  5955. fill = colors.get("fill")
  5956. stroke = colors.get("stroke")
  5957. if fill is not None:
  5958. message("warning: links have no fill color")
  5959. if stroke in ([], ()):
  5960. doc.xref_set_key(self.xref, "C", "[]")
  5961. return
  5962. if hasattr(stroke, "__float__"):
  5963. stroke = [float(stroke)]
  5964. CheckColor(stroke)
  5965. assert len(stroke) in (1, 3, 4)
  5966. s = f"[{_format_g(stroke)}]"
  5967. doc.xref_set_key(self.xref, "C", s)
  5968. def set_flags(self, flags):
  5969. CheckParent(self)
  5970. doc = self.parent.parent
  5971. if not doc.is_pdf:
  5972. raise ValueError("is no PDF")
  5973. if not type(flags) is int:
  5974. raise ValueError("bad 'flags' value")
  5975. doc.xref_set_key(self.xref, "F", str(flags))
  5976. return None
  5977. @property
  5978. def uri(self):
  5979. """Uri string."""
  5980. #CheckParent(self)
  5981. if g_use_extra:
  5982. return extra.link_uri(self.this)
  5983. this_link = self.this
  5984. return this_link.m_internal.uri if this_link.m_internal else ''
# Default page number; linkDest.__init__ reads `obj.page` as its initial
# page value. NOTE(review): presumably a class-level attribute of the
# enclosing class - confirm against the class header.
page = -1
  5986. class Matrix:
  5987. def __abs__(self):
  5988. return math.sqrt(sum([c*c for c in self]))
  5989. def __add__(self, m):
  5990. if hasattr(m, "__float__"):
  5991. return Matrix(self.a + m, self.b + m, self.c + m,
  5992. self.d + m, self.e + m, self.f + m)
  5993. if len(m) != 6:
  5994. raise ValueError("Matrix: bad seq len")
  5995. return Matrix(self.a + m[0], self.b + m[1], self.c + m[2],
  5996. self.d + m[3], self.e + m[4], self.f + m[5])
  5997. def __bool__(self):
  5998. return not (max(self) == min(self) == 0)
  5999. def __eq__(self, mat):
  6000. if not hasattr(mat, "__len__"):
  6001. return False
  6002. return len(mat) == 6 and not (self - mat)
  6003. def __getitem__(self, i):
  6004. return (self.a, self.b, self.c, self.d, self.e, self.f)[i]
  6005. def __init__(self, *args, a=None, b=None, c=None, d=None, e=None, f=None):
  6006. """
  6007. Matrix() - all zeros
  6008. Matrix(a, b, c, d, e, f)
  6009. Matrix(zoom-x, zoom-y) - zoom
  6010. Matrix(shear-x, shear-y, 1) - shear
  6011. Matrix(degree) - rotate
  6012. Matrix(Matrix) - new copy
  6013. Matrix(sequence) - from 'sequence'
  6014. Matrix(mupdf.FzMatrix) - from MuPDF class wrapper for fz_matrix.
  6015. Explicit keyword args a, b, c, d, e, f override any earlier settings if
  6016. not None.
  6017. """
  6018. if not args:
  6019. self.a = self.b = self.c = self.d = self.e = self.f = 0.0
  6020. elif len(args) > 6:
  6021. raise ValueError("Matrix: bad seq len")
  6022. elif len(args) == 6: # 6 numbers
  6023. self.a, self.b, self.c, self.d, self.e, self.f = map(float, args)
  6024. elif len(args) == 1: # either an angle or a sequ
  6025. if isinstance(args[0], mupdf.FzMatrix):
  6026. self.a = args[0].a
  6027. self.b = args[0].b
  6028. self.c = args[0].c
  6029. self.d = args[0].d
  6030. self.e = args[0].e
  6031. self.f = args[0].f
  6032. elif hasattr(args[0], "__float__"):
  6033. theta = math.radians(args[0])
  6034. c_ = round(math.cos(theta), 8)
  6035. s_ = round(math.sin(theta), 8)
  6036. self.a = self.d = c_
  6037. self.b = s_
  6038. self.c = -s_
  6039. self.e = self.f = 0.0
  6040. else:
  6041. self.a, self.b, self.c, self.d, self.e, self.f = map(float, args[0])
  6042. elif len(args) == 2 or len(args) == 3 and args[2] == 0:
  6043. self.a, self.b, self.c, self.d, self.e, self.f = float(args[0]), \
  6044. 0.0, 0.0, float(args[1]), 0.0, 0.0
  6045. elif len(args) == 3 and args[2] == 1:
  6046. self.a, self.b, self.c, self.d, self.e, self.f = 1.0, \
  6047. float(args[1]), float(args[0]), 1.0, 0.0, 0.0
  6048. else:
  6049. raise ValueError("Matrix: bad args")
  6050. # Override with explicit args if specified.
  6051. if a is not None: self.a = a
  6052. if b is not None: self.b = b
  6053. if c is not None: self.c = c
  6054. if d is not None: self.d = d
  6055. if e is not None: self.e = e
  6056. if f is not None: self.f = f
  6057. def __invert__(self):
  6058. """Calculate inverted matrix."""
  6059. m1 = Matrix()
  6060. m1.invert(self)
  6061. return m1
  6062. def __len__(self):
  6063. return 6
  6064. def __mul__(self, m):
  6065. if hasattr(m, "__float__"):
  6066. return Matrix(self.a * m, self.b * m, self.c * m,
  6067. self.d * m, self.e * m, self.f * m)
  6068. m1 = Matrix(1,1)
  6069. return m1.concat(self, m)
  6070. def __neg__(self):
  6071. return Matrix(-self.a, -self.b, -self.c, -self.d, -self.e, -self.f)
  6072. def __nonzero__(self):
  6073. return not (max(self) == min(self) == 0)
  6074. def __pos__(self):
  6075. return Matrix(self)
  6076. def __repr__(self):
  6077. return "Matrix" + str(tuple(self))
  6078. def __setitem__(self, i, v):
  6079. v = float(v)
  6080. if i == 0: self.a = v
  6081. elif i == 1: self.b = v
  6082. elif i == 2: self.c = v
  6083. elif i == 3: self.d = v
  6084. elif i == 4: self.e = v
  6085. elif i == 5: self.f = v
  6086. else:
  6087. raise IndexError("index out of range")
  6088. return
  6089. def __sub__(self, m):
  6090. if hasattr(m, "__float__"):
  6091. return Matrix(self.a - m, self.b - m, self.c - m,
  6092. self.d - m, self.e - m, self.f - m)
  6093. if len(m) != 6:
  6094. raise ValueError("Matrix: bad seq len")
  6095. return Matrix(self.a - m[0], self.b - m[1], self.c - m[2],
  6096. self.d - m[3], self.e - m[4], self.f - m[5])
  6097. def __truediv__(self, m):
  6098. if hasattr(m, "__float__"):
  6099. return Matrix(self.a * 1./m, self.b * 1./m, self.c * 1./m,
  6100. self.d * 1./m, self.e * 1./m, self.f * 1./m)
  6101. m1 = util_invert_matrix(m)[1]
  6102. if not m1:
  6103. raise ZeroDivisionError("matrix not invertible")
  6104. m2 = Matrix(1,1)
  6105. return m2.concat(self, m1)
  6106. def concat(self, one, two):
  6107. """Multiply two matrices and replace current one."""
  6108. if not len(one) == len(two) == 6:
  6109. raise ValueError("Matrix: bad seq len")
  6110. self.a, self.b, self.c, self.d, self.e, self.f = util_concat_matrix(one, two)
  6111. return self
  6112. def invert(self, src=None):
  6113. """Calculate the inverted matrix. Return 0 if successful and replace
  6114. current one. Else return 1 and do nothing.
  6115. """
  6116. if src is None:
  6117. dst = util_invert_matrix(self)
  6118. else:
  6119. dst = util_invert_matrix(src)
  6120. if dst[0] == 1:
  6121. return 1
  6122. self.a, self.b, self.c, self.d, self.e, self.f = dst[1]
  6123. return 0
  6124. @property
  6125. def is_rectilinear(self):
  6126. """True if rectangles are mapped to rectangles."""
  6127. return (abs(self.b) < EPSILON and abs(self.c) < EPSILON) or \
  6128. (abs(self.a) < EPSILON and abs(self.d) < EPSILON)
  6129. def prerotate(self, theta):
  6130. """Calculate pre rotation and replace current matrix."""
  6131. theta = float(theta)
  6132. while theta < 0: theta += 360
  6133. while theta >= 360: theta -= 360
  6134. if abs(0 - theta) < EPSILON:
  6135. pass
  6136. elif abs(90.0 - theta) < EPSILON:
  6137. a = self.a
  6138. b = self.b
  6139. self.a = self.c
  6140. self.b = self.d
  6141. self.c = -a
  6142. self.d = -b
  6143. elif abs(180.0 - theta) < EPSILON:
  6144. self.a = -self.a
  6145. self.b = -self.b
  6146. self.c = -self.c
  6147. self.d = -self.d
  6148. elif abs(270.0 - theta) < EPSILON:
  6149. a = self.a
  6150. b = self.b
  6151. self.a = -self.c
  6152. self.b = -self.d
  6153. self.c = a
  6154. self.d = b
  6155. else:
  6156. rad = math.radians(theta)
  6157. s = math.sin(rad)
  6158. c = math.cos(rad)
  6159. a = self.a
  6160. b = self.b
  6161. self.a = c * a + s * self.c
  6162. self.b = c * b + s * self.d
  6163. self.c =-s * a + c * self.c
  6164. self.d =-s * b + c * self.d
  6165. return self
  6166. def prescale(self, sx, sy):
  6167. """Calculate pre scaling and replace current matrix."""
  6168. sx = float(sx)
  6169. sy = float(sy)
  6170. self.a *= sx
  6171. self.b *= sx
  6172. self.c *= sy
  6173. self.d *= sy
  6174. return self
  6175. def preshear(self, h, v):
  6176. """Calculate pre shearing and replace current matrix."""
  6177. h = float(h)
  6178. v = float(v)
  6179. a, b = self.a, self.b
  6180. self.a += v * self.c
  6181. self.b += v * self.d
  6182. self.c += h * a
  6183. self.d += h * b
  6184. return self
  6185. def pretranslate(self, tx, ty):
  6186. """Calculate pre translation and replace current matrix."""
  6187. tx = float(tx)
  6188. ty = float(ty)
  6189. self.e += tx * self.a + ty * self.c
  6190. self.f += tx * self.b + ty * self.d
  6191. return self
  6192. __inv__ = __invert__
  6193. __div__ = __truediv__
  6194. norm = __abs__
  6195. class IdentityMatrix(Matrix):
  6196. """Identity matrix [1, 0, 0, 1, 0, 0]"""
  6197. def __hash__(self):
  6198. return hash((1,0,0,1,0,0))
  6199. def __init__(self):
  6200. Matrix.__init__(self, 1.0, 1.0)
  6201. def __repr__(self):
  6202. return "IdentityMatrix(1.0, 0.0, 0.0, 1.0, 0.0, 0.0)"
  6203. def __setattr__(self, name, value):
  6204. if name in "ad":
  6205. self.__dict__[name] = 1.0
  6206. elif name in "bcef":
  6207. self.__dict__[name] = 0.0
  6208. else:
  6209. self.__dict__[name] = value
  6210. def checkargs(*args):
  6211. raise NotImplementedError("Identity is readonly")
  6212. Identity = IdentityMatrix()
  6213. class linkDest:
  6214. """link or outline destination details"""
  6215. def __init__(self, obj, rlink, document=None):
  6216. isExt = obj.is_external
  6217. isInt = not isExt
  6218. self.dest = ""
  6219. self.file_spec = ""
  6220. self.flags = 0
  6221. self.is_map = False
  6222. self.is_uri = False
  6223. self.kind = LINK_NONE
  6224. self.lt = Point(0, 0)
  6225. self.named = dict()
  6226. self.new_window = ""
  6227. self.page = obj.page
  6228. self.rb = Point(0, 0)
  6229. self.uri = obj.uri
  6230. def uri_to_dict(uri):
  6231. items = self.uri[1:].split('&')
  6232. ret = dict()
  6233. for item in items:
  6234. eq = item.find('=')
  6235. if eq >= 0:
  6236. ret[item[:eq]] = item[eq+1:]
  6237. else:
  6238. ret[item] = None
  6239. return ret
  6240. def unescape(name):
  6241. """Unescape '%AB' substrings to chr(0xAB)."""
  6242. split = name.replace("%%", "%25") # take care of escaped '%'
  6243. split = split.split("%")
  6244. newname = split[0]
  6245. for item in split[1:]:
  6246. piece = item[:2]
  6247. newname += chr(int(piece, base=16))
  6248. newname += item[2:]
  6249. return newname
  6250. if rlink and not self.uri.startswith("#"):
  6251. self.uri = f"#page={rlink[0] + 1}&zoom=0,{_format_g(rlink[1])},{_format_g(rlink[2])}"
  6252. if obj.is_external:
  6253. self.page = -1
  6254. self.kind = LINK_URI
  6255. if not self.uri:
  6256. self.page = -1
  6257. self.kind = LINK_NONE
  6258. if isInt and self.uri:
  6259. self.uri = self.uri.replace("&zoom=nan", "&zoom=0")
  6260. if self.uri.startswith("#"):
  6261. self.kind = LINK_GOTO
  6262. m = re.match('^#page=([0-9]+)&zoom=([0-9.]+),(-?[0-9.]+),(-?[0-9.]+)$', self.uri)
  6263. if m:
  6264. self.page = int(m.group(1)) - 1
  6265. self.lt = Point(float((m.group(3))), float(m.group(4)))
  6266. self.flags = self.flags | LINK_FLAG_L_VALID | LINK_FLAG_T_VALID
  6267. else:
  6268. m = re.match('^#page=([0-9]+)$', self.uri)
  6269. if m:
  6270. self.page = int(m.group(1)) - 1
  6271. else:
  6272. self.kind = LINK_NAMED
  6273. m = re.match('^#nameddest=(.*)', self.uri)
  6274. assert document
  6275. if document and m:
  6276. named = unescape(m.group(1))
  6277. self.named = document.resolve_names().get(named)
  6278. if self.named is None:
  6279. # document.resolve_names() does not contain an
  6280. # entry for `named` so use an empty dict.
  6281. self.named = dict()
  6282. self.named['nameddest'] = named
  6283. else:
  6284. self.named = uri_to_dict(self.uri[1:])
  6285. else:
  6286. self.kind = LINK_NAMED
  6287. self.named = uri_to_dict(self.uri)
  6288. if obj.is_external:
  6289. if not self.uri:
  6290. pass
  6291. elif self.uri.startswith("file:"):
  6292. self.file_spec = self.uri[5:]
  6293. if self.file_spec.startswith("//"):
  6294. self.file_spec = self.file_spec[2:]
  6295. self.is_uri = False
  6296. self.uri = ""
  6297. self.kind = LINK_LAUNCH
  6298. ftab = self.file_spec.split("#")
  6299. if len(ftab) == 2:
  6300. if ftab[1].startswith("page="):
  6301. self.kind = LINK_GOTOR
  6302. self.file_spec = ftab[0]
  6303. self.page = int(ftab[1].split("&")[0][5:]) - 1
  6304. elif ":" in self.uri:
  6305. self.is_uri = True
  6306. self.kind = LINK_URI
  6307. else:
  6308. self.is_uri = True
  6309. self.kind = LINK_LAUNCH
  6310. assert isinstance(self.named, dict)
  6311. class Widget:
  6312. '''
  6313. Class describing a PDF form field ("widget")
  6314. '''
  6315. def __init__(self):
  6316. self.border_color = None
  6317. self.border_style = "S"
  6318. self.border_width = 0
  6319. self.border_dashes = None
  6320. self.choice_values = None # choice fields only
  6321. self.rb_parent = None # radio buttons only: xref of owning parent
  6322. self.field_name = None # field name
  6323. self.field_label = None # field label
  6324. self.field_value = None
  6325. self.field_flags = 0
  6326. self.field_display = 0
  6327. self.field_type = 0 # valid range 1 through 7
  6328. self.field_type_string = None # field type as string
  6329. self.fill_color = None
  6330. self.button_caption = None # button caption
  6331. self.is_signed = None # True / False if signature
  6332. self.text_color = (0, 0, 0)
  6333. self.text_font = "Helv"
  6334. self.text_fontsize = 0
  6335. self.text_maxlen = 0 # text fields only
  6336. self.text_format = 0 # text fields only
  6337. self._text_da = "" # /DA = default appearance
  6338. self.script = None # JavaScript (/A)
  6339. self.script_stroke = None # JavaScript (/AA/K)
  6340. self.script_format = None # JavaScript (/AA/F)
  6341. self.script_change = None # JavaScript (/AA/V)
  6342. self.script_calc = None # JavaScript (/AA/C)
  6343. self.script_blur = None # JavaScript (/AA/Bl)
  6344. self.script_focus = None # JavaScript (/AA/Fo) codespell:ignore
  6345. self.rect = None # annot value
  6346. self.xref = 0 # annot value
  6347. def __repr__(self):
  6348. #return "'%s' widget on %s" % (self.field_type_string, str(self.parent))
  6349. # No self.parent.
  6350. return f'Widget:(field_type={self.field_type_string} script={self.script})'
  6351. return "'%s' widget" % (self.field_type_string)
  6352. def _adjust_font(self):
  6353. """Ensure text_font is from our list and correctly spelled.
  6354. """
  6355. if not self.text_font:
  6356. self.text_font = "Helv"
  6357. return
  6358. valid_fonts = ("Cour", "TiRo", "Helv", "ZaDb")
  6359. for f in valid_fonts:
  6360. if self.text_font.lower() == f.lower():
  6361. self.text_font = f
  6362. return
  6363. self.text_font = "Helv"
  6364. return
  6365. def _checker(self):
  6366. """Any widget type checks.
  6367. """
  6368. if self.field_type not in range(1, 8):
  6369. raise ValueError("bad field type")
  6370. # if setting a radio button to ON, first set Off all buttons
  6371. # in the group - this is not done by MuPDF:
  6372. if self.field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON and self.field_value not in (False, "Off") and hasattr(self, "parent"):
  6373. # so we are about setting this button to ON/True
  6374. # check other buttons in same group and set them to 'Off'
  6375. doc = self.parent.parent
  6376. kids_type, kids_value = doc.xref_get_key(self.xref, "Parent/Kids")
  6377. if kids_type == "array":
  6378. xrefs = tuple(map(int, kids_value[1:-1].replace("0 R","").split()))
  6379. for xref in xrefs:
  6380. if xref != self.xref:
  6381. doc.xref_set_key(xref, "AS", "/Off")
  6382. # the calling method will now set the intended button to on and
  6383. # will find everything prepared for correct functioning.
  6384. def _parse_da(self):
  6385. """Extract font name, size and color from default appearance string (/DA object).
  6386. Equivalent to 'pdf_parse_default_appearance' function in MuPDF's 'pdf-annot.c'.
  6387. """
  6388. if not self._text_da:
  6389. return
  6390. font = "Helv"
  6391. fsize = 0
  6392. col = (0, 0, 0)
  6393. dat = self._text_da.split() # split on any whitespace
  6394. for i, item in enumerate(dat):
  6395. if item == "Tf":
  6396. font = dat[i - 2][1:]
  6397. fsize = float(dat[i - 1])
  6398. dat[i] = dat[i-1] = dat[i-2] = ""
  6399. continue
  6400. if item == "g": # unicolor text
  6401. col = [(float(dat[i - 1]))]
  6402. dat[i] = dat[i-1] = ""
  6403. continue
  6404. if item == "rg": # RGB colored text
  6405. col = [float(f) for f in dat[i - 3:i]]
  6406. dat[i] = dat[i-1] = dat[i-2] = dat[i-3] = ""
  6407. continue
  6408. self.text_font = font
  6409. self.text_fontsize = fsize
  6410. self.text_color = col
  6411. self._text_da = ""
  6412. return
  6413. def _validate(self):
  6414. """Validate the class entries.
  6415. """
  6416. if (self.rect.is_infinite
  6417. or self.rect.is_empty
  6418. ):
  6419. raise ValueError("bad rect")
  6420. if not self.field_name:
  6421. raise ValueError("field name missing")
  6422. if self.field_label == "Unnamed":
  6423. self.field_label = None
  6424. CheckColor(self.border_color)
  6425. CheckColor(self.fill_color)
  6426. if not self.text_color:
  6427. self.text_color = (0, 0, 0)
  6428. CheckColor(self.text_color)
  6429. if not self.border_width:
  6430. self.border_width = 0
  6431. if not self.text_fontsize:
  6432. self.text_fontsize = 0
  6433. self.border_style = self.border_style.upper()[0:1]
  6434. # standardize content of JavaScript entries
  6435. btn_type = self.field_type in (
  6436. mupdf.PDF_WIDGET_TYPE_BUTTON,
  6437. mupdf.PDF_WIDGET_TYPE_CHECKBOX,
  6438. mupdf.PDF_WIDGET_TYPE_RADIOBUTTON,
  6439. )
  6440. if not self.script:
  6441. self.script = None
  6442. elif type(self.script) is not str:
  6443. raise ValueError("script content must be a string")
  6444. # buttons cannot have the following script actions
  6445. if btn_type or not self.script_calc:
  6446. self.script_calc = None
  6447. elif type(self.script_calc) is not str:
  6448. raise ValueError("script_calc content must be a string")
  6449. if btn_type or not self.script_change:
  6450. self.script_change = None
  6451. elif type(self.script_change) is not str:
  6452. raise ValueError("script_change content must be a string")
  6453. if btn_type or not self.script_format:
  6454. self.script_format = None
  6455. elif type(self.script_format) is not str:
  6456. raise ValueError("script_format content must be a string")
  6457. if btn_type or not self.script_stroke:
  6458. self.script_stroke = None
  6459. elif type(self.script_stroke) is not str:
  6460. raise ValueError("script_stroke content must be a string")
  6461. if btn_type or not self.script_blur:
  6462. self.script_blur = None
  6463. elif type(self.script_blur) is not str:
  6464. raise ValueError("script_blur content must be a string")
  6465. if btn_type or not self.script_focus:
  6466. self.script_focus = None
  6467. elif type(self.script_focus) is not str:
  6468. raise ValueError("script_focus content must be a string")
  6469. self._checker() # any field_type specific checks
  6470. def _sync_flags(self):
  6471. """Propagate the field flags.
  6472. If this widget has a "/Parent", set its field flags and that of all
  6473. its /Kids widgets to the value of the current widget.
  6474. Only possible for widgets existing in the PDF.
  6475. Returns True or False.
  6476. """
  6477. if not self.xref:
  6478. return False # no xref: widget not in the PDF
  6479. doc = self.parent.parent # the owning document
  6480. assert doc
  6481. pdf = _as_pdf_document(doc)
  6482. # load underlying PDF object
  6483. pdf_widget = mupdf.pdf_load_object(pdf, self.xref)
  6484. Parent = mupdf.pdf_dict_get(pdf_widget, PDF_NAME("Parent"))
  6485. if not Parent.pdf_is_dict():
  6486. return False # no /Parent: nothing to do
  6487. # put the field flags value into the parent field flags:
  6488. Parent.pdf_dict_put_int(PDF_NAME("Ff"), self.field_flags)
  6489. # also put that value into all kids of the Parent
  6490. kids = Parent.pdf_dict_get(PDF_NAME("Kids"))
  6491. if not kids.pdf_is_array():
  6492. message("warning: malformed PDF, Parent has no Kids array")
  6493. return False # no /Kids: should never happen!
  6494. for i in range(kids.pdf_array_len()): # walk through all kids
  6495. # access kid widget, and do some precautionary checks
  6496. kid = kids.pdf_array_get(i)
  6497. if not kid.pdf_is_dict():
  6498. continue
  6499. xref = kid.pdf_to_num() # get xref of the kid
  6500. if xref == self.xref: # skip self widget
  6501. continue
  6502. subtype = kid.pdf_dict_get(PDF_NAME("Subtype"))
  6503. if not subtype.pdf_to_name() == "Widget":
  6504. continue
  6505. # put the field flags value into the kid field flags:
  6506. kid.pdf_dict_put_int(PDF_NAME("Ff"), self.field_flags)
  6507. return True # all done
  6508. def button_states(self):
  6509. """Return the on/off state names for button widgets.
  6510. A button may have 'normal' or 'pressed down' appearances. While the 'Off'
  6511. state is usually called like this, the 'On' state is often given a name
  6512. relating to the functional context.
  6513. """
  6514. if self.field_type not in (2, 5):
  6515. return None # no button type
  6516. if hasattr(self, "parent"): # field already exists on page
  6517. doc = self.parent.parent
  6518. else:
  6519. return
  6520. xref = self.xref
  6521. states = {"normal": None, "down": None}
  6522. APN = doc.xref_get_key(xref, "AP/N")
  6523. if APN[0] == "dict":
  6524. nstates = []
  6525. APN = APN[1][2:-2]
  6526. apnt = APN.split("/")[1:]
  6527. for x in apnt:
  6528. nstates.append(x.split()[0])
  6529. states["normal"] = nstates
  6530. if APN[0] == "xref":
  6531. nstates = []
  6532. nxref = int(APN[1].split(" ")[0])
  6533. APN = doc.xref_object(nxref)
  6534. apnt = APN.split("/")[1:]
  6535. for x in apnt:
  6536. nstates.append(x.split()[0])
  6537. states["normal"] = nstates
  6538. APD = doc.xref_get_key(xref, "AP/D")
  6539. if APD[0] == "dict":
  6540. dstates = []
  6541. APD = APD[1][2:-2]
  6542. apdt = APD.split("/")[1:]
  6543. for x in apdt:
  6544. dstates.append(x.split()[0])
  6545. states["down"] = dstates
  6546. if APD[0] == "xref":
  6547. dstates = []
  6548. dxref = int(APD[1].split(" ")[0])
  6549. APD = doc.xref_object(dxref)
  6550. apdt = APD.split("/")[1:]
  6551. for x in apdt:
  6552. dstates.append(x.split()[0])
  6553. states["down"] = dstates
  6554. return states
  6555. @property
  6556. def next(self):
  6557. return self._annot.next
  6558. def on_state(self):
  6559. """Return the "On" value for button widgets.
  6560. This is useful for radio buttons mainly. Checkboxes will always return
  6561. "Yes". Radio buttons will return the string that is unequal to "Off"
  6562. as returned by method button_states().
  6563. If the radio button is new / being created, it does not yet have an
  6564. "On" value. In this case, a warning is shown and True is returned.
  6565. """
  6566. if self.field_type not in (2, 5):
  6567. return None # no checkbox or radio button
  6568. bstate = self.button_states()
  6569. if bstate is None:
  6570. bstate = dict()
  6571. for k in bstate.keys():
  6572. for v in bstate[k]:
  6573. if v != "Off":
  6574. return v
  6575. message("warning: radio button has no 'On' value.")
  6576. return True
  6577. def reset(self):
  6578. """Reset the field value to its default.
  6579. """
  6580. TOOLS._reset_widget(self._annot)
  6581. def update(self, sync_flags=False):
  6582. """Reflect Python object in the PDF."""
  6583. self._validate()
  6584. self._adjust_font() # ensure valid text_font name
  6585. # now create the /DA string
  6586. self._text_da = ""
  6587. if len(self.text_color) == 3:
  6588. fmt = "{:g} {:g} {:g} rg /{f:s} {s:g} Tf" + self._text_da
  6589. elif len(self.text_color) == 1:
  6590. fmt = "{:g} g /{f:s} {s:g} Tf" + self._text_da
  6591. elif len(self.text_color) == 4:
  6592. fmt = "{:g} {:g} {:g} {:g} k /{f:s} {s:g} Tf" + self._text_da
  6593. self._text_da = fmt.format(*self.text_color, f=self.text_font,
  6594. s=self.text_fontsize)
  6595. # finally update the widget
  6596. # if widget has a '/AA/C' script, make sure it is in the '/CO'
  6597. # array of the '/AcroForm' dictionary.
  6598. if self.script_calc: # there is a "calculation" script:
  6599. # make sure we are in the /CO array
  6600. util_ensure_widget_calc(self._annot)
  6601. # finally update the widget
  6602. TOOLS._save_widget(self._annot, self)
  6603. self._text_da = ""
  6604. if sync_flags:
  6605. self._sync_flags() # propagate field flags to parent and kids
  6606. from . import _extra
  6607. class Outline:
  6608. def __init__(self, ol):
  6609. self.this = ol
  6610. @property
  6611. def dest(self):
  6612. '''outline destination details'''
  6613. return linkDest(self, None, None)
  6614. def destination(self, document):
  6615. '''
  6616. Like `dest` property but uses `document` to resolve destinations for
  6617. kind=LINK_NAMED.
  6618. '''
  6619. return linkDest(self, None, document)
  6620. @property
  6621. def down(self):
  6622. ol = self.this
  6623. down_ol = ol.down()
  6624. if not down_ol.m_internal:
  6625. return
  6626. return Outline(down_ol)
  6627. @property
  6628. def is_external(self):
  6629. if g_use_extra:
  6630. # calling _extra.* here appears to save significant time in
  6631. # test_toc.py:test_full_toc, 1.2s=>0.94s.
  6632. #
  6633. return _extra.Outline_is_external( self.this)
  6634. ol = self.this
  6635. if not ol.m_internal:
  6636. return False
  6637. uri = ol.m_internal.uri if 1 else ol.uri()
  6638. if uri is None:
  6639. return False
  6640. return mupdf.fz_is_external_link(uri)
  6641. @property
  6642. def is_open(self):
  6643. if 1:
  6644. return self.this.m_internal.is_open
  6645. return self.this.is_open()
  6646. @property
  6647. def next(self):
  6648. ol = self.this
  6649. next_ol = ol.next()
  6650. if not next_ol.m_internal:
  6651. return
  6652. return Outline(next_ol)
  6653. @property
  6654. def page(self):
  6655. if 1:
  6656. return self.this.m_internal.page.page
  6657. return self.this.page().page
  6658. @property
  6659. def title(self):
  6660. return self.this.m_internal.title
  6661. @property
  6662. def uri(self):
  6663. ol = self.this
  6664. if not ol.m_internal:
  6665. return None
  6666. return ol.m_internal.uri
  6667. @property
  6668. def x(self):
  6669. return self.this.m_internal.x
  6670. @property
  6671. def y(self):
  6672. return self.this.m_internal.y
  6673. __slots__ = [ 'this']
  6674. def _make_PdfFilterOptions(
  6675. recurse=0,
  6676. instance_forms=0,
  6677. ascii=0,
  6678. no_update=0,
  6679. sanitize=0,
  6680. sopts=None,
  6681. ):
  6682. '''
  6683. Returns a mupdf.PdfFilterOptions instance.
  6684. '''
  6685. filter_ = mupdf.PdfFilterOptions()
  6686. filter_.recurse = recurse
  6687. filter_.instance_forms = instance_forms
  6688. filter_.ascii = ascii
  6689. filter_.no_update = no_update
  6690. if sanitize:
  6691. # We want to use a PdfFilterFactory whose `.filter` fn pointer is
  6692. # set to MuPDF's `pdf_new_sanitize_filter()`. But not sure how to
  6693. # get access to this raw fn in Python; and on Windows raw MuPDF
  6694. # functions are not even available to C++.
  6695. #
  6696. # So we use SWIG Director to implement our own
  6697. # PdfFilterFactory whose `filter()` method calls
  6698. # `mupdf.ll_pdf_new_sanitize_filter()`.
  6699. if sopts:
  6700. assert isinstance(sopts, mupdf.PdfSanitizeFilterOptions)
  6701. else:
  6702. sopts = mupdf.PdfSanitizeFilterOptions()
  6703. class Factory(mupdf.PdfFilterFactory2):
  6704. def __init__(self):
  6705. super().__init__()
  6706. self.use_virtual_filter()
  6707. self.sopts = sopts
  6708. def filter(self, ctx, doc, chain, struct_parents, transform, options):
  6709. if 0:
  6710. log(f'sanitize filter.filter():')
  6711. log(f' {self=}')
  6712. log(f' {ctx=}')
  6713. log(f' {doc=}')
  6714. log(f' {chain=}')
  6715. log(f' {struct_parents=}')
  6716. log(f' {transform=}')
  6717. log(f' {options=}')
  6718. log(f' {self.sopts.internal()=}')
  6719. return mupdf.ll_pdf_new_sanitize_filter(
  6720. doc,
  6721. chain,
  6722. struct_parents,
  6723. transform,
  6724. options,
  6725. self.sopts.internal(),
  6726. )
  6727. factory = Factory()
  6728. filter_.add_factory(factory.internal())
  6729. filter_._factory = factory
  6730. return filter_
  6731. class Page:
  6732. def __init__(self, page, document):
  6733. assert isinstance(page, (mupdf.FzPage, mupdf.PdfPage)), f'page is: {page}'
  6734. self.this = page
  6735. self.thisown = True
  6736. self.last_point = None
  6737. self.draw_cont = ''
  6738. self._annot_refs = dict()
  6739. self.parent = document
  6740. if page.m_internal:
  6741. if isinstance( page, mupdf.PdfPage):
  6742. self.number = page.m_internal.super.number
  6743. else:
  6744. self.number = page.m_internal.number
  6745. else:
  6746. self.number = None
  6747. def __repr__(self):
  6748. return self.__str__()
  6749. CheckParent(self)
  6750. x = self.parent.name
  6751. if self.parent.stream is not None:
  6752. x = "<memory, doc# %i>" % (self.parent._graft_id,)
  6753. if x == "":
  6754. x = "<new PDF, doc# %i>" % self.parent._graft_id
  6755. return "page %s of %s" % (self.number, x)
  6756. def __str__(self):
  6757. #CheckParent(self)
  6758. parent = getattr(self, 'parent', None)
  6759. if isinstance(self.this.m_internal, mupdf.pdf_page):
  6760. number = self.this.m_internal.super.number
  6761. else:
  6762. number = self.this.m_internal.number
  6763. ret = f'page {number}'
  6764. if parent:
  6765. x = self.parent.name
  6766. if self.parent.stream is not None:
  6767. x = "<memory, doc# %i>" % (self.parent._graft_id,)
  6768. if x == "":
  6769. x = "<new PDF, doc# %i>" % self.parent._graft_id
  6770. ret += f' of {x}'
  6771. return ret
  def _add_caret_annot(self, point):
      """Create a 'Caret' annotation and return the low-level annot object.

      If `point` is truthy, the annotation's default rectangle is moved so
      that its top-left corner is at `point` (size unchanged).
      """
      if g_use_extra:
          # Delegate completely to the compiled helper.
          return extra._add_caret_annot( self.this, JM_point_from_py(point))

      pdf_page = self._pdf_page()
      annot = mupdf.pdf_create_annot(pdf_page, mupdf.PDF_ANNOT_CARET)
      if point:
          origin = JM_point_from_py(point)
          box = mupdf.pdf_annot_rect(annot)
          moved = mupdf.FzRect(
                  origin.x,
                  origin.y,
                  origin.x + box.x1 - box.x0,
                  origin.y + box.y1 - box.y0,
                  )
          mupdf.pdf_set_annot_rect(annot, moved)
      mupdf.pdf_update_annot(annot)
      JM_add_annot_id(annot, "A")
      return annot
  def _add_file_annot(self, point, buffer_, filename, ufilename=None, desc=None, icon=None):
      """Create a 'FileAttachment' annotation at `point` embedding `buffer_`.

      `ufilename` and `desc` fall back to `filename` when falsy.

      Raises:
          TypeError: if `buffer_` cannot be converted to a MuPDF buffer.
      """
      pdf_page = self._pdf_page()
      unicode_name = ufilename or filename
      description = desc or filename
      origin = JM_point_from_py(point)
      payload = JM_BufferFromBytes(buffer_)
      if not payload.m_internal:
          raise TypeError( MSG_BAD_BUFFER)

      annot = mupdf.pdf_create_annot(pdf_page, mupdf.PDF_ANNOT_FILE_ATTACHMENT)
      # Keep the default icon size, but move it to `point`.
      default_box = mupdf.pdf_annot_rect(annot)
      box = mupdf.fz_make_rect(
              origin.x,
              origin.y,
              origin.x + default_box.x1 - default_box.x0,
              origin.y + default_box.y1 - default_box.y0,
              )
      mupdf.pdf_set_annot_rect(annot, box)
      print_flag = mupdf.PDF_ANNOT_IS_PRINT
      mupdf.pdf_set_annot_flags(annot, print_flag)
      if icon:
          mupdf.pdf_set_annot_icon_name(annot, icon)

      filespec = JM_embed_file(pdf_page.doc(), payload, filename, unicode_name, description, 1)
      mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME('FS'), filespec)
      mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), PDF_NAME('Contents'), filename)
      mupdf.pdf_update_annot(annot)
      # Rectangle and flags are applied a second time after the update.
      mupdf.pdf_set_annot_rect(annot, box)
      mupdf.pdf_set_annot_flags(annot, print_flag)
      JM_add_annot_id(annot, "A")
      return Annot(annot)
  def _add_freetext_annot(
          self, rect,
          text,
          fontsize=11,
          fontname=None,
          text_color=None,
          fill_color=None,
          border_color=None,
          border_width=0,
          dashes=None,
          callout=None,
          line_end=mupdf.PDF_ANNOT_LE_OPEN_ARROW,
          opacity=1,
          align=0,
          rotate=0,
          richtext=False,
          style=None,
          ):
      """Create a 'FreeText' annotation inside `rect` and return it as `Annot`.

      With `richtext` false, `text` is stored as the annotation contents and
      a default appearance string is built from `text_color`, `fontname`
      and `fontsize`. With `richtext` true, `text` is wrapped in an XHTML
      `<body>` element stored under /RC, and `style` (if given) under /DS.

      Raises:
          ValueError: if `border_color` is given without `richtext`, or if
              `rect` is infinite or empty.
      """
      page = self._pdf_page()
      if border_color and not richtext:
          raise ValueError("cannot set border_color if rich_text is False")
      if border_color and not text_color:
          text_color = border_color
      nfcol, fcol = JM_color_FromSequence(fill_color)
      ntcol, tcol = JM_color_FromSequence(text_color)
      r = JM_rect_from_py(rect)
      if mupdf.fz_is_infinite_rect(r) or mupdf.fz_is_empty_rect(r):
          raise ValueError( MSG_BAD_RECT)
      annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_FREE_TEXT)
      annot_obj = mupdf.pdf_annot_obj(annot)

      # insert text as 'contents' or 'RC' depending on 'richtext'
      if not richtext:
          mupdf.pdf_set_annot_contents(annot, text)
      else:
          # The default namespace must be the XHTML one ("xhtml", not
          # "xtml"). The pieces are joined explicitly so the stored string
          # does not depend on how this source file is indented.
          rc = (
              '<?xml version="1.0"?>\n'
              '<body xmlns="http://www.w3.org/1999/xhtml"\n'
              'xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/"\n'
              'xfa:contentType="text/html" xfa:APIVersion="Acrobat:8.0.0" xfa:spec="2.4">\n'
              f'{text}'
          )
          mupdf.pdf_dict_put_text_string(annot_obj,PDF_NAME("RC"), rc)
          if style:
              mupdf.pdf_dict_put_text_string(annot_obj,PDF_NAME("DS"), style)

      mupdf.pdf_set_annot_rect(annot, r)

      # Bring the rotation into the range 0 <= rotate < 360.
      while rotate < 0:
          rotate += 360
      while rotate >= 360:
          rotate -= 360
      if rotate != 0:
          mupdf.pdf_dict_put_int(annot_obj, PDF_NAME('Rotate'), rotate)

      mupdf.pdf_set_annot_quadding(annot, align)

      if nfcol > 0:
          mupdf.pdf_set_annot_color(annot, fcol[:nfcol])

      mupdf.pdf_set_annot_border_width(annot, border_width)
      mupdf.pdf_set_annot_opacity(annot, opacity)
      if dashes:
          for d in dashes:
              mupdf.pdf_add_annot_border_dash_item(annot, float(d))

      # Insert callout information
      if callout:
          mupdf.pdf_dict_put(annot_obj, PDF_NAME("IT"), PDF_NAME("FreeTextCallout"))
          mupdf.pdf_set_annot_callout_style(annot, line_end)
          point_count = len(callout)
          extra.JM_set_annot_callout_line(annot, tuple(callout), point_count)

      # insert the default appearance string
      if not richtext:
          JM_make_annot_DA(annot, ntcol, tcol, fontname, fontsize)

      mupdf.pdf_update_annot(annot)
      JM_add_annot_id(annot, "A")
      return Annot(annot)
  def _add_ink_annot(self, list):
      """Create an 'Ink' annotation from a sequence of strokes.

      Args:
          list: sequence of strokes; each stroke is a sequence of 2-item
              point sequences. (The parameter name shadows the builtin but
              is kept for compatibility.)

      Raises:
          ValueError: if `list` is not a sequence or any point is not a
              sequence of length 2.

      All points are validated and converted before the annotation is
      created, so invalid input does not leave a partly built annotation
      on the page.
      """
      page = _as_pdf_page(self.this)
      if not PySequence_Check(list):
          raise ValueError( MSG_BAD_ARG_INK_ANNOT)
      ctm = mupdf.FzMatrix()
      mupdf.pdf_page_transform(page, mupdf.FzRect(0), ctm)
      inv_ctm = mupdf.fz_invert_matrix(ctm)

      # Pass 1: validate and transform every point; nothing is written yet.
      strokes = []
      for j in range(len(list)):
          sublist = list[j]
          coords = []
          for i in range(len(sublist)):
              p = sublist[i]
              if not PySequence_Check(p) or PySequence_Size(p) != 2:
                  raise ValueError( MSG_BAD_ARG_INK_ANNOT)
              point = mupdf.fz_transform_point(JM_point_from_py(p), inv_ctm)
              coords.append((point.x, point.y))
          strokes.append(coords)

      # Pass 2: build the annotation and its /InkList array.
      annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_INK)
      annot_obj = mupdf.pdf_annot_obj(annot)
      inklist = mupdf.pdf_new_array(page.doc(), len(strokes))
      for coords in strokes:
          stroke = mupdf.pdf_new_array(page.doc(), 2 * len(coords))
          for x, y in coords:
              mupdf.pdf_array_push_real(stroke, x)
              mupdf.pdf_array_push_real(stroke, y)
          mupdf.pdf_array_push(inklist, stroke)
      mupdf.pdf_dict_put(annot_obj, PDF_NAME('InkList'), inklist)
      mupdf.pdf_update_annot(annot)
      JM_add_annot_id(annot, "A")
      return Annot(annot)
  def _add_line_annot(self, p1, p2):
      """Create a 'Line' annotation from `p1` to `p2` and return it as `Annot`."""
      pdf_page = self._pdf_page()
      line = mupdf.pdf_create_annot(pdf_page, mupdf.PDF_ANNOT_LINE)
      mupdf.pdf_set_annot_line(line, JM_point_from_py(p1), JM_point_from_py(p2))
      mupdf.pdf_update_annot(line)
      JM_add_annot_id(line, "A")
      assert line.m_internal
      return Annot(line)
  def _add_multiline(self, points, annot_type):
      """Create an annotation of `annot_type` with one vertex per point.

      Args:
          points: sequence of at least two 2-item point sequences.
          annot_type: MuPDF annotation type passed to `pdf_create_annot`.

      Raises:
          ValueError: if fewer than two points are given or a point does
              not have exactly two items.

      The points are checked before the annotation is created, so invalid
      input does not leave a partly built annotation on the page.
      """
      page = self._pdf_page()
      if len(points) < 2:
          raise ValueError( MSG_BAD_ARG_POINTS)
      for p in points:
          if PySequence_Size(p) != 2:
              raise ValueError( MSG_BAD_ARG_POINTS)
      annot = mupdf.pdf_create_annot(page, annot_type)
      for p in points:
          mupdf.pdf_add_annot_vertex(annot, JM_point_from_py(p))
      mupdf.pdf_update_annot(annot)
      JM_add_annot_id(annot, "A")
      return Annot(annot)
  def _add_redact_annot(self, quad, text=None, da_str=None, align=0, fill=None, text_color=None):
      """Create a 'Redact' annotation covering the bounding rect of `quad`.

      A truthy `fill` is stored as the /IC color array. A truthy `text` is
      stored as /OverlayText together with `da_str` (/DA) and `align` (/Q).
      `text_color` is not referenced in this method.
      """
      pdf_page = self._pdf_page()
      annot = mupdf.pdf_create_annot(pdf_page, mupdf.PDF_ANNOT_REDACT)
      bbox = mupdf.fz_rect_from_quad(JM_quad_from_py(quad))
      # TODO calculate de-rotated rect
      mupdf.pdf_set_annot_rect(annot, bbox)

      if fill:
          ncomp, comps = JM_color_FromSequence(fill)
          interior = mupdf.pdf_new_array(pdf_page.doc(), ncomp)
          for k in range(ncomp):
              mupdf.pdf_array_push_real(interior, comps[k])
          mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME('IC'), interior)

      if text:
          assert da_str
          mupdf.pdf_dict_puts(
                  mupdf.pdf_annot_obj(annot),
                  "OverlayText",
                  mupdf.pdf_new_text_string(text),
                  )
          mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), PDF_NAME('DA'), da_str)
          mupdf.pdf_dict_put_int(mupdf.pdf_annot_obj(annot), PDF_NAME('Q'), align)

      mupdf.pdf_update_annot(annot)
      JM_add_annot_id(annot, "A")
      # Take an extra low-level reference and re-wrap it before handing
      # the annotation to `Annot`.
      kept = mupdf.ll_pdf_keep_annot(annot.m_internal)
      return Annot(mupdf.PdfAnnot( kept))
  def _add_square_or_circle(self, rect, annot_type):
      """Create an annotation of `annot_type` occupying `rect`.

      Raises:
          ValueError: if `rect` is infinite or empty.
      """
      pdf_page = self._pdf_page()
      box = JM_rect_from_py(rect)
      unusable = mupdf.fz_is_infinite_rect(box) or mupdf.fz_is_empty_rect(box)
      if unusable:
          raise ValueError( MSG_BAD_RECT)
      shape = mupdf.pdf_create_annot(pdf_page, annot_type)
      mupdf.pdf_set_annot_rect(shape, box)
      mupdf.pdf_update_annot(shape)
      JM_add_annot_id(shape, "A")
      assert shape.m_internal
      return Annot(shape)
  def _add_stamp_annot(self, rect, stamp=0):
      """Create a 'Stamp' annotation inside `rect`.

      `stamp` selects the content:
        * an index into the list of standard stamp names -> text stamp,
        * a `Pixmap`, a file path (str), bytes / bytearray or
          `io.BytesIO` -> image stamp, scaled to fit and centered in `rect`,
        * anything else -> the first standard name.

      Raises:
          ValueError: if `rect` is infinite or empty.
      """
      rect = Rect(rect)
      r = JM_rect_from_py(rect)
      if mupdf.fz_is_infinite_rect(r) or mupdf.fz_is_empty_rect(r):
          raise ValueError(MSG_BAD_RECT)
      page = self._pdf_page()
      stamp_names = (
              "Approved",
              "AsIs",
              "Confidential",
              "Departmental",
              "Experimental",
              "Expired",
              "Final",
              "ForComment",
              "ForPublicRelease",
              "NotApproved",
              "NotForPublicRelease",
              "Sold",
              "TopSecret",
              "Draft",
              )
      image_data = None
      name = None
      if stamp in range(len(stamp_names)):
          name = stamp_names[stamp]
      elif isinstance(stamp, Pixmap):
          image_data = stamp.tobytes()
      elif isinstance(stamp, str):
          image_data = pathlib.Path(stamp).read_bytes()
      elif isinstance(stamp, (bytes, bytearray)):
          image_data = stamp
      elif isinstance(stamp, io.BytesIO):
          image_data = stamp.getvalue()
      else:
          name = stamp_names[0]

      annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_STAMP)
      annot_obj = mupdf.pdf_annot_obj
      if image_data:  # image stamp
          img = mupdf.fz_new_image_from_buffer(
                  mupdf.fz_new_buffer_from_copied_data(image_data)
                  )
          # compute image boundary box on page
          w, h = img.w(), img.h()
          scale = min(rect.width / w, rect.height / h)
          width = w * scale  # bbox width
          height = h * scale  # bbox height
          # center of "rect"
          center = (rect.tl + rect.br) / 2
          x0 = center.x - width / 2
          y0 = center.y - height / 2
          x1 = x0 + width
          y1 = y0 + height
          r = mupdf.fz_make_rect(x0, y0, x1, y1)
          mupdf.pdf_set_annot_rect(annot, r)
          mupdf.pdf_set_annot_stamp_image(annot, img)
          mupdf.pdf_dict_put(annot_obj(annot), PDF_NAME("Name"), mupdf.pdf_new_name("ImageStamp"))
          mupdf.pdf_set_annot_contents(annot, "Image Stamp")
      else:  # text stamp
          mupdf.pdf_set_annot_rect(annot, r)
          mupdf.pdf_dict_put(annot_obj(annot), PDF_NAME("Name"), PDF_NAME(name))
          mupdf.pdf_set_annot_contents(annot, name)
      mupdf.pdf_update_annot(annot)
      JM_add_annot_id(annot, "A")
      return Annot(annot)
  def _add_text_annot(self, point, text, icon=None):
      """Create a 'Text' annotation at `point` with `text` as contents.

      The annotation keeps its default size; only its top-left corner is
      moved to `point`. A truthy `icon` sets the icon name.
      """
      pdf_page = self._pdf_page()
      origin = JM_point_from_py( point)
      note = mupdf.pdf_create_annot(pdf_page, mupdf.PDF_ANNOT_TEXT)
      default_box = mupdf.pdf_annot_rect(note)
      box = mupdf.fz_make_rect(
              origin.x,
              origin.y,
              origin.x + default_box.x1 - default_box.x0,
              origin.y + default_box.y1 - default_box.y0,
              )
      mupdf.pdf_set_annot_rect(note, box)
      mupdf.pdf_set_annot_contents(note, text)
      if icon:
          mupdf.pdf_set_annot_icon_name(note, icon)
      mupdf.pdf_update_annot(note)
      JM_add_annot_id(note, "A")
      return Annot(note)
  def _add_text_marker(self, quads, annot_type):
      """Create a text marker annotation and register it with this page.

      Returns None if the helper yields a falsy result. Otherwise the
      annotation gets a weak proxy to this page as `parent` and is stored
      in `self._annot_refs`.

      Raises:
          ValueError: if the parent document is not a PDF.
      """
      CheckParent(self)
      if not self.parent.is_pdf:
          raise ValueError("is no PDF")
      marker = Page__add_text_marker(self, quads, annot_type)
      if marker:
          marker.parent = weakref.proxy(self)
          self._annot_refs[id(marker)] = marker
          return marker
      return None
  def _addAnnot_FromString(self, linklist):
      """Add links from list of object sources.

      Args:
          linklist: tuple of PDF object source strings. Each item is added
              to the document as a new object and an indirect reference
              to it is appended to the page's /Annots array.

      Items that are empty or cannot be turned into a PDF object are
      reported via `message()` and skipped.

      Raises:
          ValueError: if a non-empty `linklist` is not a tuple.
      """
      CheckParent(self)
      if g_use_extra:
          # Rebind the class attribute so later calls go straight to the
          # compiled implementation.
          self.__class__._addAnnot_FromString = extra.Page_addAnnot_FromString
          #log('Page._addAnnot_FromString() deferring to extra.Page_addAnnot_FromString().')
          return extra.Page_addAnnot_FromString( self.this, linklist)
      page = _as_pdf_page(self.this)
      lcount = len(linklist)  # link count
      if lcount < 1:
          return

      # insert links from the provided sources
      if not isinstance(linklist, tuple):
          raise ValueError( "bad 'linklist' argument")
      if not mupdf.pdf_dict_get( page.obj(), PDF_NAME('Annots')).m_internal:
          mupdf.pdf_dict_put_array( page.obj(), PDF_NAME('Annots'), lcount)
      annots = mupdf.pdf_dict_get( page.obj(), PDF_NAME('Annots'))
      assert annots.m_internal, f'{lcount=} {annots.m_internal=}'
      for i, txtpy in enumerate(linklist):
          text = JM_StrAsChar(txtpy)
          if not text:
              # Format the index into the text, exactly like the message
              # in the exception handler below; passing `i` as a second
              # argument would not format it.
              message("skipping bad link / annot item %i." % i)
              continue
          try:
              annot = mupdf.pdf_add_object( page.doc(), JM_pdf_obj_from_str( page.doc(), text))
              ind_obj = mupdf.pdf_new_indirect( page.doc(), mupdf.pdf_to_num( annot), 0)
              mupdf.pdf_array_push( annots, ind_obj)
          except Exception:
              # Best effort: one bad item must not stop the remaining ones.
              if g_exceptions_verbose:
                  exception_info()
              message("skipping bad link / annot item %i.\n" % i)
  def _addWidget(self, field_type, field_name):
      """Create a widget of `field_type` named `field_name` on this page.

      Raises:
          RuntimeError: if the helper returns an empty annotation.
      """
      pdf_page = self._pdf_page()
      widget = JM_create_widget(pdf_page.doc(), pdf_page, field_type, field_name)
      if widget.m_internal:
          JM_add_annot_id(widget, "W")
          return Annot(widget)
      raise RuntimeError( "cannot create widget")
  def _apply_redactions(self, text, images, graphics):
      """Run MuPDF page redaction and return its result.

      `text`, `images` and `graphics` are copied into the redaction
      options' `text`, `image_method` and `line_art` fields; black boxes
      are switched off.
      """
      pdf_page = self._pdf_page()
      options = mupdf.PdfRedactOptions()
      options.black_boxes = 0  # no black boxes
      options.text = text  # how to treat text
      options.image_method = images  # how to treat images
      options.line_art = graphics  # how to treat vector graphics
      return mupdf.pdf_redact_page(pdf_page.doc(), pdf_page, options)
  7106. def _erase(self):
  7107. self._reset_annot_refs()
  7108. try:
  7109. self.parent._forget_page(self)
  7110. except Exception:
  7111. exception_info()
  7112. pass
  7113. self.parent = None
  7114. self.thisown = False
  7115. self.number = None
  7116. self.this = None
  def _count_q_balance(self):
      """Count missing graphic state pushes and pops.

      Returns:
          A pair of integers (push, pop): the number of missing PDF "q"
          commands and the number of missing "Q" commands.
          The page's graphics state becomes balanced if its /Contents is
          prepended with 'push' copies of "q\n" and appended with 'pop'
          copies of "\nQ".
      """
      pdf_page = _as_pdf_page(self)  # need the underlying PDF page
      page_obj = pdf_page.obj()
      # access /Resources and /Contents of the page object
      resources = mupdf.pdf_dict_get(page_obj, mupdf.PDF_ENUM_NAME_Resources)
      contents = mupdf.pdf_dict_get(page_obj, mupdf.PDF_ENUM_NAME_Contents)
      pdf_doc = _as_pdf_document(self.parent)  # need underlying PDF document
      # return value of MuPDF function
      return mupdf.pdf_count_q_balance_outparams_fn(pdf_doc, resources, contents)
  def _get_optional_content(self, oc: OptInt) -> OptStr:
      """Return the resource property name that refers to xref `oc`.

      Returns None if `oc` is None or 0. If the page has no property for
      `oc` yet, the first unused name 'MC0', 'MC1', ... is registered
      and returned.

      Raises:
          ValueError: if the object at `oc` is neither /Type/OCG nor
              /Type/OCMD.
      """
      if oc is None or oc == 0:
          return None
      source = self.parent.xref_object(oc, compressed=True)
      if "/Type/OCG" not in source and "/Type/OCMD" not in source:
          raise ValueError("bad optional content: 'oc'")
      # Map xref -> property name for everything already on the page.
      name_by_xref = {xref: name for name, xref in self._get_resource_properties()}
      if oc in name_by_xref:
          return name_by_xref[oc]
      # Pick the first 'MC<n>' name that is not in use.
      counter = 0
      while ("MC%i" % counter) in name_by_xref.values():
          counter += 1
      mc = "MC%i" % counter
      self._set_resource_property(mc, oc)
      return mc
  def _get_resource_properties(self):
      '''
      Return the page's Resource/Properties as delivered by
      `JM_get_resource_properties()`.
      '''
      pdf_page = self._pdf_page()
      return JM_get_resource_properties(pdf_page.obj())
  def _get_textpage(self, clip=None, flags=0, matrix=None):
      """Run the page through a structured-text device and return the
      resulting `mupdf.FzStextPage`.

      Args:
          clip: rectangle for the text page; the page's bounding rectangle
              is used when None.
          flags: passed to `mupdf.FzStextOptions`.
          matrix: converted with `JM_matrix_from_py` and used when running
              the page.
      """
      if g_use_extra:
          ll_tpage = extra.page_get_textpage(self.this, clip, flags, matrix)
          return mupdf.FzStextPage(ll_tpage)
      page = self.this
      options = mupdf.FzStextOptions(flags)
      # Default to page's rect if `clip` not specified, for #2048.
      # (`clip` is converted exactly once, and only when it is given.)
      rect = mupdf.fz_bound_page(page) if clip is None else JM_rect_from_py(clip)
      ctm = JM_matrix_from_py(matrix)
      tpage = mupdf.FzStextPage(rect)
      dev = mupdf.fz_new_stext_device(tpage, options)
      if _globals.no_device_caching:
          mupdf.fz_enable_device_hints( dev, mupdf.FZ_NO_CACHE)
      # `fz_run_page` is given an FzPage; a PdfPage supplies one via super().
      if isinstance(page, mupdf.FzPage):
          pass
      elif isinstance(page, mupdf.PdfPage):
          page = page.super()
      else:
          assert 0, f'Unrecognised {type(page)=}'
      mupdf.fz_run_page(page, dev, ctm, mupdf.FzCookie())
      mupdf.fz_close_device(dev)
      return tpage
  def _insert_image(self,
          filename=None, pixmap=None, stream=None, imask=None, clip=None,
          overlay=1, rotate=0, keep_proportion=1, oc=0, width=0, height=0,
          xref=0, alpha=-1, _imgname=None, digests=None
          ):
      """Put an image on this PDF page and return its xref.

      The image source is chosen in this order: an existing image object
      (`xref` > 0), `stream`, `filename`, otherwise `pixmap`. For the
      last three an MD5 digest is looked up in `digests`; on a hit the
      already stored image xref is reused instead of adding a new image
      object. `imask` (stream / file case only) supplies a separate mask
      image.

      The steps are controlled by the `do_*` flags, which emulate the
      `goto` labels named in the comments: a step runs only while its
      flag is still 1.

      Returns:
          (img_xref, digests) if a new image object was added to the PDF
          (`digests` then contains the new entry), else (img_xref, None).

      Raises:
          ValueError: if the object at `xref` has neither width nor
              height, or if a mask is requested for an image without a
              compressed buffer.

      NOTE(review): `alpha` is not referenced in this method, and
      `digests` / `pixmap` are used without a None check - presumably
      callers always supply them when needed; confirm against callers.
      """
      maskbuf = mupdf.FzBuffer()
      page = self._pdf_page()
      # This will create an empty PdfDocument with a call to
      # pdf_new_document() then assign page.doc()'s return value to it (which
      # drop the original empty pdf_document).
      pdf = page.doc()
      w = width
      h = height
      img_xref = xref
      rc_digest = 0  # becomes 1 once a new image object has been added

      # One flag per emulated goto target; all steps are enabled at first.
      do_process_pixmap = 1
      do_process_stream = 1
      do_have_imask = 1
      do_have_image = 1
      do_have_xref = 1

      if xref > 0:
          # Reuse an existing image object: only its dimensions are read.
          ref = mupdf.pdf_new_indirect(pdf, xref, 0)
          w = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Width'), PDF_NAME('W')))
          h = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Height'), PDF_NAME('H')))
          if w + h == 0:
              raise ValueError( MSG_IS_NO_IMAGE)
          #goto have_xref()
          do_process_pixmap = 0
          do_process_stream = 0
          do_have_imask = 0
          do_have_image = 0
      else:
          if stream:
              imgbuf = JM_BufferFromBytes(stream)
              do_process_pixmap = 0
          else:
              if filename:
                  imgbuf = mupdf.fz_read_file(filename)
                  #goto have_stream()
                  do_process_pixmap = 0

      if do_process_pixmap:
          #log( 'do_process_pixmap')
          # process pixmap ---------------------------------
          arg_pix = pixmap.this
          w = arg_pix.w()
          h = arg_pix.h()
          digest = mupdf.fz_md5_pixmap2(arg_pix)
          md5_py = digest
          temp = digests.get(md5_py, None)
          if temp is not None:
              # Same pixmap was inserted before: reuse its image object.
              img_xref = temp
              ref = mupdf.pdf_new_indirect(page.doc(), img_xref, 0)
              #goto have_xref()
              do_process_stream = 0
              do_have_imask = 0
              do_have_image = 0
          else:
              if arg_pix.alpha() == 0:
                  image = mupdf.fz_new_image_from_pixmap(arg_pix, mupdf.FzImage())
              else:
                  # Pixmap has an alpha channel: build a separate mask
                  # image from it and attach that to the main image.
                  pm = mupdf.fz_convert_pixmap(
                          arg_pix,
                          mupdf.FzColorspace(),
                          mupdf.FzColorspace(),
                          mupdf.FzDefaultColorspaces(None),
                          mupdf.FzColorParams(),
                          1,
                          )
                  pm.alpha = 0
                  pm.colorspace = None
                  mask = mupdf.fz_new_image_from_pixmap(pm, mupdf.FzImage())
                  image = mupdf.fz_new_image_from_pixmap(arg_pix, mask)
              #goto have_image()
              do_process_stream = 0
              do_have_imask = 0

      if do_process_stream:
          #log( 'do_process_stream')
          # process stream ---------------------------------
          # The digest covers the image bytes and, if given, the mask bytes.
          state = mupdf.FzMd5()
          if mupdf_cppyy:
              mupdf.fz_md5_update_buffer( state, imgbuf)
          else:
              mupdf.fz_md5_update(state, imgbuf.m_internal.data, imgbuf.m_internal.len)
          if imask:
              maskbuf = JM_BufferFromBytes(imask)
              if mupdf_cppyy:
                  mupdf.fz_md5_update_buffer( state, maskbuf)
              else:
                  mupdf.fz_md5_update(state, maskbuf.m_internal.data, maskbuf.m_internal.len)
          digest = mupdf.fz_md5_final2(state)
          md5_py = bytes(digest)
          temp = digests.get(md5_py, None)
          if temp is not None:
              # Same data was inserted before: reuse its image object.
              img_xref = temp
              ref = mupdf.pdf_new_indirect(page.doc(), img_xref, 0)
              w = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Width'), PDF_NAME('W')))
              h = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Height'), PDF_NAME('H')))
              #goto have_xref()
              do_have_imask = 0
              do_have_image = 0
          else:
              image = mupdf.fz_new_image_from_buffer(imgbuf)
              w = image.w()
              h = image.h()
              if not imask:
                  #goto have_image()
                  do_have_imask = 0

      if do_have_imask:
          # `fz_compressed_buffer` is reference counted and
          # `mupdf.fz_new_image_from_compressed_buffer2()`
          # is provided as a Swig-friendly wrapper for
          # `fz_new_image_from_compressed_buffer()`, so we can do things
          # straightforwardly.
          #
          cbuf1 = mupdf.fz_compressed_image_buffer( image)
          if not cbuf1.m_internal:
              raise ValueError( "uncompressed image cannot have mask")
          bpc = image.bpc()
          colorspace = image.colorspace()
          xres, yres = mupdf.fz_image_resolution(image)
          mask = mupdf.fz_new_image_from_buffer(maskbuf)
          # Rebuild the image from its compressed data with the mask attached.
          image = mupdf.fz_new_image_from_compressed_buffer2(
                  w,
                  h,
                  bpc,
                  colorspace,
                  xres,
                  yres,
                  1,  # interpolate
                  0,  # imagemask,
                  list(), # decode
                  list(), # colorkey
                  cbuf1,
                  mask,
                  )

      if do_have_image:
          #log( 'do_have_image')
          # Add the new image object to the PDF and remember its digest.
          ref = mupdf.pdf_add_image(pdf, image)
          if oc:
              JM_add_oc_object(pdf, ref, oc)
          img_xref = mupdf.pdf_to_num(ref)
          digests[md5_py] = img_xref
          rc_digest = 1

      if do_have_xref:
          #log( 'do_have_xref')
          # Register the image under `_imgname` in /Resources/XObject
          # (creating the dictionaries if missing) and add a content
          # stream that draws it with the computed matrix.
          resources = mupdf.pdf_dict_get_inheritable(page.obj(), PDF_NAME('Resources'))
          if not resources.m_internal:
              resources = mupdf.pdf_dict_put_dict(page.obj(), PDF_NAME('Resources'), 2)
          xobject = mupdf.pdf_dict_get(resources, PDF_NAME('XObject'))
          if not xobject.m_internal:
              xobject = mupdf.pdf_dict_put_dict(resources, PDF_NAME('XObject'), 2)
          mat = calc_image_matrix(w, h, clip, rotate, keep_proportion)
          mupdf.pdf_dict_puts(xobject, _imgname, ref)
          nres = mupdf.fz_new_buffer(50)
          s = f"\nq\n{_format_g((mat.a, mat.b, mat.c, mat.d, mat.e, mat.f))} cm\n/{_imgname} Do\nQ\n"
          #s = s.replace('\n', '\r\n')
          mupdf.fz_append_string(nres, s)
          JM_insert_contents(pdf, page.obj(), nres, overlay)

      if rc_digest:
          return img_xref, digests
      else:
          return img_xref, None
  def _insertFont(self, fontname, bfname, fontfile, fontbuffer, set_simple, idx, wmode, serif, encoding, ordering):
      """Insert a font into the PDF and reference it from this page as
      `fontname`.

      Returns:
          The value delivered by `JM_insert_font()`.

      Raises:
          RuntimeError: if no font xref was produced.
      """
      pdf_page = self._pdf_page()
      pdf_doc = pdf_page.doc()
      font_info = JM_insert_font(pdf_doc, bfname, fontfile,fontbuffer, set_simple, idx, wmode, serif, encoding, ordering)

      # get the objects /Resources, /Resources/Font
      resources = mupdf.pdf_dict_get_inheritable(pdf_page.obj(), PDF_NAME('Resources'))
      if not resources.pdf_is_dict():
          resources = mupdf.pdf_dict_put_dict(pdf_page.obj(), PDF_NAME("Resources"), 5)
      font_dict = mupdf.pdf_dict_get(resources, PDF_NAME('Font'))
      if not font_dict.m_internal:  # page has no fonts yet
          font_dict = mupdf.pdf_new_dict(pdf_doc, 5)
          mupdf.pdf_dict_putl(pdf_page.obj(), font_dict, PDF_NAME('Resources'), PDF_NAME('Font'))

      # store font in resources and fonts objects will contain named reference to font
      _, font_xref = JM_INT_ITEM(font_info, 0)
      if not font_xref:
          raise RuntimeError( "cannot insert font")
      mupdf.pdf_dict_puts(font_dict, fontname, mupdf.pdf_new_indirect(pdf_doc, font_xref, 0))
      return font_info
  def _load_annot(self, name, xref):
      """Look up an annotation by `name` (when `xref` is 0) or by `xref`.

      Returns an `Annot`, or None if nothing was found.
      """
      pdf_page = self._pdf_page()
      if xref == 0:
          found = JM_get_annot_by_name(pdf_page, name)
      else:
          found = JM_get_annot_by_xref(pdf_page, xref)
      if not found.m_internal:
          return None
      return Annot(found)
  def _makePixmap(self, doc, ctm, cs, alpha=0, annots=1, clip=None):
      """Render this page via `JM_pixmap_from_page()` and wrap the result
      in a `Pixmap`."""
      return Pixmap(
              JM_pixmap_from_page(doc, self.this, ctm, cs, alpha, annots, clip)
              )
  def _other_box(self, boxtype):
      """Return the page box stored under key `boxtype`, or None.

      None is returned when this is not a PDF page, or the entry is not
      an array, or the resulting rectangle is infinite.
      """
      box = mupdf.FzRect( mupdf.FzRect.Fixed_INFINITE)
      pdf_page = _as_pdf_page(self.this, required=False)
      if pdf_page.m_internal:
          entry = mupdf.pdf_dict_gets( pdf_page.obj(), boxtype)
          if mupdf.pdf_is_array(entry):
              box = mupdf.pdf_to_rect(entry)
      if not mupdf.fz_is_infinite_rect( box):
          return JM_py_from_rect(box)
      return
  def _pdf_page(self, required=True):
      """Return `self.this` converted by `_as_pdf_page()`; `required` is
      passed through unchanged."""
      pdf_page = _as_pdf_page(self.this, required=required)
      return pdf_page
  7395. def _reset_annot_refs(self):
  7396. """Invalidate / delete all annots of this page."""
  7397. self._annot_refs.clear()
  def _set_opacity(self, gstate=None, CA=1, ca=1, blendmode=None):
      """Make sure the page's /ExtGState has an entry for `CA` / `ca` and
      return its name.

      The name is 'fitzca' followed by two 2-digit percentages (each
      clamped to 0..99). The incoming `gstate` value is ignored, and
      `blendmode` only takes part in the early-exit test.

      Returns:
          None if `CA` >= 1, `ca` >= 1 and `blendmode` is None; otherwise
          the entry name.
      """
      if CA >= 1 and ca >= 1 and blendmode is None:
          return
      pct_CA = min(int(round(max(CA , 0) * 100)), 99)
      pct_ca = min(int(round(max(ca, 0) * 100)), 99)
      gstate = "fitzca%02i%02i" % (pct_CA, pct_ca)

      pdf_page = _as_pdf_page(self.this)
      resources = mupdf.pdf_dict_get(pdf_page.obj(), PDF_NAME('Resources'))
      if not resources.m_internal:
          resources = mupdf.pdf_dict_put_dict(pdf_page.obj(), PDF_NAME('Resources'), 2)
      ext_states = mupdf.pdf_dict_get(resources, PDF_NAME('ExtGState'))
      if not ext_states.m_internal:
          ext_states = mupdf.pdf_dict_put_dict(resources, PDF_NAME('ExtGState'), 2)

      # Nothing to add if an entry with this name already exists.
      for index in range(mupdf.pdf_dict_len(ext_states)):
          key = mupdf.pdf_dict_get_key(ext_states, index)
          if mupdf.pdf_to_name(key) == gstate:
              return gstate

      entry = mupdf.pdf_new_dict(pdf_page.doc(), 3)
      mupdf.pdf_dict_put_real(entry, PDF_NAME('CA'), CA)
      mupdf.pdf_dict_put_real(entry, PDF_NAME('ca'), ca)
      mupdf.pdf_dict_puts(ext_states, gstate, entry)
      return gstate
  def _set_pagebox(self, boxtype, rect):
      """Write `rect` as the page's `boxtype` entry.

      `boxtype` must be one of "CropBox", "BleedBox", "TrimBox", "ArtBox".
      The y-coordinates of `rect` are mirrored against the MediaBox's
      `y1` before the value is stored.

      Raises:
          ValueError: if the page has no parent, the parent is not a PDF,
              `boxtype` is not accepted, or the mirrored rectangle is
              empty or not inside the MediaBox.
      """
      owner = self.parent
      if owner is None:
          raise ValueError("orphaned object: parent is None")
      if not owner.is_pdf:
          raise ValueError("is no PDF")
      if boxtype not in ("CropBox", "BleedBox", "TrimBox", "ArtBox"):
          raise ValueError("bad boxtype")
      given = Rect(rect)
      mb = self.mediabox
      box = Rect(given[0], mb.y1 - given[3], given[2], mb.y1 - given[1])
      fits_x = mb.x0 <= box.x0 < box.x1 <= mb.x1
      fits_y = mb.y0 <= box.y0 < box.y1 <= mb.y1
      if not fits_x or not fits_y:
          raise ValueError(f"{boxtype} not in MediaBox")
      owner.xref_set_key(self.xref, boxtype, f"[{_format_g(tuple(box))}]")
  def _set_resource_property(self, name, xref):
      """Store `xref` under `name` in the page's resource properties via
      `JM_set_resource_property()`."""
      page_obj = self._pdf_page().obj()
      JM_set_resource_property(page_obj, name, xref)
  def _show_pdf_page(self, fz_srcpage, overlay=1, matrix=None, xref=0, oc=0, clip=None, graftmap=None, _imgname=None):
      """Show a source page on this page by means of two Form XObjects.

      The source page is converted to an XObject ("fullpage"). A second
      XObject, built with `clip` and `matrix`, invokes the first one and
      is registered on this page as `_imgname`. Finally a content stream
      invoking `_imgname` is inserted.

      Returns:
          `xref` if it is non-zero, else the xref of the source-page
          XObject.
      """
      crop = JM_rect_from_py(clip)
      transform = JM_matrix_from_py(matrix)
      result_xref = xref
      target_page = _as_pdf_page(self.this)
      target_obj = target_page.obj()
      target_pdf = target_page.doc()  # target PDF
      ENSURE_OPERATION(target_pdf)

      # --- convert the source page to a Form XObject ---
      source_xobj = JM_xobject_from_page(target_pdf, fz_srcpage, xref, graftmap.this)
      if not result_xref:
          result_xref = mupdf.pdf_to_num(source_xobj)

      # --- create referencing XObject (controls display on target page);
      #     its /Resources refer to the source XObject ---
      inner_xobjects = mupdf.pdf_new_dict(target_pdf, 5)
      mupdf.pdf_dict_puts(inner_xobjects, "fullpage", source_xobj)
      inner_resources = mupdf.pdf_new_dict(target_pdf, 5)
      mupdf.pdf_dict_put(inner_resources, PDF_NAME('XObject'), inner_xobjects)
      inner_content = mupdf.fz_new_buffer(20)
      mupdf.fz_append_string(inner_content, "/fullpage Do")
      display_xobj = mupdf.pdf_new_xobject(target_pdf, crop, transform, inner_resources, inner_content)
      if oc > 0:
          JM_add_oc_object(target_pdf, mupdf.pdf_resolve_indirect(display_xobj), oc)

      # --- update target page, step 1: insert XObject in Resources ---
      resources = mupdf.pdf_dict_get_inheritable(target_obj, PDF_NAME('Resources'))
      if not resources.m_internal:
          resources = mupdf.pdf_dict_put_dict(target_obj,PDF_NAME('Resources'), 5)
      page_xobjects = mupdf.pdf_dict_get(resources, PDF_NAME('XObject'))
      if not page_xobjects.m_internal:
          page_xobjects = mupdf.pdf_dict_put_dict(resources, PDF_NAME('XObject'), 5)
      mupdf.pdf_dict_puts(page_xobjects, _imgname, display_xobj)

      # --- step 2: make and insert new Contents object ---
      do_command = mupdf.fz_new_buffer(50)  # buffer for Do-command
      for part in (" q /", _imgname, " Do Q "):
          mupdf.fz_append_string(do_command, part)
      JM_insert_contents(target_pdf, target_obj, do_command, overlay)
      return result_xref
  def add_caret_annot(self, point: point_like) -> Annot:
      """Add a 'Caret' annotation.

      The value returned by `annot_preprocess()` is passed back to
      `set_rotation()` afterwards if it is non-zero, also on failure.
      """
      saved_rotation = annot_preprocess(self)
      try:
          raw_annot = self._add_caret_annot(point)
      finally:
          if saved_rotation != 0:
              self.set_rotation(saved_rotation)
      # The helper returns a low-level annotation; wrap it here.
      caret = Annot( raw_annot)
      annot_postprocess(self, caret)
      assert hasattr( caret, 'parent')
      return caret
  def add_circle_annot(self, rect: rect_like) -> Annot:
      """Add a 'Circle' (ellipse, oval) annotation.

      The value returned by `annot_preprocess()` is passed back to
      `set_rotation()` afterwards if it is non-zero, also on failure.
      """
      saved_rotation = annot_preprocess(self)
      try:
          circle = self._add_square_or_circle(rect, mupdf.PDF_ANNOT_CIRCLE)
      finally:
          if saved_rotation != 0:
              self.set_rotation(saved_rotation)
      annot_postprocess(self, circle)
      return circle
  def add_file_annot(
          self,
          point: point_like,
          buffer_: ByteString,
          filename: str,
          ufilename: OptStr = None,
          desc: OptStr = None,
          icon: OptStr = None,
          ) -> Annot:
      """Add a 'FileAttachment' annotation.

      The value returned by `annot_preprocess()` is passed back to
      `set_rotation()` afterwards if it is non-zero, also on failure.
      """
      saved_rotation = annot_preprocess(self)
      optional = dict(ufilename=ufilename, desc=desc, icon=icon)
      try:
          attachment = self._add_file_annot(point, buffer_, filename, **optional)
      finally:
          if saved_rotation != 0:
              self.set_rotation(saved_rotation)
      annot_postprocess(self, attachment)
      return attachment
  def add_freetext_annot(
          self,
          rect: rect_like,
          text: str,
          *,
          fontsize: float = 11,
          fontname: OptStr = None,
          text_color: OptSeq = None,
          fill_color: OptSeq = None,
          border_color: OptSeq = None,
          border_width: float = 0,
          dashes: OptSeq = None,
          callout: OptSeq = None,
          line_end: int = mupdf.PDF_ANNOT_LE_OPEN_ARROW,
          opacity: float = 1,
          align: int = 0,
          rotate: int = 0,
          richtext=False,
          style=None,
          ) -> Annot:
      """Add a 'FreeText' annotation.

      All keyword arguments are forwarded unchanged to
      `_add_freetext_annot()`. The value returned by `annot_preprocess()`
      is passed back to `set_rotation()` afterwards if it is non-zero,
      also on failure.
      """
      saved_rotation = annot_preprocess(self)
      options = dict(
              fontsize=fontsize,
              fontname=fontname,
              text_color=text_color,
              fill_color=fill_color,
              border_color=border_color,
              border_width=border_width,
              dashes=dashes,
              callout=callout,
              line_end=line_end,
              opacity=opacity,
              align=align,
              rotate=rotate,
              richtext=richtext,
              style=style,
              )
      try:
          freetext = self._add_freetext_annot(rect, text, **options)
      finally:
          if saved_rotation != 0:
              self.set_rotation(saved_rotation)
      annot_postprocess(self, freetext)
      return freetext
  7587. def add_highlight_annot(self, quads=None, start=None,
  7588. stop=None, clip=None) -> Annot:
  7589. """Add a 'Highlight' annotation."""
  7590. if quads is None:
  7591. q = get_highlight_selection(self, start=start, stop=stop, clip=clip)
  7592. else:
  7593. q = CheckMarkerArg(quads)
  7594. ret = self._add_text_marker(q, mupdf.PDF_ANNOT_HIGHLIGHT)
  7595. return ret
  7596. def add_ink_annot(self, handwriting: list) -> Annot:
  7597. """Add a 'Ink' ('handwriting') annotation.
  7598. The argument must be a list of lists of point_likes.
  7599. """
  7600. old_rotation = annot_preprocess(self)
  7601. try:
  7602. annot = self._add_ink_annot(handwriting)
  7603. finally:
  7604. if old_rotation != 0:
  7605. self.set_rotation(old_rotation)
  7606. annot_postprocess(self, annot)
  7607. return annot
  7608. def add_line_annot(self, p1: point_like, p2: point_like) -> Annot:
  7609. """Add a 'Line' annotation."""
  7610. old_rotation = annot_preprocess(self)
  7611. try:
  7612. annot = self._add_line_annot(p1, p2)
  7613. finally:
  7614. if old_rotation != 0:
  7615. self.set_rotation(old_rotation)
  7616. annot_postprocess(self, annot)
  7617. return annot
  7618. def add_polygon_annot(self, points: list) -> Annot:
  7619. """Add a 'Polygon' annotation."""
  7620. old_rotation = annot_preprocess(self)
  7621. try:
  7622. annot = self._add_multiline(points, mupdf.PDF_ANNOT_POLYGON)
  7623. finally:
  7624. if old_rotation != 0:
  7625. self.set_rotation(old_rotation)
  7626. annot_postprocess(self, annot)
  7627. return annot
  7628. def add_polyline_annot(self, points: list) -> Annot:
  7629. """Add a 'PolyLine' annotation."""
  7630. old_rotation = annot_preprocess(self)
  7631. try:
  7632. annot = self._add_multiline(points, mupdf.PDF_ANNOT_POLY_LINE)
  7633. finally:
  7634. if old_rotation != 0:
  7635. self.set_rotation(old_rotation)
  7636. annot_postprocess(self, annot)
  7637. return annot
  7638. def add_rect_annot(self, rect: rect_like) -> Annot:
  7639. """Add a 'Square' (rectangle) annotation."""
  7640. old_rotation = annot_preprocess(self)
  7641. try:
  7642. annot = self._add_square_or_circle(rect, mupdf.PDF_ANNOT_SQUARE)
  7643. finally:
  7644. if old_rotation != 0:
  7645. self.set_rotation(old_rotation)
  7646. annot_postprocess(self, annot)
  7647. return annot
  7648. def add_redact_annot(
  7649. self,
  7650. quad,
  7651. text: OptStr =None,
  7652. fontname: OptStr =None,
  7653. fontsize: float =11,
  7654. align: int =0,
  7655. fill: OptSeq =None,
  7656. text_color: OptSeq =None,
  7657. cross_out: bool =True,
  7658. ) -> Annot:
  7659. """Add a 'Redact' annotation."""
  7660. da_str = None
  7661. if text and not set(string.whitespace).issuperset(text):
  7662. CheckColor(fill)
  7663. CheckColor(text_color)
  7664. if not fontname:
  7665. fontname = "Helv"
  7666. if not fontsize:
  7667. fontsize = 11
  7668. if not text_color:
  7669. text_color = (0, 0, 0)
  7670. if hasattr(text_color, "__float__"):
  7671. text_color = (text_color, text_color, text_color)
  7672. if len(text_color) > 3:
  7673. text_color = text_color[:3]
  7674. fmt = "{:g} {:g} {:g} rg /{f:s} {s:g} Tf"
  7675. da_str = fmt.format(*text_color, f=fontname, s=fontsize)
  7676. if fill is None:
  7677. fill = (1, 1, 1)
  7678. if fill:
  7679. if hasattr(fill, "__float__"):
  7680. fill = (fill, fill, fill)
  7681. if len(fill) > 3:
  7682. fill = fill[:3]
  7683. else:
  7684. text = None
  7685. old_rotation = annot_preprocess(self)
  7686. try:
  7687. annot = self._add_redact_annot(quad, text=text, da_str=da_str,
  7688. align=align, fill=fill)
  7689. finally:
  7690. if old_rotation != 0:
  7691. self.set_rotation(old_rotation)
  7692. annot_postprocess(self, annot)
  7693. #-------------------------------------------------------------
  7694. # change appearance to show a crossed-out rectangle
  7695. #-------------------------------------------------------------
  7696. if cross_out:
  7697. ap_tab = annot._getAP().splitlines()[:-1] # get the 4 commands only
  7698. _, LL, LR, UR, UL = ap_tab
  7699. ap_tab.append(LR)
  7700. ap_tab.append(LL)
  7701. ap_tab.append(UR)
  7702. ap_tab.append(LL)
  7703. ap_tab.append(UL)
  7704. ap_tab.append(b"S")
  7705. ap = b"\n".join(ap_tab)
  7706. annot._setAP(ap, 0)
  7707. return annot
  7708. def add_squiggly_annot(
  7709. self,
  7710. quads=None,
  7711. start=None,
  7712. stop=None,
  7713. clip=None,
  7714. ) -> Annot:
  7715. """Add a 'Squiggly' annotation."""
  7716. if quads is None:
  7717. q = get_highlight_selection(self, start=start, stop=stop, clip=clip)
  7718. else:
  7719. q = CheckMarkerArg(quads)
  7720. return self._add_text_marker(q, mupdf.PDF_ANNOT_SQUIGGLY)
  7721. def add_stamp_annot(self, rect: rect_like, stamp=0) -> Annot:
  7722. """Add a ('rubber') 'Stamp' annotation."""
  7723. old_rotation = annot_preprocess(self)
  7724. try:
  7725. annot = self._add_stamp_annot(rect, stamp)
  7726. finally:
  7727. if old_rotation != 0:
  7728. self.set_rotation(old_rotation)
  7729. annot_postprocess(self, annot)
  7730. return annot
  7731. def add_strikeout_annot(self, quads=None, start=None, stop=None, clip=None) -> Annot:
  7732. """Add a 'StrikeOut' annotation."""
  7733. if quads is None:
  7734. q = get_highlight_selection(self, start=start, stop=stop, clip=clip)
  7735. else:
  7736. q = CheckMarkerArg(quads)
  7737. return self._add_text_marker(q, mupdf.PDF_ANNOT_STRIKE_OUT)
  7738. def add_text_annot(self, point: point_like, text: str, icon: str ="Note") -> Annot:
  7739. """Add a 'Text' (sticky note) annotation."""
  7740. old_rotation = annot_preprocess(self)
  7741. try:
  7742. annot = self._add_text_annot(point, text, icon=icon)
  7743. finally:
  7744. if old_rotation != 0:
  7745. self.set_rotation(old_rotation)
  7746. annot_postprocess(self, annot)
  7747. return annot
  7748. def add_underline_annot(self, quads=None, start=None, stop=None, clip=None) -> Annot:
  7749. """Add a 'Underline' annotation."""
  7750. if quads is None:
  7751. q = get_highlight_selection(self, start=start, stop=stop, clip=clip)
  7752. else:
  7753. q = CheckMarkerArg(quads)
  7754. return self._add_text_marker(q, mupdf.PDF_ANNOT_UNDERLINE)
  7755. def add_widget(self, widget: Widget) -> Annot:
  7756. """Add a 'Widget' (form field)."""
  7757. CheckParent(self)
  7758. doc = self.parent
  7759. if not doc.is_pdf:
  7760. raise ValueError("is no PDF")
  7761. widget._validate()
  7762. annot = self._addWidget(widget.field_type, widget.field_name)
  7763. if not annot:
  7764. return None
  7765. annot.thisown = True
  7766. annot.parent = weakref.proxy(self) # owning page object
  7767. self._annot_refs[id(annot)] = annot
  7768. widget.parent = annot.parent
  7769. widget._annot = annot
  7770. widget.update()
  7771. return annot
  7772. def annot_names(self):
  7773. '''
  7774. page get list of annot names
  7775. '''
  7776. """List of names of annotations, fields and links."""
  7777. CheckParent(self)
  7778. page = self._pdf_page(required=False)
  7779. if not page.m_internal:
  7780. return []
  7781. return JM_get_annot_id_list(page)
  7782. def annot_xrefs(self):
  7783. '''
  7784. List of xref numbers of annotations, fields and links.
  7785. '''
  7786. return JM_get_annot_xref_list2(self)
  7787. def annots(self, types=None):
  7788. """ Generator over the annotations of a page.
  7789. Args:
  7790. types: (list) annotation types to subselect from. If none,
  7791. all annotations are returned. E.g. types=[PDF_ANNOT_LINE]
  7792. will only yield line annotations.
  7793. """
  7794. skip_types = (mupdf.PDF_ANNOT_LINK, mupdf.PDF_ANNOT_POPUP, mupdf.PDF_ANNOT_WIDGET)
  7795. if not hasattr(types, "__getitem__"):
  7796. annot_xrefs = [a[0] for a in self.annot_xrefs() if a[1] not in skip_types]
  7797. else:
  7798. annot_xrefs = [a[0] for a in self.annot_xrefs() if a[1] in types and a[1] not in skip_types]
  7799. for xref in annot_xrefs:
  7800. annot = self.load_annot(xref)
  7801. annot._yielded=True
  7802. yield annot
  7803. def recolor(self, components=1):
  7804. """Convert colorspaces of objects on the page.
  7805. Valid values are 1, 3 and 4.
  7806. """
  7807. if components not in (1, 3, 4):
  7808. raise ValueError("components must be one of 1, 3, 4")
  7809. pdfdoc = _as_pdf_document(self.parent)
  7810. ropt = mupdf.pdf_recolor_options()
  7811. ropt.num_comp = components
  7812. ropts = mupdf.PdfRecolorOptions(ropt)
  7813. mupdf.pdf_recolor_page(pdfdoc, self.number, ropts)
  7814. def clip_to_rect(self, rect):
  7815. """Clip away page content outside the rectangle."""
  7816. clip = Rect(rect)
  7817. if clip.is_infinite or (clip & self.rect).is_empty:
  7818. raise ValueError("rect must not be infinite or empty")
  7819. clip *= self.transformation_matrix
  7820. pdfpage = _as_pdf_page(self)
  7821. pclip = JM_rect_from_py(clip)
  7822. mupdf.pdf_clip_page(pdfpage, pclip)
  7823. @property
  7824. def artbox(self):
  7825. """The ArtBox"""
  7826. rect = self._other_box("ArtBox")
  7827. if rect is None:
  7828. return self.cropbox
  7829. mb = self.mediabox
  7830. return Rect(rect[0], mb.y1 - rect[3], rect[2], mb.y1 - rect[1])
  7831. @property
  7832. def bleedbox(self):
  7833. """The BleedBox"""
  7834. rect = self._other_box("BleedBox")
  7835. if rect is None:
  7836. return self.cropbox
  7837. mb = self.mediabox
  7838. return Rect(rect[0], mb.y1 - rect[3], rect[2], mb.y1 - rect[1])
  7839. def bound(self):
  7840. """Get page rectangle."""
  7841. CheckParent(self)
  7842. page = _as_fz_page(self.this)
  7843. val = mupdf.fz_bound_page(page)
  7844. val = Rect(val)
  7845. if val.is_infinite and self.parent.is_pdf:
  7846. cb = self.cropbox
  7847. w, h = cb.width, cb.height
  7848. if self.rotation not in (0, 180):
  7849. w, h = h, w
  7850. val = Rect(0, 0, w, h)
  7851. msg = TOOLS.mupdf_warnings(reset=False).splitlines()[-1]
  7852. message(msg)
  7853. return val
  7854. def clean_contents(self, sanitize=1):
  7855. if not sanitize and not self.is_wrapped:
  7856. self.wrap_contents()
  7857. page = _as_pdf_page( self.this, required=False)
  7858. if not page.m_internal:
  7859. return
  7860. filter_ = _make_PdfFilterOptions(recurse=1, sanitize=sanitize)
  7861. mupdf.pdf_filter_page_contents( page.doc(), page, filter_)
  7862. @property
  7863. def cropbox(self):
  7864. """The CropBox."""
  7865. CheckParent(self)
  7866. page = self._pdf_page(required=False)
  7867. if not page.m_internal:
  7868. val = mupdf.fz_bound_page(self.this)
  7869. else:
  7870. val = JM_cropbox(page.obj())
  7871. val = Rect(val)
  7872. return val
  7873. @property
  7874. def cropbox_position(self):
  7875. return self.cropbox.tl
  7876. def delete_annot(self, annot):
  7877. """Delete annot and return next one."""
  7878. CheckParent(self)
  7879. CheckParent(annot)
  7880. page = self._pdf_page()
  7881. while 1:
  7882. # first loop through all /IRT annots and remove them
  7883. irt_annot = JM_find_annot_irt(annot.this)
  7884. if not irt_annot: # no more there
  7885. break
  7886. mupdf.pdf_delete_annot(page, irt_annot.this)
  7887. nextannot = mupdf.pdf_next_annot(annot.this) # store next
  7888. mupdf.pdf_delete_annot(page, annot.this)
  7889. val = Annot(nextannot)
  7890. if val:
  7891. val.thisown = True
  7892. val.parent = weakref.proxy(self) # owning page object
  7893. val.parent._annot_refs[id(val)] = val
  7894. annot._erase()
  7895. return val
  7896. def delete_link(self, linkdict):
  7897. """Delete a Link."""
  7898. CheckParent(self)
  7899. if not isinstance( linkdict, dict):
  7900. return # have no dictionary
  7901. def finished():
  7902. if linkdict["xref"] == 0: return
  7903. try:
  7904. linkid = linkdict["id"]
  7905. linkobj = self._annot_refs[linkid]
  7906. linkobj._erase()
  7907. except Exception:
  7908. # Don't print this exception, to match classic. Issue #2841.
  7909. if g_exceptions_verbose > 1: exception_info()
  7910. pass
  7911. page = _as_pdf_page(self.this, required=False)
  7912. if not page.m_internal:
  7913. return finished() # have no PDF
  7914. xref = linkdict[dictkey_xref]
  7915. if xref < 1:
  7916. return finished() # invalid xref
  7917. annots = mupdf.pdf_dict_get( page.obj(), PDF_NAME('Annots'))
  7918. if not annots.m_internal:
  7919. return finished() # have no annotations
  7920. len_ = mupdf.pdf_array_len( annots)
  7921. if len_ == 0:
  7922. return finished()
  7923. oxref = 0
  7924. for i in range( len_):
  7925. oxref = mupdf.pdf_to_num( mupdf.pdf_array_get( annots, i))
  7926. if xref == oxref:
  7927. break # found xref in annotations
  7928. if xref != oxref:
  7929. return finished() # xref not in annotations
  7930. mupdf.pdf_array_delete( annots, i) # delete entry in annotations
  7931. mupdf.pdf_delete_object( page.doc(), xref) # delete link object
  7932. mupdf.pdf_dict_put( page.obj(), PDF_NAME('Annots'), annots)
  7933. JM_refresh_links( page)
  7934. return finished()
  7935. @property
  7936. def derotation_matrix(self) -> Matrix:
  7937. """Reflects page de-rotation."""
  7938. if g_use_extra:
  7939. return Matrix(extra.Page_derotate_matrix( self.this))
  7940. pdfpage = self._pdf_page(required=False)
  7941. if not pdfpage.m_internal:
  7942. return Matrix(mupdf.FzRect(mupdf.FzRect.UNIT))
  7943. return Matrix(JM_derotate_page_matrix(pdfpage))
  7944. def extend_textpage(self, tpage, flags=0, matrix=None):
  7945. page = self.this
  7946. tp = tpage.this
  7947. assert isinstance( tp, mupdf.FzStextPage)
  7948. options = mupdf.FzStextOptions()
  7949. options.flags = flags
  7950. ctm = JM_matrix_from_py(matrix)
  7951. dev = mupdf.FzDevice(tp, options)
  7952. mupdf.fz_run_page( page, dev, ctm, mupdf.FzCookie())
  7953. mupdf.fz_close_device( dev)
  7954. @property
  7955. def first_annot(self):
  7956. """First annotation."""
  7957. CheckParent(self)
  7958. page = self._pdf_page(required=False)
  7959. if not page.m_internal:
  7960. return
  7961. annot = mupdf.pdf_first_annot(page)
  7962. if not annot.m_internal:
  7963. return
  7964. val = Annot(annot)
  7965. val.thisown = True
  7966. val.parent = weakref.proxy(self) # owning page object
  7967. self._annot_refs[id(val)] = val
  7968. return val
  7969. @property
  7970. def first_link(self):
  7971. '''
  7972. First link on page
  7973. '''
  7974. return self.load_links()
  7975. @property
  7976. def first_widget(self):
  7977. """First widget/field."""
  7978. CheckParent(self)
  7979. annot = 0
  7980. page = self._pdf_page(required=False)
  7981. if not page.m_internal:
  7982. return
  7983. annot = mupdf.pdf_first_widget(page)
  7984. if not annot.m_internal:
  7985. return
  7986. val = Annot(annot)
  7987. val.thisown = True
  7988. val.parent = weakref.proxy(self) # owning page object
  7989. self._annot_refs[id(val)] = val
  7990. widget = Widget()
  7991. TOOLS._fill_widget(val, widget)
  7992. val = widget
  7993. return val
  7994. def get_bboxlog(self, layers=None):
  7995. CheckParent(self)
  7996. old_rotation = self.rotation
  7997. if old_rotation != 0:
  7998. self.set_rotation(0)
  7999. page = self.this
  8000. rc = []
  8001. inc_layers = True if layers else False
  8002. dev = JM_new_bbox_device( rc, inc_layers)
  8003. mupdf.fz_run_page( page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
  8004. mupdf.fz_close_device( dev)
  8005. if old_rotation != 0:
  8006. self.set_rotation(old_rotation)
  8007. return rc
  8008. def get_cdrawings(self, extended=None, callback=None, method=None):
  8009. """Extract vector graphics ("line art") from the page."""
  8010. CheckParent(self)
  8011. old_rotation = self.rotation
  8012. if old_rotation != 0:
  8013. self.set_rotation(0)
  8014. page = self.this
  8015. if isinstance(page, mupdf.PdfPage):
  8016. # Downcast pdf_page to fz_page.
  8017. page = mupdf.FzPage(page)
  8018. assert isinstance(page, mupdf.FzPage), f'{self.this=}'
  8019. clips = True if extended else False
  8020. prect = mupdf.fz_bound_page(page)
  8021. if g_use_extra:
  8022. rc = extra.get_cdrawings(page, extended, callback, method)
  8023. else:
  8024. rc = list()
  8025. if callable(callback) or method is not None:
  8026. dev = JM_new_lineart_device_Device(callback, clips, method)
  8027. else:
  8028. dev = JM_new_lineart_device_Device(rc, clips, method)
  8029. dev.ptm = mupdf.FzMatrix(1, 0, 0, -1, 0, prect.y1)
  8030. mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
  8031. mupdf.fz_close_device(dev)
  8032. if old_rotation != 0:
  8033. self.set_rotation(old_rotation)
  8034. if callable(callback) or method is not None:
  8035. return
  8036. return rc
  8037. def get_contents(self):
  8038. """Get xrefs of /Contents objects."""
  8039. CheckParent(self)
  8040. ret = []
  8041. page = _as_pdf_page(self.this)
  8042. obj = page.obj()
  8043. contents = mupdf.pdf_dict_get(obj, mupdf.PDF_ENUM_NAME_Contents)
  8044. if mupdf.pdf_is_array(contents):
  8045. n = mupdf.pdf_array_len(contents)
  8046. for i in range(n):
  8047. icont = mupdf.pdf_array_get(contents, i)
  8048. xref = mupdf.pdf_to_num(icont)
  8049. ret.append(xref)
  8050. elif contents.m_internal:
  8051. xref = mupdf.pdf_to_num(contents)
  8052. ret.append( xref)
  8053. return ret
  8054. def get_displaylist(self, annots=1):
  8055. '''
  8056. Make a DisplayList from the page for Pixmap generation.
  8057. Include (default) or exclude annotations.
  8058. '''
  8059. CheckParent(self)
  8060. if annots:
  8061. dl = mupdf.fz_new_display_list_from_page(self.this)
  8062. else:
  8063. dl = mupdf.fz_new_display_list_from_page_contents(self.this)
  8064. return DisplayList(dl)
  8065. def get_drawings(self, extended: bool=False) -> list:
  8066. """Retrieve vector graphics. The extended version includes clips.
  8067. Note:
  8068. For greater comfort, this method converts point-likes, rect-likes, quad-likes
  8069. of the C version to respective Point / Rect / Quad objects.
  8070. It also adds default items that are missing in original path types.
  8071. """
  8072. allkeys = (
  8073. 'closePath',
  8074. 'fill',
  8075. 'color',
  8076. 'width',
  8077. 'lineCap',
  8078. 'lineJoin',
  8079. 'dashes',
  8080. 'stroke_opacity',
  8081. 'fill_opacity',
  8082. 'even_odd',
  8083. )
  8084. val = self.get_cdrawings(extended=extended)
  8085. for i in range(len(val)):
  8086. npath = val[i]
  8087. if not npath["type"].startswith("clip"):
  8088. npath["rect"] = Rect(npath["rect"])
  8089. else:
  8090. npath["scissor"] = Rect(npath["scissor"])
  8091. if npath["type"]!="group":
  8092. items = npath["items"]
  8093. newitems = []
  8094. for item in items:
  8095. cmd = item[0]
  8096. rest = item[1:]
  8097. if cmd == "re":
  8098. item = ("re", Rect(rest[0]).normalize(), rest[1])
  8099. elif cmd == "qu":
  8100. item = ("qu", Quad(rest[0]))
  8101. else:
  8102. item = tuple([cmd] + [Point(i) for i in rest])
  8103. newitems.append(item)
  8104. npath["items"] = newitems
  8105. if npath['type'] in ('f', 's'):
  8106. for k in allkeys:
  8107. npath[k] = npath.get(k)
  8108. val[i] = npath
  8109. return val
  8110. class Drawpath(object):
  8111. """Reflects a path dictionary from get_cdrawings()."""
  8112. def __init__(self, **args):
  8113. self.__dict__.update(args)
  8114. class Drawpathlist(object):
  8115. """List of Path objects representing get_cdrawings() output."""
  8116. def __getitem__(self, item):
  8117. return self.paths.__getitem__(item)
  8118. def __init__(self):
  8119. self.paths = []
  8120. self.path_count = 0
  8121. self.group_count = 0
  8122. self.clip_count = 0
  8123. self.fill_count = 0
  8124. self.stroke_count = 0
  8125. self.fillstroke_count = 0
  8126. def __len__(self):
  8127. return self.paths.__len__()
  8128. def append(self, path):
  8129. self.paths.append(path)
  8130. self.path_count += 1
  8131. if path.type == "clip":
  8132. self.clip_count += 1
  8133. elif path.type == "group":
  8134. self.group_count += 1
  8135. elif path.type == "f":
  8136. self.fill_count += 1
  8137. elif path.type == "s":
  8138. self.stroke_count += 1
  8139. elif path.type == "fs":
  8140. self.fillstroke_count += 1
  8141. def clip_parents(self, i):
  8142. """Return list of parent clip paths.
  8143. Args:
  8144. i: (int) return parents of this path.
  8145. Returns:
  8146. List of the clip parents."""
  8147. if i >= self.path_count:
  8148. raise IndexError("bad path index")
  8149. while i < 0:
  8150. i += self.path_count
  8151. lvl = self.paths[i].level
  8152. clips = list( # clip paths before identified one
  8153. reversed(
  8154. [
  8155. p
  8156. for p in self.paths[:i]
  8157. if p.type == "clip" and p.level < lvl
  8158. ]
  8159. )
  8160. )
  8161. if clips == []: # none found: empty list
  8162. return []
  8163. nclips = [clips[0]] # init return list
  8164. for p in clips[1:]:
  8165. if p.level >= nclips[-1].level:
  8166. continue # only accept smaller clip levels
  8167. nclips.append(p)
  8168. return nclips
  8169. def group_parents(self, i):
  8170. """Return list of parent group paths.
  8171. Args:
  8172. i: (int) return parents of this path.
  8173. Returns:
  8174. List of the group parents."""
  8175. if i >= self.path_count:
  8176. raise IndexError("bad path index")
  8177. while i < 0:
  8178. i += self.path_count
  8179. lvl = self.paths[i].level
  8180. groups = list( # group paths before identified one
  8181. reversed(
  8182. [
  8183. p
  8184. for p in self.paths[:i]
  8185. if p.type == "group" and p.level < lvl
  8186. ]
  8187. )
  8188. )
  8189. if groups == []: # none found: empty list
  8190. return []
  8191. ngroups = [groups[0]] # init return list
  8192. for p in groups[1:]:
  8193. if p.level >= ngroups[-1].level:
  8194. continue # only accept smaller group levels
  8195. ngroups.append(p)
  8196. return ngroups
  8197. def get_lineart(self) -> object:
  8198. """Get page drawings paths.
  8199. Note:
  8200. For greater comfort, this method converts point-like, rect-like, quad-like
  8201. tuples of the C version to respective Point / Rect / Quad objects.
  8202. Also adds default items that are missing in original path types.
  8203. In contrast to get_drawings(), this output is an object.
  8204. """
  8205. val = self.get_cdrawings(extended=True)
  8206. paths = self.Drawpathlist()
  8207. for path in val:
  8208. npath = self.Drawpath(**path)
  8209. if npath.type != "clip":
  8210. npath.rect = Rect(path["rect"])
  8211. else:
  8212. npath.scissor = Rect(path["scissor"])
  8213. if npath.type != "group":
  8214. items = path["items"]
  8215. newitems = []
  8216. for item in items:
  8217. cmd = item[0]
  8218. rest = item[1:]
  8219. if cmd == "re":
  8220. item = ("re", Rect(rest[0]).normalize(), rest[1])
  8221. elif cmd == "qu":
  8222. item = ("qu", Quad(rest[0]))
  8223. else:
  8224. item = tuple([cmd] + [Point(i) for i in rest])
  8225. newitems.append(item)
  8226. npath.items = newitems
  8227. if npath.type == "f":
  8228. npath.stroke_opacity = None
  8229. npath.dashes = None
  8230. npath.line_join = None
  8231. npath.line_cap = None
  8232. npath.color = None
  8233. npath.width = None
  8234. paths.append(npath)
  8235. val = None
  8236. return paths
  8237. def remove_rotation(self):
  8238. """Set page rotation to 0 while maintaining visual appearance."""
  8239. rot = self.rotation # normalized rotation value
  8240. if rot == 0:
  8241. return Identity # nothing to do
  8242. # need to derotate the page's content
  8243. mb = self.mediabox # current mediabox
  8244. if rot == 90:
  8245. # before derotation, shift content horizontally
  8246. mat0 = Matrix(1, 0, 0, 1, mb.y1 - mb.x1 - mb.x0 - mb.y0, 0)
  8247. elif rot == 270:
  8248. # before derotation, shift content vertically
  8249. mat0 = Matrix(1, 0, 0, 1, 0, mb.x1 - mb.y1 - mb.y0 - mb.x0)
  8250. else: # rot = 180
  8251. mat0 = Matrix(1, 0, 0, 1, -2 * mb.x0, -2 * mb.y0)
  8252. # prefix with derotation matrix
  8253. mat = mat0 * self.derotation_matrix
  8254. cmd = _format_g(tuple(mat)) + ' cm '
  8255. cmd = cmd.encode('utf8')
  8256. _ = TOOLS._insert_contents(self, cmd, False) # prepend to page contents
  8257. # swap x- and y-coordinates
  8258. if rot in (90, 270):
  8259. x0, y0, x1, y1 = mb
  8260. mb.x0 = y0
  8261. mb.y0 = x0
  8262. mb.x1 = y1
  8263. mb.y1 = x1
  8264. self.set_mediabox(mb)
  8265. self.set_rotation(0)
  8266. rot = ~mat # inverse of the derotation matrix
  8267. for annot in self.annots(): # modify rectangles of annotations
  8268. r = annot.rect * rot
  8269. # TODO: only try to set rectangle for applicable annot types
  8270. annot.set_rect(r)
  8271. for link in self.get_links(): # modify 'from' rectangles of links
  8272. r = link["from"] * rot
  8273. self.delete_link(link)
  8274. link["from"] = r
  8275. try: # invalid links remain deleted
  8276. self.insert_link(link)
  8277. except Exception:
  8278. pass
  8279. for widget in self.widgets(): # modify field rectangles
  8280. r = widget.rect * rot
  8281. widget.rect = r
  8282. widget.update()
  8283. return rot # the inverse of the generated derotation matrix
  8284. def cluster_drawings(
  8285. self, clip=None, drawings=None, x_tolerance: float = 3, y_tolerance: float = 3,
  8286. final_filter: bool = True,
  8287. ) -> list:
  8288. """Join rectangles of neighboring vector graphic items.
  8289. Args:
  8290. clip: optional rect-like to restrict the page area to consider.
  8291. drawings: (optional) output of a previous "get_drawings()".
  8292. x_tolerance: horizontal neighborhood threshold.
  8293. y_tolerance: vertical neighborhood threshold.
  8294. Notes:
  8295. Vector graphics (also called line-art or drawings) usually consist
  8296. of independent items like rectangles, lines or curves to jointly
  8297. form table grid lines or bar, line, pie charts and similar.
  8298. This method identifies rectangles wrapping these disparate items.
  8299. Returns:
  8300. A list of Rect items, each wrapping line-art items that are close
  8301. enough to be considered forming a common vector graphic.
  8302. Only "significant" rectangles will be returned, i.e. having both,
  8303. width and height larger than the tolerance values.
  8304. """
  8305. CheckParent(self)
  8306. parea = self.rect # the default clipping area
  8307. if clip is not None:
  8308. parea = Rect(clip)
  8309. delta_x = x_tolerance # shorter local name
  8310. delta_y = y_tolerance # shorter local name
  8311. if drawings is None: # if we cannot re-use a previous output
  8312. drawings = self.get_drawings()
  8313. def are_neighbors(r1, r2):
  8314. """Detect whether r1, r2 are "neighbors".
  8315. Items r1, r2 are called neighbors if the minimum distance between
  8316. their points is less-equal delta.
  8317. Both parameters must be (potentially invalid) rectangles.
  8318. """
  8319. # normalize rectangles as needed
  8320. rr1_x0, rr1_x1 = (r1.x0, r1.x1) if r1.x1 > r1.x0 else (r1.x1, r1.x0)
  8321. rr1_y0, rr1_y1 = (r1.y0, r1.y1) if r1.y1 > r1.y0 else (r1.y1, r1.y0)
  8322. rr2_x0, rr2_x1 = (r2.x0, r2.x1) if r2.x1 > r2.x0 else (r2.x1, r2.x0)
  8323. rr2_y0, rr2_y1 = (r2.y0, r2.y1) if r2.y1 > r2.y0 else (r2.y1, r2.y0)
  8324. if (
  8325. 0
  8326. or rr1_x1 < rr2_x0 - delta_x
  8327. or rr1_x0 > rr2_x1 + delta_x
  8328. or rr1_y1 < rr2_y0 - delta_y
  8329. or rr1_y0 > rr2_y1 + delta_y
  8330. ):
  8331. # Rects do not overlap.
  8332. return False
  8333. else:
  8334. # Rects overlap.
  8335. return True
  8336. # exclude graphics not contained in the clip
  8337. paths = [
  8338. p
  8339. for p in drawings
  8340. if 1
  8341. and p["rect"].x0 >= parea.x0
  8342. and p["rect"].x1 <= parea.x1
  8343. and p["rect"].y0 >= parea.y0
  8344. and p["rect"].y1 <= parea.y1
  8345. ]
  8346. # list of all vector graphic rectangles
  8347. prects = sorted([p["rect"] for p in paths], key=lambda r: (r.y1, r.x0))
  8348. new_rects = [] # the final list of the joined rectangles
  8349. # -------------------------------------------------------------------------
  8350. # The strategy is to identify and join all rects that are neighbors
  8351. # -------------------------------------------------------------------------
  8352. while prects: # the algorithm will empty this list
  8353. r = +prects[0] # copy of first rectangle
  8354. repeat = True
  8355. while repeat:
  8356. repeat = False
  8357. for i in range(len(prects) - 1, 0, -1): # from back to front
  8358. if are_neighbors(prects[i], r):
  8359. r |= prects[i].tl # include in first rect
  8360. r |= prects[i].br # include in first rect
  8361. del prects[i] # delete this rect
  8362. repeat = True
  8363. new_rects.append(r)
  8364. del prects[0]
  8365. prects = sorted(set(prects), key=lambda r: (r.y1, r.x0))
  8366. new_rects = sorted(set(new_rects), key=lambda r: (r.y1, r.x0))
  8367. if not final_filter:
  8368. return new_rects
  8369. return [r for r in new_rects if r.width > delta_x and r.height > delta_y]
  8370. def get_fonts(self, full=False):
  8371. """List of fonts defined in the page object."""
  8372. CheckParent(self)
  8373. return self.parent.get_page_fonts(self.number, full=full)
  8374. def get_image_bbox(self, name, transform=0):
  8375. """Get rectangle occupied by image 'name'.
  8376. 'name' is either an item of the image list, or the referencing
  8377. name string - elem[7] of the resp. item.
  8378. Option 'transform' also returns the image transformation matrix.
  8379. """
  8380. CheckParent(self)
  8381. doc = self.parent
  8382. if doc.is_closed or doc.is_encrypted:
  8383. raise ValueError('document closed or encrypted')
  8384. inf_rect = Rect(1, 1, -1, -1)
  8385. null_mat = Matrix()
  8386. if transform:
  8387. rc = (inf_rect, null_mat)
  8388. else:
  8389. rc = inf_rect
  8390. if type(name) in (list, tuple):
  8391. if not type(name[-1]) is int:
  8392. raise ValueError('need item of full page image list')
  8393. item = name
  8394. else:
  8395. imglist = [i for i in doc.get_page_images(self.number, True) if name == i[7]]
  8396. if len(imglist) == 1:
  8397. item = imglist[0]
  8398. elif imglist == []:
  8399. raise ValueError('bad image name')
  8400. else:
  8401. raise ValueError("found multiple images named '%s'." % name)
  8402. xref = item[-1]
  8403. if xref != 0 or transform:
  8404. try:
  8405. return self.get_image_rects(item, transform=transform)[0]
  8406. except Exception:
  8407. exception_info()
  8408. return inf_rect
  8409. pdf_page = self._pdf_page()
  8410. val = JM_image_reporter(pdf_page)
  8411. if not bool(val):
  8412. return rc
  8413. for v in val:
  8414. if v[0] != item[-3]:
  8415. continue
  8416. q = Quad(v[1])
  8417. bbox = q.rect
  8418. if transform == 0:
  8419. rc = bbox
  8420. break
  8421. hm = Matrix(util_hor_matrix(q.ll, q.lr))
  8422. h = abs(q.ll - q.ul)
  8423. w = abs(q.ur - q.ul)
  8424. m0 = Matrix(1 / w, 0, 0, 1 / h, 0, 0)
  8425. m = ~(hm * m0)
  8426. rc = (bbox, m)
  8427. break
  8428. val = rc
  8429. return val
  8430. def get_images(self, full=False):
  8431. """List of images defined in the page object."""
  8432. CheckParent(self)
  8433. return self.parent.get_page_images(self.number, full=full)
  def get_oc_items(self) -> list:
      """Get OCGs and OCMDs used in the page's contents.

      Returns:
          List of items (name, xref, type), where type is one of "ocg" / "ocmd",
          and name is the property name. Properties whose object source
          contains neither marker are left out.
      """
      # (marker searched in the compressed object source, reported type)
      markers = (("/Type/OCG", "ocg"), ("/Type/OCMD", "ocmd"))
      found = []
      for pname, xref in self._get_resource_properties():
          source = self.parent.xref_object(xref, compressed=True)
          for marker, octype in markers:
              if marker in source:
                  found.append((pname, xref, octype))
                  break
      return found
  def get_svg_image(self, matrix=None, text_as_path=1):
      """Make SVG image from page.

      Args:
          matrix: transformation applied when running the page.
          text_as_path: 1 selects FZ_SVG_TEXT_AS_PATH, any other value
              FZ_SVG_TEXT_AS_TEXT.

      Returns:
          The SVG output as produced by JM_EscapeStrFromBuffer().
      """
      CheckParent(self)
      page_bounds = mupdf.fz_bound_page(self.this)
      ctm = JM_matrix_from_py(matrix)
      if text_as_path == 1:
          text_mode = mupdf.FZ_SVG_TEXT_AS_PATH
      else:
          text_mode = mupdf.FZ_SVG_TEXT_AS_TEXT
      # the device dimensions are those of the transformed page rectangle
      bounds = mupdf.fz_transform_rect(page_bounds, ctm)
      width = bounds.x1 - bounds.x0
      height = bounds.y1 - bounds.y0

      buffer = mupdf.fz_new_buffer(1024)
      output = mupdf.FzOutput(buffer)
      device = mupdf.fz_new_svg_device(output, width, height, text_mode, 1)
      mupdf.fz_run_page(self.this, device, ctm, mupdf.FzCookie())
      mupdf.fz_close_device(device)
      output.fz_close_output()
      return JM_EscapeStrFromBuffer(buffer)
  def get_textbox(
      page: Page,
      rect: rect_like,
      textpage=None,  #: TextPage = None,
  ) -> str:
      """Return the result of TextPage.extractTextbox() for 'rect'.

      Args:
          page: the page to extract from.
          rect: rectangle passed to extractTextbox().
          textpage: optional textpage to use; it must belong to 'page'.
              If omitted, a temporary one is made via page.get_textpage().

      Raises:
          ValueError: 'textpage' has a different parent than 'page'.
      """
      if textpage is None:
          # no textpage supplied: use a throw-away one
          return page.get_textpage().extractTextbox(rect)
      if getattr(textpage, "parent") != page:
          raise ValueError("not a textpage of this page")
      return textpage.extractTextbox(rect)
  def get_textpage(self, clip: rect_like = None, flags: int = 0, matrix=None) -> "TextPage":
      """Create a TextPage for this page.

      The page rotation is temporarily set to 0 while the textpage is
      made and is restored afterwards, also when an exception occurs.

      Args:
          clip: passed to _get_textpage().
          flags: passed to _get_textpage().
          matrix: passed to _get_textpage(); defaults to Matrix(1, 1).
      """
      CheckParent(self)
      if matrix is None:
          matrix = Matrix(1, 1)
      saved_rotation = self.rotation
      rotated = saved_rotation != 0
      if rotated:
          self.set_rotation(0)
      try:
          raw = self._get_textpage(clip, flags=flags, matrix=matrix)
      finally:
          if rotated:
              self.set_rotation(saved_rotation)
      result = TextPage(raw)
      # weak reference back to the owning page
      result.parent = weakref.proxy(self)
      return result
  def get_texttrace(self):
      """Run the page through a texttrace device and return its output.

      The page rotation is temporarily set to 0 and restored afterwards,
      also when running the device raises (same pattern as get_textpage()).

      Returns:
          The list filled by the texttrace device.
      """
      CheckParent(self)
      old_rotation = self.rotation
      if old_rotation != 0:
          self.set_rotation(0)
      try:
          page = self.this
          rc = []
          if g_use_extra:
              dev = extra.JM_new_texttrace_device(rc)
          else:
              dev = JM_new_texttrace_device(rc)
          prect = mupdf.fz_bound_page(page)
          # vertical flip based on the page's bottom coordinate
          dev.ptm = mupdf.FzMatrix(1, 0, 0, -1, 0, prect.y1)
          mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
          mupdf.fz_close_device(dev)
      finally:
          # never leave the page with a modified rotation
          if old_rotation != 0:
              self.set_rotation(old_rotation)
      return rc
  def get_xobjects(self):
      """Return the parent document's xobject list for this page."""
      CheckParent(self)
      owner = self.parent
      return owner.get_page_xobjects(self.number)
  def insert_font(self, fontname="helv", fontfile=None, fontbuffer=None,
                  set_simple=False, wmode=0, encoding=0):
      """Make a font available to the page and return its xref.

      Args:
          fontname: reference name of the font; a leading "/" is removed.
          fontfile: font file as a str, an object with an "absolute"
              attribute (converted via str()) or an object with a "name"
              attribute.
          fontbuffer: font data in memory. Replaced by the font from
              pymupdf_fonts if 'fontname' is a key of fitz_fontdescriptors.
          set_simple, wmode, encoding: passed through to _insertFont().

      Returns:
          The font xref, or the falsy result of _insertFont() on failure.

      Raises:
          ValueError: page has no parent, 'fontname' contains invalid
              characters, or 'fontfile' has an unsupported type.
      """
      doc = self.parent
      if doc is None:
          raise ValueError("orphaned object: parent is None")
      idx = 0

      if fontname.startswith("/"):
          fontname = fontname[1:]
      inv_chars = INVALID_NAME_CHARS.intersection(fontname)
      if inv_chars:
          raise ValueError(f"bad fontname chars {inv_chars}")

      font = CheckFont(self, fontname)
      if font is not None:                    # font already in font list of page
          xref = font[0]                      # this is the xref
          if not CheckFontInfo(doc, xref):    # not yet in our document font list?
              # need to build the doc FontInfo entry - done via get_char_widths
              doc.get_char_widths(xref)
          return xref

      #--------------------------------------------------------------------------
      # the font is not present for this page
      #--------------------------------------------------------------------------

      bfname = Base14_fontdict.get(fontname.lower(), None)  # BaseFont if Base-14 font

      # CJK fonts: the list position is the CJK number, the list decides 'serif'
      CJK_list_n = ["china-t", "china-s", "japan", "korea"]
      CJK_list_s = ["china-ts", "china-ss", "japan-s", "korea-s"]
      if fontname in CJK_list_n:
          CJK_number = CJK_list_n.index(fontname)
          serif = 0
      elif fontname in CJK_list_s:
          CJK_number = CJK_list_s.index(fontname)
          serif = 1
      else:
          CJK_number = -1
          serif = 0

      if fontname.lower() in fitz_fontdescriptors.keys():
          import pymupdf_fonts
          fontbuffer = pymupdf_fonts.myfont(fontname)  # make a copy
          del pymupdf_fonts

      # install the font for the page
      if fontfile is not None:
          if type(fontfile) is str:
              fontfile_str = fontfile
          elif hasattr(fontfile, "absolute"):
              fontfile_str = str(fontfile)
          elif hasattr(fontfile, "name"):
              fontfile_str = fontfile.name
          else:
              raise ValueError("bad fontfile")
      else:
          fontfile_str = None
      val = self._insertFont(fontname, bfname, fontfile_str, fontbuffer, set_simple, idx,
                             wmode, serif, encoding, CJK_number)

      if not val:                   # did not work, error return
          return val

      xref = val[0]                 # xref of installed font
      fontdict = val[1]

      if not CheckFontInfo(doc, xref):  # check again: document may already have this font
          # need to create document font info
          doc.get_char_widths(xref, fontdict=fontdict)
      return xref
  @property
  def is_wrapped(self):
      """True if _count_q_balance() reports (0, 0), i.e. /Contents is balanced."""
      balance = self._count_q_balance()
      return balance == (0, 0)
  @property
  def language(self):
      """Page language.

      Returns None if the page is no PDF page or no (inheritable) /Lang
      entry is found.
      """
      pdfpage = _as_pdf_page(self.this, required=False)
      if pdfpage.m_internal:
          lang = mupdf.pdf_dict_get_inheritable(pdfpage.obj(), PDF_NAME('Lang'))
          if lang.m_internal:
              return mupdf.pdf_to_str_buf(lang)
      return None
  def links(self, kinds=None):
      """Generator over the links of a page.

      Args:
          kinds: (list) link kinds to subselect from. If None, every link
              returned by get_links() is yielded. E.g. kinds=[LINK_URI]
              will only yield URI links.
      """
      for link in self.get_links():
          if kinds is not None and link["kind"] not in kinds:
              continue  # filtered out
          yield link
  def load_annot(self, ident: typing.Union[str, int]) -> Annot:
      """Load an annot by name (/NM key) or xref.

      Args:
          ident: identifier, either name (str) or xref (int).

      Returns:
          The annotation, or the falsy result of _load_annot().

      Raises:
          ValueError: 'ident' is neither exactly a str nor exactly an int.
      """
      CheckParent(self)
      ident_type = type(ident)
      if ident_type is str:
          name, xref = ident, 0
      elif ident_type is int:
          name, xref = None, ident
      else:
          raise ValueError("identifier must be a string or integer")
      annot = self._load_annot(name, xref)
      if not annot:
          return annot
      annot.thisown = True
      annot.parent = weakref.proxy(self)
      # register the annotation with its owning page
      self._annot_refs[id(annot)] = annot
      return annot
  def load_links(self):
      """Get first Link.

      Returns None if the page has no links. For PDF documents, xref and
      id of the link are taken from the first link entry of annot_xrefs();
      otherwise they are 0 and "".
      """
      CheckParent(self)
      first = mupdf.fz_load_links(self.this)
      if not first.m_internal:
          return None
      link = Link(first)
      link.thisown = True
      link.parent = weakref.proxy(self)  # owning page object
      self._annot_refs[id(link)] = link
      # defaults, overwritten below if a PDF link annotation is found
      link.xref = 0
      link.id = ""
      if self.parent.is_pdf:
          link_entries = [x for x in self.annot_xrefs() if x[1] == mupdf.PDF_ANNOT_LINK]
          if link_entries:
              head = link_entries[0]
              link.xref = head[0]
              link.id = head[2]
      return link
  8662. #----------------------------------------------------------------
  8663. # page load widget by xref
  8664. #----------------------------------------------------------------
  def load_widget(self, xref):
      """Load a widget by its xref.

      Returns:
          A Widget filled via TOOLS._fill_widget(), or the falsy result of
          JM_get_widget_by_xref().
      """
      CheckParent(self)
      pdf_page = _as_pdf_page(self.this)
      annot = JM_get_widget_by_xref(pdf_page, xref)
      if not annot:
          return annot
      annot.thisown = True
      annot.parent = weakref.proxy(self)
      self._annot_refs[id(annot)] = annot
      widget = Widget()
      TOOLS._fill_widget(annot, widget)
      return widget
  @property
  def mediabox(self):
      """The MediaBox.

      For non-PDF pages, the result of fz_bound_page() is used instead.
      """
      CheckParent(self)
      pdf_page = self._pdf_page(required=False)
      if pdf_page.m_internal:
          box = JM_mediabox(pdf_page.obj())
      else:
          box = mupdf.fz_bound_page(self.this)
      return Rect(box)
  @property
  def mediabox_size(self):
      """Point made of the x1 and y1 coordinates of the MediaBox."""
      right = self.mediabox.x1
      bottom = self.mediabox.y1
      return Point(right, bottom)
  8694. #@property
  8695. #def parent( self):
  8696. # assert self._parent
  8697. # if self._parent:
  8698. # return self._parent
  8699. # return Document( self.this.document())
  def read_contents(self):
      """Return all /Contents streams concatenated to one bytes object.

      Delegates to TOOLS._get_all_contents().
      """
      contents = TOOLS._get_all_contents(self)
      return contents
  def refresh(self):
      """Refresh page after link/annot/widget updates.

      Replaces self.this with the result of the document's reload_page().
      """
      CheckParent(self)
      # fixme this looks wrong.
      self.this = self.parent.reload_page(self)
  @property
  def rotation(self):
      """Page rotation; 0 if the page is no PDF page."""
      CheckParent(self)
      pdf_page = _as_pdf_page(self.this, required=0)
      if pdf_page.m_internal:
          return JM_page_rotation(pdf_page)
      return 0
  @property
  def rotation_matrix(self) -> Matrix:
      """Matrix built from TOOLS._rotate_matrix(); reflects page rotation."""
      raw = TOOLS._rotate_matrix(self)
      return Matrix(raw)
  def run(self, dw, m):
      """Run page through a device.

      Args:
          dw: DeviceWrapper; its 'device' attribute receives the page.
          m: matrix, converted with JM_matrix_from_py().
      """
      CheckParent(self)
      ctm = JM_matrix_from_py(m)
      mupdf.fz_run_page(self.this, dw.device, ctm, mupdf.FzCookie())
  def set_artbox(self, rect):
      """Set the ArtBox via _set_pagebox() and return its result."""
      result = self._set_pagebox("ArtBox", rect)
      return result
  def set_bleedbox(self, rect):
      """Set the BleedBox via _set_pagebox() and return its result."""
      result = self._set_pagebox("BleedBox", rect)
      return result
  def set_contents(self, xref):
      """Set object at 'xref' as the page's /Contents.

      Raises:
          ValueError: document closed, document is no PDF, 'xref' is
              outside 1 .. xref_length() - 1, or the object is no stream.
      """
      CheckParent(self)
      pdf = self.parent
      if pdf.is_closed:
          raise ValueError("document closed")
      if not pdf.is_pdf:
          raise ValueError("is no PDF")
      valid_xrefs = range(1, pdf.xref_length())
      if xref not in valid_xrefs:
          raise ValueError("bad xref")
      if not pdf.xref_is_stream(xref):
          raise ValueError("xref is no stream")
      # store an indirect reference to the stream in the page object
      reference = "%i 0 R" % xref
      pdf.xref_set_key(self.xref, "Contents", reference)
  def set_cropbox(self, rect):
      """Set the CropBox via _set_pagebox(). Will also change Page.rect."""
      result = self._set_pagebox("CropBox", rect)
      return result
  def set_language(self, language=None):
      """Set PDF page default language.

      Args:
          language: language string, converted with
              fz_text_language_from_string(). A falsy value removes the
              page's /Lang entry.
      """
      CheckParent(self)
      pdfpage = _as_pdf_page(self.this)
      if not language:
          mupdf.pdf_dict_del(pdfpage.obj(), PDF_NAME('Lang'))
          return
      lang = mupdf.fz_text_language_from_string(language)
      assert hasattr(mupdf, 'fz_string_from_text_language2')
      mupdf.pdf_dict_put_text_string(
          pdfpage.obj(),  # must be called: pass the page object, not the bound method
          PDF_NAME('Lang'),
          mupdf.fz_string_from_text_language2(lang),
      )
  def set_mediabox(self, rect):
      """Set the MediaBox.

      The CropBox, ArtBox, BleedBox and TrimBox entries of the page are
      deleted afterwards.

      Raises:
          ValueError: 'rect' is empty or infinite.
      """
      CheckParent(self)
      pdf_page = self._pdf_page()
      new_box = JM_rect_from_py(rect)
      if mupdf.fz_is_empty_rect(new_box) or mupdf.fz_is_infinite_rect(new_box):
          raise ValueError(MSG_BAD_RECT)
      mupdf.pdf_dict_put_rect(pdf_page.obj(), PDF_NAME('MediaBox'), new_box)
      # drop the other page boxes
      for box_name in ('CropBox', 'ArtBox', 'BleedBox', 'TrimBox'):
          mupdf.pdf_dict_del(pdf_page.obj(), PDF_NAME(box_name))
  def set_rotation(self, rotation):
      """Set page rotation.

      'rotation' is normalized with JM_norm_rotation() and stored under
      the page's /Rotate key.
      """
      CheckParent(self)
      pdf_page = _as_pdf_page(self.this)
      normalized = JM_norm_rotation(rotation)
      mupdf.pdf_dict_put_int(pdf_page.obj(), PDF_NAME('Rotate'), normalized)
  def set_trimbox(self, rect):
      """Set the TrimBox via _set_pagebox() and return its result."""
      result = self._set_pagebox("TrimBox", rect)
      return result
  @property
  def transformation_matrix(self):
      """Page transformation matrix.

      For non-PDF pages the converted default FzMatrix is returned. For
      rotated PDF pages, a flip matrix based on the cropbox height is
      returned instead of the matrix computed by pdf_page_transform().
      """
      CheckParent(self)
      ctm = mupdf.FzMatrix()
      pdf_page = self._pdf_page(required=False)
      if not pdf_page.m_internal:
          return JM_py_from_matrix(ctm)
      mediabox = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)  # fixme: original code passed mediabox=NULL.
      mupdf.pdf_page_transform(pdf_page, mediabox, ctm)
      py_ctm = JM_py_from_matrix(ctm)
      if self.rotation % 360 != 0:
          return Matrix(1, 0, 0, -1, 0, self.cropbox.height)
      return Matrix(py_ctm)
  @property
  def trimbox(self):
      """The TrimBox.

      Falls back to the cropbox if _other_box("TrimBox") returns None.
      Otherwise the y-coordinates are flipped against the MediaBox's y1.
      """
      box = self._other_box("TrimBox")
      if box is None:
          return self.cropbox
      page_bottom = self.mediabox.y1
      x0, y0, x1, y1 = box[0], box[1], box[2], box[3]
      return Rect(x0, page_bottom - y1, x1, page_bottom - y0)
  def widgets(self, types=None):
      """Generator over the widgets of a page.

      Args:
          types: (list) field types to subselect from. If None,
              all fields are returned. E.g. types=[PDF_WIDGET_TYPE_TEXT]
              will only yield text fields.
      """
      # collect the xrefs of all widget annotations before loading any of them
      widget_xrefs = []
      for entry in self.annot_xrefs():
          if entry[1] == mupdf.PDF_ANNOT_WIDGET:
              widget_xrefs.append(entry[0])
      for xref in widget_xrefs:
          widget = self.load_widget(xref)
          if types is not None and widget.field_type not in types:
              continue  # filtered out
          yield widget
  def wrap_contents(self):
      """Ensure page is in a balanced graphics state.

      Inserts as many "q" commands before and "Q" commands after the
      contents as _count_q_balance() reports missing.
      """
      missing_push, missing_pop = self._count_q_balance()
      if missing_push > 0:
          # prepend required push commands
          TOOLS._insert_contents(self, b"q\n" * missing_push, False)
      if missing_pop > 0:
          # append required pop commands
          TOOLS._insert_contents(self, b"\nQ" * missing_pop + b"\n", True)
  @property
  def xref(self):
      """PDF xref number of page, as reported by the parent's page_xref()."""
      CheckParent(self)
      owner = self.parent
      return owner.page_xref(self.number)
  # Expose bound() as the read-only attribute 'rect' (getter only, no setter).
  rect = property(bound, doc="page rectangle")
  8841. class Pixmap:
  def __init__(self, *args):
      """
      Pixmap(colorspace, irect, alpha) - empty pixmap.
      Pixmap(colorspace, src) - copy changing colorspace.
      Pixmap(src, width, height,[clip]) - scaled copy, float dimensions.
      Pixmap(src, alpha=1) - copy and add or drop alpha channel.
      Pixmap(filename) - from an image in a file.
      Pixmap(image) - from an image in memory (bytes).
      Pixmap(colorspace, width, height, samples, alpha) - from samples data.
      Pixmap(PDFdoc, xref) - from an image at xref in a PDF document.

      The arguments are tested against the patterns below with
      args_match(); the first matching branch is used, so the order of
      the branches is significant.

      Raises:
          ValueError: bad source colorspace, alpha value, samples length,
              image data or xref.
          RuntimeError: MSG_PIX_NOALPHA if no alpha-channel pixmap results.
          Exception: the arguments match none of the supported patterns.
      """
      # Cache for property `self.samples_mv`. Set here so __del_() sees it if
      # we raise.
      #
      self._samples_mv = None

      # 2024-01-16: Experimental support for a memory-view of the underlying
      # data. Doesn't seem to make much difference to Pixmap.set_pixel() so
      # not currently used.
      self._memory_view = None

      if args_match(args,
              (Colorspace, mupdf.FzColorspace),
              (mupdf.FzRect, mupdf.FzIrect, IRect, Rect, tuple)
              ):
          # create empty pixmap with colorspace and IRect
          cs, rect = args
          alpha = 0
          pm = mupdf.fz_new_pixmap_with_bbox(cs, JM_irect_from_py(rect), mupdf.FzSeparations(0), alpha)
          self.this = pm

      elif args_match(args,
              (Colorspace, mupdf.FzColorspace),
              (mupdf.FzRect, mupdf.FzIrect, IRect, Rect, tuple),
              (int, bool)
              ):
          # create empty pixmap with colorspace and IRect
          cs, rect, alpha = args
          pm = mupdf.fz_new_pixmap_with_bbox(cs, JM_irect_from_py(rect), mupdf.FzSeparations(0), alpha)
          self.this = pm

      elif args_match(args, (Colorspace, mupdf.FzColorspace, type(None)), (Pixmap, mupdf.FzPixmap)):
          # copy pixmap, converting colorspace
          cs, spix = args
          if isinstance(cs, Colorspace):
              cs = cs.this
          elif cs is None:
              cs = mupdf.FzColorspace(None)
          if isinstance(spix, Pixmap):
              spix = spix.this
          if not mupdf.fz_pixmap_colorspace(spix).m_internal:
              raise ValueError( "source colorspace must not be None")

          if cs.m_internal:
              self.this = mupdf.fz_convert_pixmap(
                      spix,
                      cs,
                      mupdf.FzColorspace(),
                      mupdf.FzDefaultColorspaces(None),
                      mupdf.FzColorParams(),
                      1
                      )
          else:
              # no target colorspace: extract the alpha channel
              self.this = mupdf.fz_new_pixmap_from_alpha_channel( spix)
              if not self.this.m_internal:
                  raise RuntimeError( MSG_PIX_NOALPHA)

      elif args_match(args, (Pixmap, mupdf.FzPixmap), (Pixmap, mupdf.FzPixmap)):
          # add mask to a pixmap w/o alpha channel
          spix, mpix = args
          if isinstance(spix, Pixmap):
              spix = spix.this
          if isinstance(mpix, Pixmap):
              mpix = mpix.this
          spm = spix
          mpm = mpix
          if not spix.m_internal:  # intercept NULL for spix: make alpha only pix
              dst = mupdf.fz_new_pixmap_from_alpha_channel(mpm)
              if not dst.m_internal:
                  raise RuntimeError( MSG_PIX_NOALPHA)
          else:
              dst = mupdf.fz_new_pixmap_from_color_and_mask(spm, mpm)
          self.this = dst

      elif (args_match(args, (Pixmap, mupdf.FzPixmap), (float, int), (float, int), None) or
             args_match(args, (Pixmap, mupdf.FzPixmap), (float, int), (float, int))):
          # create pixmap as scaled copy of another one
          if len(args) == 3:
              spix, w, h = args
              bbox = mupdf.FzIrect(mupdf.fz_infinite_irect)
          else:
              spix, w, h, clip = args
              bbox = JM_irect_from_py(clip)

          src_pix = spix.this if isinstance(spix, Pixmap) else spix
          if not mupdf.fz_is_infinite_irect(bbox):
              pm = mupdf.fz_scale_pixmap(src_pix, src_pix.x(), src_pix.y(), w, h, bbox)
          else:
              pm = mupdf.fz_scale_pixmap(src_pix, src_pix.x(), src_pix.y(), w, h, mupdf.FzIrect(mupdf.fz_infinite_irect))
          self.this = pm

      elif args_match(args, str, (Pixmap, mupdf.FzPixmap)) and args[0] == 'raw':
          # Special raw construction where we set .this directly.
          _, pm = args
          if isinstance(pm, Pixmap):
              pm = pm.this
          self.this = pm

      elif args_match(args, (Pixmap, mupdf.FzPixmap), (int, None)):
          # Pixmap(struct Pixmap *spix, int alpha=1)
          # copy pixmap & add / drop the alpha channel
          spix = args[0]
          alpha = args[1] if len(args) == 2 else 1
          src_pix = spix.this if isinstance(spix, Pixmap) else spix
          if not _INRANGE(alpha, 0, 1):
              raise ValueError( "bad alpha value")
          cs = mupdf.fz_pixmap_colorspace(src_pix)
          if not cs.m_internal and not alpha:
              raise ValueError( "cannot drop alpha for 'NULL' colorspace")
          seps = mupdf.FzSeparations()
          n = mupdf.fz_pixmap_colorants(src_pix)
          w = mupdf.fz_pixmap_width(src_pix)
          h = mupdf.fz_pixmap_height(src_pix)
          pm = mupdf.fz_new_pixmap(cs, w, h, seps, alpha)
          pm.m_internal.x = src_pix.m_internal.x
          pm.m_internal.y = src_pix.m_internal.y
          pm.m_internal.xres = src_pix.m_internal.xres
          pm.m_internal.yres = src_pix.m_internal.yres

          # copy samples data ------------------------------------------
          # We use our pixmap_copy() to get best performance
          # (test_pixmap.py:test_setalpha(): 3.9s t=0.0062). The former
          # memoryview and per-byte Python fallbacks were unreachable and
          # have been removed.
          extra.pixmap_copy( pm.m_internal, src_pix.m_internal, n)
          self.this = pm

      elif args_match(args, (mupdf.FzColorspace, Colorspace), int, int, None, (int, bool)):
          # create pixmap from samples data
          cs, w, h, samples, alpha = args
          if isinstance(cs, Colorspace):
              cs = cs.this
              assert isinstance(cs, mupdf.FzColorspace)
          n = mupdf.fz_colorspace_n(cs)
          stride = (n + alpha) * w
          seps = mupdf.FzSeparations()
          pm = mupdf.fz_new_pixmap(cs, w, h, seps, alpha)

          if isinstance( samples, (bytes, bytearray)):
              samples2 = mupdf.python_buffer_data(samples)
              size = len(samples)
          else:
              res = JM_BufferFromBytes(samples)
              if not res.m_internal:
                  raise ValueError( "bad samples data")
              size, _ = mupdf.fz_buffer_storage(res)
              samples2 = mupdf.python_buffer_data(samples)  # raw swig proxy for `const unsigned char*`.
          if stride * h != size:
              raise ValueError( f"bad samples length {w=} {h=} {alpha=} {n=} {stride=} {size=}")
          mupdf.ll_fz_pixmap_copy_raw( pm.m_internal, samples2)
          self.this = pm

      elif args_match(args, None):
          # create pixmap from filename, file object, pathlib.Path or memory
          imagedata, = args
          name = 'name'
          img = None  # stays None if a path-like / file-like object yields no name
          if hasattr(imagedata, "resolve"):
              fname = str(imagedata)
              if fname:
                  img = mupdf.fz_new_image_from_file(fname)
          elif hasattr(imagedata, name):
              fname = imagedata.name
              if fname:
                  img = mupdf.fz_new_image_from_file(fname)
          elif isinstance(imagedata, str):
              img = mupdf.fz_new_image_from_file(imagedata)
          else:
              res = JM_BufferFromBytes(imagedata)
              if not res.m_internal or not res.m_internal.len:
                  raise ValueError( "bad image data")
              img = mupdf.fz_new_image_from_buffer(res)
          if img is None:
              # previously this fell through to an unbound-variable error
              raise ValueError( "bad image data")

          # Original code passed null for subarea and ctm, but that's not
          # possible with MuPDF's python bindings. The equivalent is an
          # infinite rect and identify matrix scaled by img.w() and img.h().
          pm, w, h = mupdf.fz_get_pixmap_from_image(
                  img,
                  mupdf.FzIrect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT),
                  mupdf.FzMatrix( img.w(), 0, 0, img.h(), 0, 0),
                  )
          xres, yres = mupdf.fz_image_resolution(img)
          pm.m_internal.xres = xres
          pm.m_internal.yres = yres
          self.this = pm

      elif args_match(args, (Document, mupdf.FzDocument), int):
          # Create pixmap from PDF image identified by XREF number
          doc, xref = args
          pdf = _as_pdf_document(doc)
          xreflen = mupdf.pdf_xref_len(pdf)
          if not _INRANGE(xref, 1, xreflen-1):
              raise ValueError( MSG_BAD_XREF)
          ref = mupdf.pdf_new_indirect(pdf, xref, 0)
          type_ = mupdf.pdf_dict_get(ref, PDF_NAME('Subtype'))
          if (not mupdf.pdf_name_eq(type_, PDF_NAME('Image'))
                  and not mupdf.pdf_name_eq(type_, PDF_NAME('Alpha'))
                  and not mupdf.pdf_name_eq(type_, PDF_NAME('Luminosity'))
                  ):
              raise ValueError( MSG_IS_NO_IMAGE)
          img = mupdf.pdf_load_image(pdf, ref)
          # Original code passed null for subarea and ctm, but that's not
          # possible with MuPDF's python bindings. The equivalent is an
          # infinite rect and identify matrix scaled by img.w() and img.h().
          pix, w, h = mupdf.fz_get_pixmap_from_image(
                  img,
                  mupdf.FzIrect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT),
                  mupdf.FzMatrix(img.w(), 0, 0, img.h(), 0, 0),
                  )
          self.this = pix

      else:
          # no pattern matched: report the types and values received
          text = 'Unrecognised args for constructing Pixmap:\n'
          for arg in args:
              text += f' {type(arg)}: {arg}\n'
          raise Exception( text)
  9103. def __len__(self):
  9104. return self.size
  def __repr__(self):
      """Return "Pixmap(colorspace name, irect, alpha)".

      The colorspace name is 'None' if the pixmap has no colorspace.
      Instances of subclasses get the default object representation:
      __repr__ must return a string, returning None makes repr() raise
      a TypeError.
      """
      if type(self) is not Pixmap:
          return object.__repr__(self)
      cs = self.colorspace
      cs_name = cs.this.m_internal.name if cs else 'None'
      return "Pixmap(%s, %s, %s)" % (cs_name, self.irect, self.alpha)
  def _tobytes(self, format_, jpg_quality):
      '''
      Pixmap._tobytes

      Write the pixmap to a memory buffer in the image format selected
      by 'format_' and return JM_BinFromBuffer() of that buffer.
      Unlisted format codes are written as PNG. 'jpg_quality' is only
      used for format 7 (JPEG).
      '''
      # (format code, name of the mupdf writer taking (out, pm))
      writers = (
          (1, "fz_write_pixmap_as_png"),
          (2, "fz_write_pixmap_as_pnm"),
          (3, "fz_write_pixmap_as_pam"),
          (5, "fz_write_pixmap_as_psd"),
          (6, "fz_write_pixmap_as_ps"),
      )
      pm = self.this
      size = mupdf.fz_pixmap_stride(pm) * pm.h()
      res = mupdf.fz_new_buffer(size)
      out = mupdf.FzOutput(res)
      writer_name = next((name for code, name in writers if format_ == code), None)
      if writer_name is not None:
          getattr(mupdf, writer_name)(out, pm)
      elif format_ == 7:
          mupdf.fz_write_pixmap_as_jpeg(out, pm, jpg_quality, 0)
      else:
          mupdf.fz_write_pixmap_as_png(out, pm)
      out.fz_close_output()
      return JM_BinFromBuffer(res)
  def _writeIMG(self, filename, format_, jpg_quality):
      """Save the pixmap to 'filename' in the format selected by 'format_'.

      Unlisted format codes are saved as PNG. 'jpg_quality' is only used
      for format 7 (JPEG).
      """
      # (format code, name of the mupdf saver taking (pm, filename))
      savers = (
          (1, "fz_save_pixmap_as_png"),
          (2, "fz_save_pixmap_as_pnm"),
          (3, "fz_save_pixmap_as_pam"),
          (5, "fz_save_pixmap_as_psd"),
          (6, "fz_save_pixmap_as_ps"),
      )
      pm = self.this
      saver_name = next((name for code, name in savers if format_ == code), None)
      if saver_name is not None:
          getattr(mupdf, saver_name)(pm, filename)
      elif format_ == 7:
          mupdf.fz_save_pixmap_as_jpeg(pm, filename, jpg_quality)
      else:
          mupdf.fz_save_pixmap_as_png(pm, filename)
  @property
  def alpha(self):
      """Result of fz_pixmap_alpha(): indicates presence of alpha channel."""
      pix = self.this
      return mupdf.fz_pixmap_alpha(pix)
  def clear_with(self, value=None, bbox=None):
      """Fill all color components with same value.

      Args:
          value: fill value; if None, fz_clear_pixmap() is used and
              'bbox' is ignored.
          bbox: if given, only this rectangle is filled.
      """
      pix = self.this
      if value is None:
          mupdf.fz_clear_pixmap(pix)
          return
      if bbox is None:
          mupdf.fz_clear_pixmap_with_value(pix, value)
          return
      JM_clear_pixmap_rect_with_value(pix, value, JM_irect_from_py(bbox))
  def color_count(self, colors=0, clip=None):
      '''
      Return count of each color.

      If 'colors' is true, the result of JM_color_count() is returned,
      otherwise only its length.
      '''
      counts = JM_color_count(self.this, clip)
      return counts if colors else len(counts)
  def color_topusage(self, clip=None):
      """Return most frequent color and its usage ratio.

      Returns:
          Tuple (ratio, pixel). If no pixels were counted, the result is
          (1, bytes of 255 with length self.n).
      """
      if clip is not None and self.irect in Rect(clip):
          clip = self.irect
      usage = self.color_count(colors=True, clip=clip)
      total = sum(usage.values())
      if not total:
          return (1, bytes([255] * self.n))
      # the first color with the highest count wins
      top_pixel, top_count = max(usage.items(), key=lambda entry: entry[1])
      return (top_count / total, top_pixel)
  @property
  def colorspace(self):
      """Pixmap Colorspace, or None if its name is "None"."""
      cs = Colorspace(mupdf.fz_pixmap_colorspace(self.this))
      return None if cs.name == "None" else cs
  def copy(self, src, bbox):
      """Copy bbox from another Pixmap.

      Raises:
          ValueError: the source has no colorspace, or source and target
              differ in alpha.
      """
      target = self.this
      source = src.this
      if not mupdf.fz_pixmap_colorspace(source):
          raise ValueError( "cannot copy pixmap with NULL colorspace")
      if target.alpha() != source.alpha():
          raise ValueError( "source and target alpha must be equal")
      area = JM_irect_from_py(bbox)
      mupdf.fz_copy_pixmap_rect(target, source, area, mupdf.FzDefaultColorspaces(None))
  @property
  def digest(self):
      """MD5 digest of pixmap (bytes)."""
      return bytes(mupdf.fz_md5_pixmap2(self.this))
  def gamma_with(self, gamma):
      """Apply correction with some float.

      gamma=1 is a no-op. If the pixmap has no colorspace, only a warning
      is issued."""
      if mupdf.fz_pixmap_colorspace( self.this):
          mupdf.fz_gamma_pixmap( self.this, gamma)
      else:
          message_warning("colorspace invalid for function")
  @property
  def h(self):
      """The height, as reported by fz_pixmap_height()."""
      pix = self.this
      return mupdf.fz_pixmap_height(pix)
  def invert_irect(self, bbox=None):
      """Invert the colors inside a bbox.

      Returns:
          False (after a warning) if the pixmap has no colorspace,
          otherwise True. An infinite bbox inverts the whole pixmap.
      """
      pix = self.this
      if not mupdf.fz_pixmap_colorspace(pix).m_internal:
          message_warning("ignored for stencil pixmap")
          return False
      area = JM_irect_from_py(bbox)
      if mupdf.fz_is_infinite_irect(area):
          mupdf.fz_invert_pixmap(pix)
      else:
          mupdf.fz_invert_pixmap_rect(pix, area)
      return True
  @property
  def irect(self):
      """Pixmap bbox - an IRect object."""
      return JM_py_from_irect(mupdf.fz_pixmap_bbox(self.this))
  @property
  def is_monochrome(self):
      """Result of fz_is_pixmap_monochrome(): check if pixmap is monochrome."""
      pix = self.this
      return mupdf.fz_is_pixmap_monochrome(pix)
  @property
  def is_unicolor(self):
      '''
      Check if pixmap has only one color.

      Every pixel (n consecutive samples) is compared with the first one.
      '''
      pm = self.this
      n = pm.n()
      count = pm.w() * pm.h() * n

      def read_pixel(offset):
          # the n sample values starting at 'offset'
          return [mupdf.fz_samples_get(pm, offset + i) for i in range(n)]

      first = None
      for offset in range(0, count, n):
          pixel = read_pixel(offset)
          if offset == 0:
              first = pixel
          elif pixel != first:
              return False
      return True
  9249. @property
  9250. def n(self):
  9251. """The size of one pixel."""
  9252. if g_use_extra:
  9253. # Setting self.__class__.n gives a small reduction in overhead of
  9254. # test_general.py:test_2093, e.g. 1.4x -> 1.3x.
  9255. #return extra.pixmap_n(self.this)
  9256. def n2(self):
  9257. return extra.pixmap_n(self.this)
  9258. self.__class__.n = property(n2)
  9259. return self.n
  9260. return mupdf.fz_pixmap_components(self.this)
  9261. def pdfocr_save(self, filename, compress=1, language=None, tessdata=None):
  9262. '''
  9263. Save pixmap as an OCR-ed PDF page.
  9264. '''
  9265. tessdata = get_tessdata(tessdata)
  9266. opts = mupdf.FzPdfocrOptions()
  9267. opts.compress = compress
  9268. if language:
  9269. opts.language_set2( language)
  9270. if tessdata:
  9271. opts.datadir_set2( tessdata)
  9272. pix = self.this
  9273. if isinstance(filename, str):
  9274. mupdf.fz_save_pixmap_as_pdfocr( pix, filename, 0, opts)
  9275. else:
  9276. out = JM_new_output_fileptr( filename)
  9277. try:
  9278. mupdf.fz_write_pixmap_as_pdfocr( out, pix, opts)
  9279. finally:
  9280. out.fz_close_output() # Avoid MuPDF warning.
  9281. def pdfocr_tobytes(self, compress=True, language="eng", tessdata=None):
  9282. """Save pixmap as an OCR-ed PDF page.
  9283. Args:
  9284. compress: (bool) compress, default 1 (True).
  9285. language: (str) language(s) occurring on page, default "eng" (English),
  9286. multiples like "eng+ger" for English and German.
  9287. tessdata: (str) folder name of Tesseract's language support. If None
  9288. we use environment variable TESSDATA_PREFIX or search for
  9289. Tesseract installation.
  9290. Notes:
  9291. On failure, make sure Tesseract is installed and you have set
  9292. <tessdata> or environment variable "TESSDATA_PREFIX" to the folder
  9293. containing your Tesseract's language support data.
  9294. """
  9295. tessdata = get_tessdata(tessdata)
  9296. from io import BytesIO
  9297. bio = BytesIO()
  9298. self.pdfocr_save(bio, compress=compress, language=language, tessdata=tessdata)
  9299. return bio.getvalue()
  9300. def pil_image(self):
  9301. """Create a Pillow Image from the Pixmap."""
  9302. try:
  9303. from PIL import Image
  9304. except ImportError:
  9305. message("PIL/Pillow not installed")
  9306. raise
  9307. cspace = self.colorspace
  9308. if not cspace:
  9309. mode = "L"
  9310. elif cspace.n == 1:
  9311. mode = "L" if not self.alpha else "LA"
  9312. elif cspace.n == 3:
  9313. mode = "RGB" if not self.alpha else "RGBA"
  9314. else:
  9315. mode = "CMYK"
  9316. img = Image.frombytes(mode, (self.width, self.height), self.samples)
  9317. return img
  9318. def pil_save(self, *args, **kwargs):
  9319. """Write to image file using Pillow.
  9320. An intermediate PIL Image is created, and its "save" method is used
  9321. to store the image. See Pillow documentation to learn about the
  9322. meaning of possible positional and keyword parameters.
  9323. Use this when other output formats are desired.
  9324. """
  9325. img = self.pil_image()
  9326. if "dpi" not in kwargs.keys():
  9327. kwargs["dpi"] = (self.xres, self.yres)
  9328. img.save(*args, **kwargs)
  9329. def pil_tobytes(self, *args, **kwargs):
  9330. """Convert to an image in memory using Pillow.
  9331. An intermediate PIL Image is created, and its "save" method is used
  9332. to store the image. See Pillow documentation to learn about the
  9333. meaning of possible positional or keyword parameters.
  9334. Use this when other output formats are desired.
  9335. """
  9336. bytes_out = io.BytesIO()
  9337. img = self.pil_image()
  9338. if "dpi" not in kwargs.keys():
  9339. kwargs["dpi"] = (self.xres, self.yres)
  9340. img.save(bytes_out, *args, **kwargs)
  9341. return bytes_out.getvalue()
  9342. def pixel(self, x, y):
  9343. """Get color tuple of pixel (x, y).
  9344. Last item is the alpha if Pixmap.alpha is true."""
  9345. if g_use_extra:
  9346. return extra.pixmap_pixel(self.this.m_internal, x, y)
  9347. if (0
  9348. or x < 0
  9349. or x >= self.this.m_internal.w
  9350. or y < 0
  9351. or y >= self.this.m_internal.h
  9352. ):
  9353. RAISEPY(MSG_PIXEL_OUTSIDE, PyExc_ValueError)
  9354. n = self.this.m_internal.n
  9355. stride = self.this.m_internal.stride
  9356. i = stride * y + n * x
  9357. ret = tuple( self.samples_mv[ i: i+n])
  9358. return ret
  9359. @property
  9360. def samples(self)->bytes:
  9361. mv = self.samples_mv
  9362. return bytes( mv)
  9363. @property
  9364. def samples_mv(self):
  9365. '''
  9366. Pixmap samples memoryview.
  9367. '''
  9368. # We remember the returned memoryview so that our `__del__()` can
  9369. # release it; otherwise accessing it after we have been destructed will
  9370. # fail, possibly crashing Python; this is #4155.
  9371. #
  9372. if self._samples_mv is None:
  9373. self._samples_mv = mupdf.fz_pixmap_samples_memoryview(self.this)
  9374. return self._samples_mv
  9375. def _samples_mv_release(self):
  9376. if self._samples_mv:
  9377. self._samples_mv.release()
  9378. @property
  9379. def samples_ptr(self):
  9380. return mupdf.fz_pixmap_samples_int(self.this)
  9381. def save(self, filename, output=None, jpg_quality=95):
  9382. """Output as image in format determined by filename extension.
  9383. Args:
  9384. output: (str) only use to overrule filename extension. Default is PNG.
  9385. Others are JPEG, JPG, PNM, PGM, PPM, PBM, PAM, PSD, PS.
  9386. """
  9387. valid_formats = {
  9388. "png": 1,
  9389. "pnm": 2,
  9390. "pgm": 2,
  9391. "ppm": 2,
  9392. "pbm": 2,
  9393. "pam": 3,
  9394. "psd": 5,
  9395. "ps": 6,
  9396. "jpg": 7,
  9397. "jpeg": 7,
  9398. }
  9399. if type(filename) is str:
  9400. pass
  9401. elif hasattr(filename, "absolute"):
  9402. filename = str(filename)
  9403. elif hasattr(filename, "name"):
  9404. filename = filename.name
  9405. if output is None:
  9406. _, ext = os.path.splitext(filename)
  9407. output = ext[1:]
  9408. idx = valid_formats.get(output.lower(), None)
  9409. if idx is None:
  9410. raise ValueError(f"Image format {output} not in {tuple(valid_formats.keys())}")
  9411. if self.alpha and idx in (2, 6, 7):
  9412. raise ValueError("'%s' cannot have alpha" % output)
  9413. if self.colorspace and self.colorspace.n > 3 and idx in (1, 2, 4):
  9414. raise ValueError(f"unsupported colorspace for '{output}'")
  9415. if idx == 7:
  9416. self.set_dpi(self.xres, self.yres)
  9417. return self._writeIMG(filename, idx, jpg_quality)
  9418. def set_alpha(self, alphavalues=None, premultiply=1, opaque=None, matte=None):
  9419. """Set alpha channel to values contained in a byte array.
  9420. If omitted, set alphas to 255.
  9421. Args:
  9422. alphavalues: (bytes) with length (width * height) or 'None'.
  9423. premultiply: (bool, True) premultiply colors with alpha values.
  9424. opaque: (tuple, length colorspace.n) this color receives opacity 0.
  9425. matte: (tuple, length colorspace.n)) preblending background color.
  9426. """
  9427. pix = self.this
  9428. alpha = 0
  9429. m = 0
  9430. if pix.alpha() == 0:
  9431. raise ValueError( MSG_PIX_NOALPHA)
  9432. n = mupdf.fz_pixmap_colorants(pix)
  9433. w = mupdf.fz_pixmap_width(pix)
  9434. h = mupdf.fz_pixmap_height(pix)
  9435. balen = w * h * (n+1)
  9436. colors = [0, 0, 0, 0] # make this color opaque
  9437. bgcolor = [0, 0, 0, 0] # preblending background color
  9438. zero_out = 0
  9439. bground = 0
  9440. if opaque and isinstance(opaque, (list, tuple)) and len(opaque) == n:
  9441. for i in range(n):
  9442. colors[i] = opaque[i]
  9443. zero_out = 1
  9444. if matte and isinstance( matte, (tuple, list)) and len(matte) == n:
  9445. for i in range(n):
  9446. bgcolor[i] = matte[i]
  9447. bground = 1
  9448. data = bytes()
  9449. data_len = 0
  9450. if alphavalues:
  9451. #res = JM_BufferFromBytes(alphavalues)
  9452. #data_len, data = mupdf.fz_buffer_storage(res)
  9453. #if data_len < w * h:
  9454. # THROWMSG("bad alpha values")
  9455. # fixme: don't seem to need to create an fz_buffer - can
  9456. # use <alphavalues> directly?
  9457. if isinstance(alphavalues, (bytes, bytearray)):
  9458. data = alphavalues
  9459. data_len = len(alphavalues)
  9460. else:
  9461. assert 0, f'unexpected type for alphavalues: {type(alphavalues)}'
  9462. if data_len < w * h:
  9463. raise ValueError( "bad alpha values")
  9464. if 1:
  9465. # Use C implementation for speed.
  9466. mupdf.Pixmap_set_alpha_helper(
  9467. balen,
  9468. n,
  9469. data_len,
  9470. zero_out,
  9471. mupdf.python_buffer_data( data),
  9472. pix.m_internal,
  9473. premultiply,
  9474. bground,
  9475. colors,
  9476. bgcolor,
  9477. )
  9478. else:
  9479. i = k = j = 0
  9480. data_fix = 255
  9481. while i < balen:
  9482. alpha = data[k]
  9483. if zero_out:
  9484. for j in range(i, i+n):
  9485. if mupdf.fz_samples_get(pix, j) != colors[j - i]:
  9486. data_fix = 255
  9487. break
  9488. else:
  9489. data_fix = 0
  9490. if data_len:
  9491. def fz_mul255( a, b):
  9492. x = a * b + 128
  9493. x += x // 256
  9494. return x // 256
  9495. if data_fix == 0:
  9496. mupdf.fz_samples_set(pix, i+n, 0)
  9497. else:
  9498. mupdf.fz_samples_set(pix, i+n, alpha)
  9499. if premultiply and not bground:
  9500. for j in range(i, i+n):
  9501. mupdf.fz_samples_set(pix, j, fz_mul255( mupdf.fz_samples_get(pix, j), alpha))
  9502. elif bground:
  9503. for j in range( i, i+n):
  9504. m = bgcolor[j - i]
  9505. mupdf.fz_samples_set(pix, j, fz_mul255( mupdf.fz_samples_get(pix, j) - m, alpha))
  9506. else:
  9507. mupdf.fz_samples_set(pix, i+n, data_fix)
  9508. i += n+1
  9509. k += 1
  9510. def tobytes(self, output="png", jpg_quality=95):
  9511. '''
  9512. Convert to binary image stream of desired type.
  9513. '''
  9514. valid_formats = {
  9515. "png": 1,
  9516. "pnm": 2,
  9517. "pgm": 2,
  9518. "ppm": 2,
  9519. "pbm": 2,
  9520. "pam": 3,
  9521. "tga": 4,
  9522. "tpic": 4,
  9523. "psd": 5,
  9524. "ps": 6,
  9525. 'jpg': 7,
  9526. 'jpeg': 7,
  9527. }
  9528. idx = valid_formats.get(output.lower(), None)
  9529. if idx is None:
  9530. raise ValueError(f"Image format {output} not in {tuple(valid_formats.keys())}")
  9531. if self.alpha and idx in (2, 6, 7):
  9532. raise ValueError("'{output}' cannot have alpha")
  9533. if self.colorspace and self.colorspace.n > 3 and idx in (1, 2, 4):
  9534. raise ValueError(f"unsupported colorspace for '{output}'")
  9535. if idx == 7:
  9536. self.set_dpi(self.xres, self.yres)
  9537. barray = self._tobytes(idx, jpg_quality)
  9538. return barray
  9539. def set_dpi(self, xres, yres):
  9540. """Set resolution in both dimensions."""
  9541. pm = self.this
  9542. pm.m_internal.xres = xres
  9543. pm.m_internal.yres = yres
  9544. def set_origin(self, x, y):
  9545. """Set top-left coordinates."""
  9546. pm = self.this
  9547. pm.m_internal.x = x
  9548. pm.m_internal.y = y
  9549. def set_pixel(self, x, y, color):
  9550. """Set color of pixel (x, y)."""
  9551. if g_use_extra:
  9552. return extra.set_pixel(self.this.m_internal, x, y, color)
  9553. pm = self.this
  9554. if not _INRANGE(x, 0, pm.w() - 1) or not _INRANGE(y, 0, pm.h() - 1):
  9555. raise ValueError( MSG_PIXEL_OUTSIDE)
  9556. n = pm.n()
  9557. for j in range(n):
  9558. i = color[j]
  9559. if not _INRANGE(i, 0, 255):
  9560. raise ValueError( MSG_BAD_COLOR_SEQ)
  9561. stride = mupdf.fz_pixmap_stride( pm)
  9562. i = stride * y + n * x
  9563. if 0:
  9564. # Using a cached self._memory_view doesn't actually make much
  9565. # difference to speed.
  9566. if not self._memory_view:
  9567. self._memory_view = self.samples_mv
  9568. for j in range(n):
  9569. self._memory_view[i + j] = color[j]
  9570. else:
  9571. for j in range(n):
  9572. pm.fz_samples_set(i + j, color[j])
  9573. def set_rect(self, bbox, color):
  9574. """Set color of all pixels in bbox."""
  9575. pm = self.this
  9576. n = pm.n()
  9577. c = []
  9578. for j in range(n):
  9579. i = color[j]
  9580. if not _INRANGE(i, 0, 255):
  9581. raise ValueError( MSG_BAD_COLOR_SEQ)
  9582. c.append(i)
  9583. bbox = JM_irect_from_py(bbox)
  9584. i = JM_fill_pixmap_rect_with_color(pm, c, bbox)
  9585. rc = bool(i)
  9586. return rc
  9587. def shrink(self, factor):
  9588. """Divide width and height by 2**factor.
  9589. E.g. factor=1 shrinks to 25% of original size (in place)."""
  9590. if factor < 1:
  9591. message_warning("ignoring shrink factor < 1")
  9592. return
  9593. mupdf.fz_subsample_pixmap( self.this, factor)
  9594. # Pixmap has changed so clear our memory view.
  9595. self._memory_view = None
  9596. self._samples_mv_release()
  9597. @property
  9598. def size(self):
  9599. """Pixmap size."""
  9600. return mupdf.fz_pixmap_size( self.this)
  9601. @property
  9602. def stride(self):
  9603. """Length of one image line (width * n)."""
  9604. return self.this.stride()
  9605. def tint_with(self, black, white):
  9606. """Tint colors with modifiers for black and white."""
  9607. if not self.colorspace or self.colorspace.n > 3:
  9608. message("warning: colorspace invalid for function")
  9609. return
  9610. return mupdf.fz_tint_pixmap( self.this, black, white)
  9611. @property
  9612. def w(self):
  9613. """The width."""
  9614. return mupdf.fz_pixmap_width(self.this)
  9615. def warp(self, quad, width, height):
  9616. """Return pixmap from a warped quad."""
  9617. if not quad.is_convex: raise ValueError("quad must be convex")
  9618. q = JM_quad_from_py(quad)
  9619. points = [ q.ul, q.ur, q.lr, q.ll]
  9620. dst = mupdf.fz_warp_pixmap( self.this, points, width, height)
  9621. return Pixmap( dst)
  9622. @property
  9623. def x(self):
  9624. """x component of Pixmap origin."""
  9625. return mupdf.fz_pixmap_x(self.this)
  9626. @property
  9627. def xres(self):
  9628. """Resolution in x direction."""
  9629. return self.this.xres()
  9630. @property
  9631. def y(self):
  9632. """y component of Pixmap origin."""
  9633. return mupdf.fz_pixmap_y(self.this)
  9634. @property
  9635. def yres(self):
  9636. """Resolution in y direction."""
  9637. return self.this.yres()
# Aliases: `width` and `height` are the same property objects as `w` and `h`.
width = w
height = h
  9640. def __del__(self):
  9641. if self._samples_mv:
  9642. self._samples_mv.release()
# Remove the existing `Point` binding before the class definition that follows.
# NOTE(review): presumably drops a placeholder defined earlier in the file - confirm.
del Point
  9644. class Point:
  9645. def __abs__(self):
  9646. return math.sqrt(self.x * self.x + self.y * self.y)
  9647. def __add__(self, p):
  9648. if hasattr(p, "__float__"):
  9649. return Point(self.x + p, self.y + p)
  9650. if len(p) != 2:
  9651. raise ValueError("Point: bad seq len")
  9652. return Point(self.x + p[0], self.y + p[1])
  9653. def __bool__(self):
  9654. return not (max(self) == min(self) == 0)
  9655. def __eq__(self, p):
  9656. if not hasattr(p, "__len__"):
  9657. return False
  9658. return len(p) == 2 and not (self - p)
  9659. def __getitem__(self, i):
  9660. return (self.x, self.y)[i]
  9661. def __hash__(self):
  9662. return hash(tuple(self))
  9663. def __init__(self, *args, x=None, y=None):
  9664. '''
  9665. Point() - all zeros
  9666. Point(x, y)
  9667. Point(Point) - new copy
  9668. Point(sequence) - from 'sequence'
  9669. Explicit keyword args x, y override earlier settings if not None.
  9670. '''
  9671. if not args:
  9672. self.x = 0.0
  9673. self.y = 0.0
  9674. elif len(args) > 2:
  9675. raise ValueError("Point: bad seq len")
  9676. elif len(args) == 2:
  9677. self.x = float(args[0])
  9678. self.y = float(args[1])
  9679. elif len(args) == 1:
  9680. l = args[0]
  9681. if isinstance(l, (mupdf.FzPoint, mupdf.fz_point)):
  9682. self.x = l.x
  9683. self.y = l.y
  9684. else:
  9685. if not hasattr(l, "__getitem__"):
  9686. raise ValueError("Point: bad args")
  9687. if len(l) != 2:
  9688. raise ValueError("Point: bad seq len")
  9689. self.x = float(l[0])
  9690. self.y = float(l[1])
  9691. else:
  9692. raise ValueError("Point: bad seq len")
  9693. if x is not None: self.x = x
  9694. if y is not None: self.y = y
  9695. def __len__(self):
  9696. return 2
  9697. def __mul__(self, m):
  9698. if hasattr(m, "__float__"):
  9699. return Point(self.x * m, self.y * m)
  9700. if hasattr(m, "__getitem__") and len(m) == 2:
  9701. # dot product
  9702. return self.x * m[0] + self.y * m[1]
  9703. p = Point(self)
  9704. return p.transform(m)
  9705. def __neg__(self):
  9706. return Point(-self.x, -self.y)
  9707. def __nonzero__(self):
  9708. return not (max(self) == min(self) == 0)
  9709. def __pos__(self):
  9710. return Point(self)
  9711. def __repr__(self):
  9712. return "Point" + str(tuple(self))
  9713. def __setitem__(self, i, v):
  9714. v = float(v)
  9715. if i == 0: self.x = v
  9716. elif i == 1: self.y = v
  9717. else:
  9718. raise IndexError("index out of range")
  9719. return None
  9720. def __sub__(self, p):
  9721. if hasattr(p, "__float__"):
  9722. return Point(self.x - p, self.y - p)
  9723. if len(p) != 2:
  9724. raise ValueError("Point: bad seq len")
  9725. return Point(self.x - p[0], self.y - p[1])
  9726. def __truediv__(self, m):
  9727. if hasattr(m, "__float__"):
  9728. return Point(self.x * 1./m, self.y * 1./m)
  9729. m1 = util_invert_matrix(m)[1]
  9730. if not m1:
  9731. raise ZeroDivisionError("matrix not invertible")
  9732. p = Point(self)
  9733. return p.transform(m1)
  9734. @property
  9735. def abs_unit(self):
  9736. """Unit vector with positive coordinates."""
  9737. s = self.x * self.x + self.y * self.y
  9738. if s < EPSILON:
  9739. return Point(0,0)
  9740. s = math.sqrt(s)
  9741. return Point(abs(self.x) / s, abs(self.y) / s)
  9742. def distance_to(self, *args):
  9743. """Return distance to rectangle or another point."""
  9744. if not len(args) > 0:
  9745. raise ValueError("at least one parameter must be given")
  9746. x = args[0]
  9747. if len(x) == 2:
  9748. x = Point(x)
  9749. elif len(x) == 4:
  9750. x = Rect(x)
  9751. else:
  9752. raise ValueError("arg1 must be point-like or rect-like")
  9753. if len(args) > 1:
  9754. unit = args[1]
  9755. else:
  9756. unit = "px"
  9757. u = {"px": (1.,1.), "in": (1.,72.), "cm": (2.54, 72.),
  9758. "mm": (25.4, 72.)}
  9759. f = u[unit][0] / u[unit][1]
  9760. if type(x) is Point:
  9761. return abs(self - x) * f
  9762. # from here on, x is a rectangle
  9763. # as a safeguard, make a finite copy of it
  9764. r = Rect(x.top_left, x.top_left)
  9765. r = r | x.bottom_right
  9766. if self in r:
  9767. return 0.0
  9768. if self.x > r.x1:
  9769. if self.y >= r.y1:
  9770. return self.distance_to(r.bottom_right, unit)
  9771. elif self.y <= r.y0:
  9772. return self.distance_to(r.top_right, unit)
  9773. else:
  9774. return (self.x - r.x1) * f
  9775. elif r.x0 <= self.x <= r.x1:
  9776. if self.y >= r.y1:
  9777. return (self.y - r.y1) * f
  9778. else:
  9779. return (r.y0 - self.y) * f
  9780. else:
  9781. if self.y >= r.y1:
  9782. return self.distance_to(r.bottom_left, unit)
  9783. elif self.y <= r.y0:
  9784. return self.distance_to(r.top_left, unit)
  9785. else:
  9786. return (r.x0 - self.x) * f
  9787. def transform(self, m):
  9788. """Replace point by its transformation with matrix-like m."""
  9789. if len(m) != 6:
  9790. raise ValueError("Matrix: bad seq len")
  9791. self.x, self.y = util_transform_point(self, m)
  9792. return self
  9793. @property
  9794. def unit(self):
  9795. """Unit vector of the point."""
  9796. s = self.x * self.x + self.y * self.y
  9797. if s < EPSILON:
  9798. return Point(0,0)
  9799. s = math.sqrt(s)
  9800. return Point(self.x / s, self.y / s)
  9801. __div__ = __truediv__
  9802. norm = __abs__
  9803. class Quad:
  9804. def __abs__(self):
  9805. if self.is_empty:
  9806. return 0.0
  9807. return abs(self.ul - self.ur) * abs(self.ul - self.ll)
  9808. def __add__(self, q):
  9809. if hasattr(q, "__float__"):
  9810. return Quad(self.ul + q, self.ur + q, self.ll + q, self.lr + q)
  9811. if len(q) != 4:
  9812. raise ValueError("Quad: bad seq len")
  9813. return Quad(self.ul + q[0], self.ur + q[1], self.ll + q[2], self.lr + q[3])
  9814. def __bool__(self):
  9815. return not self.is_empty
  9816. def __contains__(self, x):
  9817. try:
  9818. l = x.__len__()
  9819. except Exception:
  9820. if g_exceptions_verbose > 1: exception_info()
  9821. return False
  9822. if l == 2:
  9823. return util_point_in_quad(x, self)
  9824. if l != 4:
  9825. return False
  9826. if CheckRect(x):
  9827. if Rect(x).is_empty:
  9828. return True
  9829. return util_point_in_quad(x[:2], self) and util_point_in_quad(x[2:], self)
  9830. if CheckQuad(x):
  9831. for i in range(4):
  9832. if not util_point_in_quad(x[i], self):
  9833. return False
  9834. return True
  9835. return False
  9836. def __eq__(self, quad):
  9837. if not hasattr(quad, "__len__"):
  9838. return False
  9839. return len(quad) == 4 and (
  9840. self.ul == quad[0] and
  9841. self.ur == quad[1] and
  9842. self.ll == quad[2] and
  9843. self.lr == quad[3]
  9844. )
  9845. def __getitem__(self, i):
  9846. return (self.ul, self.ur, self.ll, self.lr)[i]
  9847. def __hash__(self):
  9848. return hash(tuple(self))
  9849. def __init__(self, *args, ul=None, ur=None, ll=None, lr=None):
  9850. '''
  9851. Quad() - all zero points
  9852. Quad(ul, ur, ll, lr)
  9853. Quad(quad) - new copy
  9854. Quad(sequence) - from 'sequence'
  9855. Explicit keyword args ul, ur, ll, lr override earlier settings if not
  9856. None.
  9857. '''
  9858. if not args:
  9859. self.ul = self.ur = self.ll = self.lr = Point()
  9860. elif len(args) > 4:
  9861. raise ValueError("Quad: bad seq len")
  9862. elif len(args) == 4:
  9863. self.ul, self.ur, self.ll, self.lr = map(Point, args)
  9864. elif len(args) == 1:
  9865. l = args[0]
  9866. if isinstance(l, mupdf.FzQuad):
  9867. self.this = l
  9868. self.ul, self.ur, self.ll, self.lr = Point(l.ul), Point(l.ur), Point(l.ll), Point(l.lr)
  9869. elif not hasattr(l, "__getitem__"):
  9870. raise ValueError("Quad: bad args")
  9871. elif len(l) != 4:
  9872. raise ValueError("Quad: bad seq len")
  9873. else:
  9874. self.ul, self.ur, self.ll, self.lr = map(Point, l)
  9875. else:
  9876. raise ValueError("Quad: bad args")
  9877. if ul is not None: self.ul = Point(ul)
  9878. if ur is not None: self.ur = Point(ur)
  9879. if ll is not None: self.ll = Point(ll)
  9880. if lr is not None: self.lr = Point(lr)
  9881. def __len__(self):
  9882. return 4
  9883. def __mul__(self, m):
  9884. q = Quad(self)
  9885. q = q.transform(m)
  9886. return q
  9887. def __neg__(self):
  9888. return Quad(-self.ul, -self.ur, -self.ll, -self.lr)
  9889. def __nonzero__(self):
  9890. return not self.is_empty
  9891. def __pos__(self):
  9892. return Quad(self)
  9893. def __repr__(self):
  9894. return "Quad" + str(tuple(self))
  9895. def __setitem__(self, i, v):
  9896. if i == 0: self.ul = Point(v)
  9897. elif i == 1: self.ur = Point(v)
  9898. elif i == 2: self.ll = Point(v)
  9899. elif i == 3: self.lr = Point(v)
  9900. else:
  9901. raise IndexError("index out of range")
  9902. return None
  9903. def __sub__(self, q):
  9904. if hasattr(q, "__float__"):
  9905. return Quad(self.ul - q, self.ur - q, self.ll - q, self.lr - q)
  9906. if len(q) != 4:
  9907. raise ValueError("Quad: bad seq len")
  9908. return Quad(self.ul - q[0], self.ur - q[1], self.ll - q[2], self.lr - q[3])
  9909. def __truediv__(self, m):
  9910. if hasattr(m, "__float__"):
  9911. im = 1. / m
  9912. else:
  9913. im = util_invert_matrix(m)[1]
  9914. if not im:
  9915. raise ZeroDivisionError("Matrix not invertible")
  9916. q = Quad(self)
  9917. q = q.transform(im)
  9918. return q
  9919. @property
  9920. def is_convex(self):
  9921. """Check if quad is convex and not degenerate.
  9922. Notes:
  9923. Check that for the two diagonals, the other two corners are not
  9924. on the same side of the diagonal.
  9925. Returns:
  9926. True or False.
  9927. """
  9928. m = planish_line(self.ul, self.lr) # puts this diagonal on x-axis
  9929. p1 = self.ll * m # transform the
  9930. p2 = self.ur * m # other two points
  9931. if p1.y * p2.y > 0:
  9932. return False
  9933. m = planish_line(self.ll, self.ur) # puts other diagonal on x-axis
  9934. p1 = self.lr * m # transform the
  9935. p2 = self.ul * m # remaining points
  9936. if p1.y * p2.y > 0:
  9937. return False
  9938. return True
  9939. @property
  9940. def is_empty(self):
  9941. """Check whether all quad corners are on the same line.
  9942. This is the case if width or height is zero.
  9943. """
  9944. return self.width < EPSILON or self.height < EPSILON
  9945. @property
  9946. def is_infinite(self):
  9947. """Check whether this is the infinite quad."""
  9948. return self.rect.is_infinite
  9949. @property
  9950. def is_rectangular(self):
  9951. """Check if quad is rectangular.
  9952. Notes:
  9953. Some rotation matrix can thus transform it into a rectangle.
  9954. This is equivalent to three corners enclose 90 degrees.
  9955. Returns:
  9956. True or False.
  9957. """
  9958. sine = util_sine_between(self.ul, self.ur, self.lr)
  9959. if abs(sine - 1) > EPSILON: # the sine of the angle
  9960. return False
  9961. sine = util_sine_between(self.ur, self.lr, self.ll)
  9962. if abs(sine - 1) > EPSILON:
  9963. return False
  9964. sine = util_sine_between(self.lr, self.ll, self.ul)
  9965. if abs(sine - 1) > EPSILON:
  9966. return False
  9967. return True
  9968. def morph(self, p, m):
  9969. """Morph the quad with matrix-like 'm' and point-like 'p'.
  9970. Return a new quad."""
  9971. if self.is_infinite:
  9972. return INFINITE_QUAD()
  9973. delta = Matrix(1, 1).pretranslate(p.x, p.y)
  9974. q = self * ~delta * m * delta
  9975. return q
  9976. @property
  9977. def rect(self):
  9978. r = Rect()
  9979. r.x0 = min(self.ul.x, self.ur.x, self.lr.x, self.ll.x)
  9980. r.y0 = min(self.ul.y, self.ur.y, self.lr.y, self.ll.y)
  9981. r.x1 = max(self.ul.x, self.ur.x, self.lr.x, self.ll.x)
  9982. r.y1 = max(self.ul.y, self.ur.y, self.lr.y, self.ll.y)
  9983. return r
  9984. def transform(self, m):
  9985. """Replace quad by its transformation with matrix m."""
  9986. if hasattr(m, "__float__"):
  9987. pass
  9988. elif len(m) != 6:
  9989. raise ValueError("Matrix: bad seq len")
  9990. self.ul *= m
  9991. self.ur *= m
  9992. self.ll *= m
  9993. self.lr *= m
  9994. return self
  9995. __div__ = __truediv__
  9996. width = property(lambda self: max(abs(self.ul - self.ur), abs(self.ll - self.lr)))
  9997. height = property(lambda self: max(abs(self.ul - self.ll), abs(self.ur - self.lr)))
  9998. class Rect:
  9999. def __abs__(self):
  10000. if self.is_empty or self.is_infinite:
  10001. return 0.0
  10002. return (self.x1 - self.x0) * (self.y1 - self.y0)
  10003. def __add__(self, p):
  10004. if hasattr(p, "__float__"):
  10005. return Rect(self.x0 + p, self.y0 + p, self.x1 + p, self.y1 + p)
  10006. if len(p) != 4:
  10007. raise ValueError("Rect: bad seq len")
  10008. return Rect(self.x0 + p[0], self.y0 + p[1], self.x1 + p[2], self.y1 + p[3])
  10009. def __and__(self, x):
  10010. if not hasattr(x, "__len__"):
  10011. raise ValueError("bad operand 2")
  10012. r1 = Rect(x)
  10013. r = Rect(self)
  10014. return r.intersect(r1)
  10015. def __bool__(self):
  10016. return not (max(self) == min(self) == 0)
  10017. def __contains__(self, x):
  10018. if hasattr(x, "__float__"):
  10019. return x in tuple(self)
  10020. l = len(x)
  10021. if l == 2:
  10022. return util_is_point_in_rect(x, self)
  10023. if l == 4:
  10024. r = INFINITE_RECT()
  10025. try:
  10026. r = Rect(x)
  10027. except Exception:
  10028. if g_exceptions_verbose > 1: exception_info()
  10029. r = Quad(x).rect
  10030. return (self.x0 <= r.x0 <= r.x1 <= self.x1 and
  10031. self.y0 <= r.y0 <= r.y1 <= self.y1)
  10032. return False
  10033. def __eq__(self, rect):
  10034. if not hasattr(rect, "__len__"):
  10035. return False
  10036. return len(rect) == 4 and not (self - rect)
  10037. def __getitem__(self, i):
  10038. return (self.x0, self.y0, self.x1, self.y1)[i]
  10039. def __hash__(self):
  10040. return hash(tuple(self))
  10041. def __init__(self, *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
  10042. """
  10043. Rect() - all zeros
  10044. Rect(x0, y0, x1, y1)
  10045. Rect(top-left, x1, y1)
  10046. Rect(x0, y0, bottom-right)
  10047. Rect(top-left, bottom-right)
  10048. Rect(Rect or IRect) - new copy
  10049. Rect(sequence) - from 'sequence'
  10050. Explicit keyword args p0, p1, x0, y0, x1, y1 override earlier settings
  10051. if not None.
  10052. """
  10053. x0, y0, x1, y1 = util_make_rect( *args, p0=p0, p1=p1, x0=x0, y0=y0, x1=x1, y1=y1)
  10054. self.x0 = float( x0)
  10055. self.y0 = float( y0)
  10056. self.x1 = float( x1)
  10057. self.y1 = float( y1)
  10058. def __len__(self):
  10059. return 4
  10060. def __mul__(self, m):
  10061. if hasattr(m, "__float__"):
  10062. return Rect(self.x0 * m, self.y0 * m, self.x1 * m, self.y1 * m)
  10063. r = Rect(self)
  10064. r = r.transform(m)
  10065. return r
  10066. def __neg__(self):
  10067. return Rect(-self.x0, -self.y0, -self.x1, -self.y1)
  10068. def __nonzero__(self):
  10069. return not (max(self) == min(self) == 0)
  10070. def __or__(self, x):
  10071. if not hasattr(x, "__len__"):
  10072. raise ValueError("bad operand 2")
  10073. r = Rect(self)
  10074. if len(x) == 2:
  10075. return r.include_point(x)
  10076. if len(x) == 4:
  10077. return r.include_rect(x)
  10078. raise ValueError("bad operand 2")
  10079. def __pos__(self):
  10080. return Rect(self)
  10081. def __repr__(self):
  10082. return "Rect" + str(tuple(self))
  10083. def __setitem__(self, i, v):
  10084. v = float(v)
  10085. if i == 0: self.x0 = v
  10086. elif i == 1: self.y0 = v
  10087. elif i == 2: self.x1 = v
  10088. elif i == 3: self.y1 = v
  10089. else:
  10090. raise IndexError("index out of range")
  10091. return None
  10092. def __sub__(self, p):
  10093. if hasattr(p, "__float__"):
  10094. return Rect(self.x0 - p, self.y0 - p, self.x1 - p, self.y1 - p)
  10095. if len(p) != 4:
  10096. raise ValueError("Rect: bad seq len")
  10097. return Rect(self.x0 - p[0], self.y0 - p[1], self.x1 - p[2], self.y1 - p[3])
  10098. def __truediv__(self, m):
  10099. if hasattr(m, "__float__"):
  10100. return Rect(self.x0 * 1./m, self.y0 * 1./m, self.x1 * 1./m, self.y1 * 1./m)
  10101. im = util_invert_matrix(m)[1]
  10102. if not im:
  10103. raise ZeroDivisionError(f"Matrix not invertible: {m}")
  10104. r = Rect(self)
  10105. r = r.transform(im)
  10106. return r
  10107. @property
  10108. def bottom_left(self):
  10109. """Bottom-left corner."""
  10110. return Point(self.x0, self.y1)
  10111. @property
  10112. def bottom_right(self):
  10113. """Bottom-right corner."""
  10114. return Point(self.x1, self.y1)
  10115. def contains(self, x):
  10116. """Check if containing point-like or rect-like x."""
  10117. return self.__contains__(x)
  10118. @property
  10119. def height(self):
  10120. return max(0, self.y1 - self.y0)
  10121. def include_point(self, p):
  10122. """Extend to include point-like p."""
  10123. if len(p) != 2:
  10124. raise ValueError("Point: bad seq len")
  10125. self.x0, self.y0, self.x1, self.y1 = util_include_point_in_rect(self, p)
  10126. return self
  10127. def include_rect(self, r):
  10128. """Extend to include rect-like r."""
  10129. if len(r) != 4:
  10130. raise ValueError("Rect: bad seq len")
  10131. r = Rect(r)
  10132. if r.is_infinite or self.is_infinite:
  10133. self.x0, self.y0, self.x1, self.y1 = FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT
  10134. elif r.is_empty:
  10135. return self
  10136. elif self.is_empty:
  10137. self.x0, self.y0, self.x1, self.y1 = r.x0, r.y0, r.x1, r.y1
  10138. else:
  10139. self.x0, self.y0, self.x1, self.y1 = util_union_rect(self, r)
  10140. return self
  def intersect(self, r):
      """Shrink this rectangle in place to its common area with rect-like r.

      An infinite r leaves self unchanged. If self is infinite or r is
      empty, self becomes a copy of r. An empty self stays as it is.
      Raises ValueError unless r has exactly four items. Returns self.
      """
      if len(r) != 4:
          raise ValueError("Rect: bad seq len")
      other = Rect(r)
      if other.is_infinite:
          return self
      if self.is_infinite or other.is_empty:
          bounds = (other.x0, other.y0, other.x1, other.y1)
      elif self.is_empty:
          return self
      else:
          bounds = util_intersect_rect(self, other)
      self.x0, self.y0, self.x1, self.y1 = bounds
      return self
  def intersects(self, x):
      """Tell whether this rectangle and rect-like x share a non-empty area.

      Always False when either rectangle is empty or infinite.
      """
      other = Rect(x)
      if self.is_empty or self.is_infinite:
          return False
      if other.is_empty or other.is_infinite:
          return False
      # Strict comparisons: rectangles that merely touch do not intersect.
      return (
          self.x0 < other.x1
          and other.x0 < self.x1
          and self.y0 < other.y1
          and other.y0 < self.y1
      )
  @property
  def is_empty(self):
      """True if the rectangle has no area (x0 >= x1 or y0 >= y1)."""
      if self.x0 >= self.x1:
          return True
      return self.y0 >= self.y1
  @property
  def is_infinite(self):
      """True if the coordinates are exactly those of the infinite rectangle."""
      if not self.x0 == self.y0 == FZ_MIN_INF_RECT:
          return False
      return self.x1 == self.y1 == FZ_MAX_INF_RECT
  @property
  def is_valid(self):
      """True if x0 <= x1 and y0 <= y1."""
      if not self.x0 <= self.x1:
          return False
      return self.y0 <= self.y1
  def morph(self, p, m):
      """Morph with point-like p and matrix-like m, returning a new quad.

      The infinite rectangle yields INFINITE_QUAD(); otherwise the work is
      delegated to this rectangle's quad.
      """
      return INFINITE_QUAD() if self.is_infinite else self.quad.morph(p, m)
  def norm(self):
      """Return the square root of the sum of the squared coordinates."""
      total = 0
      for coord in self:
          total += coord * coord
      return math.sqrt(total)
  def normalize(self):
      """Swap coordinates in place where needed so x0 <= x1 and y0 <= y1.

      Returns self.
      """
      for low_name, high_name in (("x0", "x1"), ("y0", "y1")):
          low = getattr(self, low_name)
          high = getattr(self, high_name)
          if high < low:
              setattr(self, low_name, high)
              setattr(self, high_name, low)
      return self
  @property
  def quad(self):
      """Quad built from the four corners in the order tl, tr, bl, br."""
      corners = (self.tl, self.tr, self.bl, self.br)
      return Quad(*corners)
  def round(self):
      """Return the IRect built from util_round_rect() of this rectangle."""
      rounded = util_round_rect(self)
      return IRect(rounded)
  @property
  def top_left(self):
      """Top-left corner, i.e. the Point (x0, y0)."""
      x, y = self.x0, self.y0
      return Point(x, y)
  @property
  def top_right(self):
      """Top-right corner, i.e. the Point (x1, y0)."""
      x, y = self.x1, self.y0
      return Point(x, y)
  def torect(self, r):
      """Return the matrix that maps this rectangle onto rect-like r.

      Raises ValueError if either rectangle is infinite or empty.
      """
      target = Rect(r)
      if any(rc.is_infinite or rc.is_empty for rc in (self, target)):
          raise ValueError("rectangles must be finite and not empty")
      # Move our top-left to the origin, scale to the target's size, then
      # move to the target's top-left.
      to_origin = Matrix(1, 0, 0, 1, -self.x0, -self.y0)
      scale = Matrix(target.width / self.width, target.height / self.height)
      to_target = Matrix(1, 0, 0, 1, target.x0, target.y0)
      return to_origin * scale * to_target
  def transform(self, m):
      """Replace this rectangle in place with its transformation by matrix-like m.

      Raises ValueError unless m has exactly six items. Returns self.
      """
      if len(m) != 6:
          raise ValueError("Matrix: bad seq len")
      transformed = util_transform_rect(self, m)
      self.x0, self.y0, self.x1, self.y1 = transformed
      return self
  @property
  def width(self):
      """Horizontal extent x1 - x0; 0 when that difference is not positive."""
      extent = self.x1 - self.x0
      return extent if extent > 0 else 0
  # Short names for the four corner properties.
  tl = top_left
  tr = top_right
  bl = bottom_left
  br = bottom_right
  # `irect` exposes round() as a read-only property.
  irect = property(round)
  # `__div__` reuses the `__truediv__` implementation.
  __div__ = __truediv__
  class Story:
      '''
      Python wrapper for a MuPDF story (`mupdf.FzStory` / `mupdf.FzStoryS`):
      HTML content that can be placed into rectangles and drawn onto devices.
      '''

      def __init__( self, html='', user_css=None, em=12, archive=None):
          '''
          Create a story from HTML text.

          Args:
              html:
                  HTML source text; it is encoded as UTF-8 before being
                  handed to MuPDF.
              user_css:
                  Passed unchanged to the MuPDF story constructor.
              em:
                  Passed unchanged to the MuPDF story constructor.
              archive:
                  An `Archive`, or anything accepted by `Archive()`. A falsy
                  value selects an empty `mupdf.FzArchive`.
          '''
          buffer_ = mupdf.fz_new_buffer_from_copied_data( html.encode('utf-8'))
          if archive and not isinstance(archive, Archive):
              archive = Archive(archive)
          arch = archive.this if archive else mupdf.FzArchive( None)
          # Use `FzStoryS` when this mupdf build provides it.
          if hasattr(mupdf, 'FzStoryS'):
              self.this = mupdf.FzStoryS( buffer_, user_css, em, arch)
          else:
              self.this = mupdf.FzStory( buffer_, user_css, em, arch)

      def add_header_ids(self):
          '''
          Look for `<h1..6>` items in `self` and adds unique `id`
          attributes if not already present.

          Generated ids have the form `h_id_<n>`, where `<n>` counts the
          ids created by this call, starting at 0.
          '''
          dom = self.body
          i = 0
          x = dom.find(None, None, None)
          while x:
              name = x.tagname
              if len(name) == 2 and name[0]=="h" and name[1] in "123456":
                  attr = x.get_attribute_value("id")
                  if not attr:
                      id_ = f"h_id_{i}"
                      x.set_attribute("id", id_)
                      i += 1
              x = x.find_next(None, None, None)

      @staticmethod
      def add_pdf_links(document_or_stream, positions):
          """
          Adds links to PDF document.
          Args:
              document_or_stream:
                  A PDF `Document` or raw PDF content, for example an
                  `io.BytesIO` instance.
              positions:
                  List of `ElementPosition`'s for `document_or_stream`,
                  typically from Story.element_positions(). Only positions
                  whose `open_close` has bit 0 set are considered. If two
                  or more such positions have the same id, the first one
                  is used as link destination and the others are ignored.
          Returns:
              `document_or_stream` if a `Document` instance, otherwise a
              new `Document` instance.
          We raise `RuntimeError` if an `href` in `positions` refers to an
          internal position `#<name>` but no item in `positions` has `id =
          name`.
          """
          if isinstance(document_or_stream, Document):
              document = document_or_stream
          else:
              document = Document("pdf", document_or_stream)

          # Create dict from id to position, which we will use to find
          # link destinations. The first position seen for an id wins.
          id_to_position = dict()
          for position in positions:
              if (position.open_close & 1) and position.id:
                  id_to_position.setdefault(position.id, position)

          # Insert links for all positions that have an `href`.
          for position_from in positions:
              if not ((position_from.open_close & 1) and position_from.href):
                  continue
              link = dict()
              link['from'] = Rect(position_from.rect)

              if position_from.href.startswith("#"):
                  # `<a href="#...">...</a>` internal link.
                  target_id = position_from.href[1:]
                  try:
                      position_to = id_to_position[ target_id]
                  except KeyError as e:
                      if g_exceptions_verbose > 1: exception_info()
                      raise RuntimeError(f"No destination with id={target_id}, required by position_from: {position_from}") from e
                  # Make link from `position_from`'s rect to top-left of
                  # `position_to`'s rect.
                  link["kind"] = LINK_GOTO
                  x0, y0, x1, y1 = position_to.rect
                  # This appears to work well with viewers which scroll
                  # to make destination point top-left of window.
                  link["to"] = Point(x0, y0)
                  link["page"] = position_to.page_num - 1
              else:
                  # `<a href="...">...</a>` external link.
                  if position_from.href.startswith('name:'):
                      link['kind'] = LINK_NAMED
                      link['name'] = position_from.href[5:]
                  else:
                      link['kind'] = LINK_URI
                      link['uri'] = position_from.href

              # `page_num` is 1-based, document pages are 0-based.
              document[position_from.page_num - 1].insert_link(link)

          return document

      @property
      def body(self):
          '''The body tag of the story's DOM, from `self.document().bodytag()`.'''
          dom = self.document()
          return dom.bodytag()

      def document( self):
          '''Return the story's DOM wrapped in an `Xml` instance.'''
          dom = mupdf.fz_story_document( self.this)
          return Xml( dom)

      def draw( self, device, matrix=None):
          '''
          Draw the story onto `device` using `matrix`.

          `matrix` is converted with `JM_matrix_from_py()`. A falsy `device`
          is replaced by `mupdf.FzDevice(None)`.
          '''
          ctm2 = JM_matrix_from_py( matrix)
          dev = device.this if device else mupdf.FzDevice( None)
          mupdf.fz_draw_story( self.this, dev, ctm2)

      def element_positions( self, function, args=None):
          '''
          Trigger a callback function to record where items have been placed.

          Args:
              function:
                  Callable taking exactly one positional argument. It is
                  called with an object carrying the attributes `depth`,
                  `heading`, `id`, `rect`, `text`, `open_close`,
                  `rect_num` and `href`.
              args:
                  Optional dict whose items are set as additional
                  attributes on that object; keys must be strings that are
                  valid identifiers. Anything that is not a dict is
                  ignored.
          Raises `ValueError` for an invalid key in `args` or an unsuitable
          `function`.
          '''
          if type(args) is dict:
              for k in args:
                  if not (type(k) is str and k.isidentifier()):
                      raise ValueError(f"invalid key '{k}'")
          else:
              args = {}
          if not callable(function) or function.__code__.co_argcount != 1:
              raise ValueError("callback 'function' must be a callable with exactly one argument")

          def function2( position):
              # Copy the MuPDF position into a plain Python object before
              # handing it to the user's callback.
              class Position2:
                  pass
              position2 = Position2()
              position2.depth = position.depth
              position2.heading = position.heading
              position2.id = position.id
              position2.rect = JM_py_from_rect(position.rect)
              position2.text = position.text
              position2.open_close = position.open_close
              position2.rect_num = position.rectangle_num
              position2.href = position.href
              if args:
                  for k, v in args.items():
                      setattr( position2, k, v)
              function( position2)
          mupdf.fz_story_positions( self.this, function2)

      def place( self, where):
          '''
          Place story content into rect-like `where`.

          Returns `(more, filled)`: the value returned by
          `mupdf.fz_place_story()` and the filled rectangle converted with
          `JM_py_from_rect()`.
          '''
          where = JM_rect_from_py( where)
          filled = mupdf.FzRect()
          more = mupdf.fz_place_story( self.this, where, filled)
          return more, JM_py_from_rect( filled)

      def reset( self):
          '''Reset the story via `mupdf.fz_reset_story()`.'''
          mupdf.fz_reset_story( self.this)

      def write(self, writer, rectfn, positionfn=None, pagefn=None):
          '''
          Place and draw the story rectangle by rectangle until nothing is
          left over.

          Args:
              writer:
                  Object providing `begin_page(mediabox)` and `end_page()`.
                  If falsy, the story is still placed and drawn (onto a
                  null device) but no pages are produced.
              rectfn:
                  Called as `rectfn(rect_num, filled)` and must return
                  `(mediabox, rect, ctm)`. A truthy `mediabox` starts a new
                  page; `rect` is where the story is placed and `ctm` is
                  used for drawing.
              positionfn:
                  Optional; called for each element position after every
                  placement. A `.page_num` attribute is added first.
              pagefn:
                  Optional; called as `pagefn(page_num, mediabox, dev,
                  after)` with `after=0` when a page has been started and
                  `after=1` before it is ended.
          '''
          dev = None
          page_num = 0
          rect_num = 0
          filled = Rect(0, 0, 0, 0)
          while 1:
              mediabox, rect, ctm = rectfn(rect_num, filled)
              rect_num += 1
              if mediabox:
                  # new page.
                  page_num += 1
              more, filled = self.place( rect)
              if positionfn:
                  def positionfn2(position):
                      # We add a `.page_num` member to the
                      # `ElementPosition` instance.
                      position.page_num = page_num
                      positionfn(position)
                  self.element_positions(positionfn2)
              if writer:
                  if mediabox:
                      # new page: finish the previous one, if any, first.
                      if dev:
                          if pagefn:
                              pagefn(page_num, mediabox, dev, 1)
                          writer.end_page()
                      dev = writer.begin_page( mediabox)
                      if pagefn:
                          pagefn(page_num, mediabox, dev, 0)
                  self.draw( dev, ctm)
                  if not more:
                      if pagefn:
                          pagefn( page_num, mediabox, dev, 1)
                      writer.end_page()
              else:
                  self.draw(None, ctm)
              if not more:
                  break

      @staticmethod
      def write_stabilized(writer, contentfn, rectfn, user_css=None, em=12, positionfn=None, pagefn=None, archive=None, add_header_ids=True):
          '''
          Repeatedly build and lay out a story until its content is stable,
          then write it.

          `contentfn(positions)` is called with the element positions of the
          previous iteration (an empty list at first) and returns the HTML
          to use. Once it returns the same content twice in a row, that
          iteration writes to `writer` and forwards positions to
          `positionfn`; earlier iterations only lay the story out.
          `user_css`, `em` and `archive` are passed to `Story()`; `rectfn`
          and `pagefn` are passed to `Story.write()`. If `add_header_ids`
          is true, `Story.add_header_ids()` is called on each story.
          '''
          positions = list()
          content = None
          # Iterate until stable.
          while 1:
              content_prev = content
              content = contentfn( positions)
              stable = bool(content == content_prev)
              story = Story(content, user_css, em, archive)

              if add_header_ids:
                  story.add_header_ids()

              positions = list()
              def positionfn2(position):
                  positions.append(position)
                  if stable and positionfn:
                      positionfn(position)
              story.write(
                      writer if stable else None,
                      rectfn,
                      positionfn2,
                      pagefn,
                      )
              if stable:
                  break

      @staticmethod
      def write_stabilized_with_links(contentfn, rectfn, user_css=None, em=12, positionfn=None, pagefn=None, archive=None, add_header_ids=True):
          '''
          Like `write_stabilized()`, but writes into an in-memory PDF and
          returns the `Document` produced by `Story.add_pdf_links()`.
          '''
          stream = io.BytesIO()
          writer = DocumentWriter(stream)
          positions = []
          def positionfn2(position):
              positions.append(position)
              if positionfn:
                  positionfn(position)
          Story.write_stabilized(writer, contentfn, rectfn, user_css, em, positionfn2, pagefn, archive, add_header_ids)
          writer.close()
          stream.seek(0)
          return Story.add_pdf_links(stream, positions)

      def write_with_links(self, rectfn, positionfn=None, pagefn=None):
          '''
          Like `write()`, but writes into an in-memory PDF and returns the
          `Document` produced by `Story.add_pdf_links()`.
          '''
          stream = io.BytesIO()
          writer = DocumentWriter(stream)
          positions = []
          def positionfn2(position):
              positions.append(position)
              if positionfn:
                  positionfn(position)
          self.write(writer, rectfn, positionfn=positionfn2, pagefn=pagefn)
          writer.close()
          stream.seek(0)
          return Story.add_pdf_links(stream, positions)

      class FitResult:
          '''
          The result from a `Story.fit*()` method.
          Members:
          `big_enough`:
              `True` if the fit succeeded.
          `filled`:
              From the last call to `Story.place()`.
          `more`:
              `False` if the fit succeeded.
          `numcalls`:
              Number of calls made to `self.place()`.
          `parameter`:
              The successful parameter value, or the largest failing value.
          `rect`:
              The rect created from `parameter`.
          '''
          def __init__(self, big_enough=None, filled=None, more=None, numcalls=None, parameter=None, rect=None):
              self.big_enough = big_enough
              self.filled = filled
              self.more = more
              self.numcalls = numcalls
              self.parameter = parameter
              self.rect = rect

          def __repr__(self):
              return (
                      f' big_enough={self.big_enough}'
                      f' filled={self.filled}'
                      f' more={self.more}'
                      f' numcalls={self.numcalls}'
                      f' parameter={self.parameter}'
                      f' rect={self.rect}'
                      )

      def fit(self, fn, pmin=None, pmax=None, delta=0.001, verbose=False):
          '''
          Finds optimal rect that contains the story `self`.
          Returns a `Story.FitResult` instance.
          On success, the last call to `self.place()` will have been with the
          returned rectangle, so `self.draw()` can be used directly.
          Args:
          :arg fn:
              A callable taking a floating point `parameter` and returning a
              `pymupdf.Rect()`. If the rect is empty, we assume the story will
              not fit and do not call `self.place()`.
              Must guarantee that `self.place()` behaves monotonically when
              given rect `fn(parameter)` as `parameter` increases. This
              usually means that both width and height increase or stay
              unchanged as `parameter` increases.
          :arg pmin:
              Minimum parameter to consider; `None` for -infinity.
          :arg pmax:
              Maximum parameter to consider; `None` for +infinity.
          :arg delta:
              Maximum error in returned `parameter`.
          :arg verbose:
              If true we output diagnostics.
          '''
          def log(text):
              assert verbose
              message(f'fit(): {text}')

          assert isinstance(pmin, (int, float)) or pmin is None
          assert isinstance(pmax, (int, float)) or pmax is None

          class State:
              # Mutable search state shared by the nested helpers below.
              def __init__(self):
                  self.pmin = pmin
                  self.pmax = pmax
                  self.pmin_result = None
                  self.pmax_result = None
                  self.result = None
                  self.numcalls = 0
                  # Parameter of the most recent update() call.
                  self.last_p = None
                  if verbose:
                      self.pmin0 = pmin
                      self.pmax0 = pmax
          state = State()

          if verbose:
              log(f'starting. {state.pmin=} {state.pmax=}.')

          self.reset()

          def ret():
              # Build the return value. On success, make sure the last
              # self.place() call was the one for state.pmax.
              if state.pmax is not None:
                  if state.last_p != state.pmax:
                      if verbose:
                          log(f'Calling update() with pmax, because was overwritten by later calls.')
                      big_enough = update(state.pmax)
                      assert big_enough
                  result = state.pmax_result
              else:
                  result = state.pmin_result if state.pmin_result else Story.FitResult(numcalls=state.numcalls)
              if verbose:
                  log(f'finished. {state.pmin0=} {state.pmax0=} {state.pmax=}: returning {result=}')
              return result

          def update(parameter):
              '''
              Evaluates `more, _ = self.place(fn(parameter))`. If `more` is
              false, then `rect` is big enough to contain `self` and we
              set `state.pmax=parameter` and return True. Otherwise we set
              `state.pmin=parameter` and return False.
              '''
              rect = fn(parameter)
              assert isinstance(rect, Rect), f'{type(rect)=} {rect=}'
              if rect.is_empty:
                  big_enough = False
                  result = Story.FitResult(parameter=parameter, numcalls=state.numcalls)
                  if verbose:
                      log(f'update(): not calling self.place() because rect is empty.')
              else:
                  more, filled = self.place(rect)
                  state.numcalls += 1
                  big_enough = not more
                  result = Story.FitResult(
                          filled=filled,
                          more=more,
                          numcalls=state.numcalls,
                          parameter=parameter,
                          rect=rect,
                          big_enough=big_enough,
                          )
                  if verbose:
                      log(f'update(): called self.place(): {state.numcalls:>2d}: {more=} {parameter=} {rect=}.')
              if big_enough:
                  state.pmax = parameter
                  state.pmax_result = result
              else:
                  state.pmin = parameter
                  state.pmin_result = result
              state.last_p = parameter
              return big_enough

          def opposite(p, direction):
              '''
              Returns same sign as `direction`, larger or smaller than `p` if
              direction is positive or negative respectively.
              '''
              if p is None or p==0:
                  return direction
              if direction * p > 0:
                  return 2 * p
              return -p

          if state.pmin is None:
              # Find an initial finite pmin value.
              if verbose: log(f'finding pmin.')
              parameter = opposite(state.pmax, -1)
              while 1:
                  if not update(parameter):
                      break
                  parameter *= 2
          else:
              if update(state.pmin):
                  if verbose: log(f'{state.pmin=} is big enough.')
                  return ret()

          if state.pmax is None:
              # Find an initial finite pmax value.
              if verbose: log(f'finding pmax.')
              parameter = opposite(state.pmin, +1)
              while 1:
                  if update(parameter):
                      break
                  parameter *= 2
          else:
              if not update(state.pmax):
                  # No solution possible.
                  state.pmax = None
                  if verbose: log(f'No solution possible {state.pmax=}.')
                  return ret()

          # Do binary search in pmin..pmax.
          if verbose: log(f'doing binary search with {state.pmin=} {state.pmax=}.')
          while 1:
              if state.pmax - state.pmin < delta:
                  return ret()
              parameter = (state.pmin + state.pmax) / 2
              update(parameter)

      def fit_scale(self, rect, scale_min=0, scale_max=None, delta=0.001, verbose=False):
          '''
          Finds smallest value `scale` in range `scale_min..scale_max` where
          `scale * rect` is large enough to contain the story `self`.
          Returns a `Story.FitResult` instance.
          :arg rect:
              Rect-like `(x0, y0, x1, y1)`. Its top-left corner stays fixed;
              its width and height are multiplied by `scale`.
          :arg scale_min:
              Minimum scale to consider; must be >= 0.
          :arg scale_max:
              Maximum scale to consider, must be >= scale_min or `None` for
              infinite.
          :arg delta:
              Maximum error in returned scale.
          :arg verbose:
              If true we output diagnostics.
          '''
          x0, y0, x1, y1 = rect
          width = x1 - x0
          height = y1 - y0
          def fn(scale):
              return Rect(x0, y0, x0 + scale*width, y0 + scale*height)
          return self.fit(fn, scale_min, scale_max, delta, verbose)

      def fit_height(self, width, height_min=0, height_max=None, origin=(0, 0), delta=0.001, verbose=False):
          '''
          Finds smallest height in range `height_min..height_max` where a rect
          with size `(width, height)` is large enough to contain the story
          `self`.
          Returns a `Story.FitResult` instance.
          :arg width:
              width of rect.
          :arg height_min:
              Minimum height to consider; must be >= 0.
          :arg height_max:
              Maximum height to consider, must be >= height_min or `None` for
              infinite.
          :arg origin:
              `(x0, y0)` of rect.
          :arg delta:
              Maximum error in returned height.
          :arg verbose:
              If true we output diagnostics.
          '''
          x0, y0 = origin
          x1 = x0 + width
          def fn(height):
              return Rect(x0, y0, x1, y0+height)
          return self.fit(fn, height_min, height_max, delta, verbose)

      def fit_width(self, height, width_min=0, width_max=None, origin=(0, 0), delta=0.001, verbose=False):
          '''
          Finds smallest width in range `width_min..width_max` where a rect with size
          `(width, height)` is large enough to contain the story `self`.
          Returns a `Story.FitResult` instance.
          :arg height:
              height of rect.
          :arg width_min:
              Minimum width to consider; must be >= 0.
          :arg width_max:
              Maximum width to consider, must be >= width_min or `None` for
              infinite.
          :arg origin:
              `(x0, y0)` of rect.
          :arg delta:
              Maximum error in returned width.
          :arg verbose:
              If true we output diagnostics.
          '''
          x0, y0 = origin
          y1 = y0 + height
          def fn(width):
              return Rect(x0, y0, x0+width, y1)
          return self.fit(fn, width_min, width_max, delta, verbose)
  10728. class TextPage:
  def __init__(self, *args):
      """Create a text page from a mediabox or wrap an existing one.

      Accepts either one `mupdf.FzRect`, from which a new
      `mupdf.FzStextPage` is created, or one `mupdf.FzStextPage`, which is
      used as is. Anything else raises Exception.
      """
      if args_match(args, mupdf.FzRect):
          stext_page = mupdf.FzStextPage(args[0])
      elif args_match(args, mupdf.FzStextPage):
          stext_page = args[0]
      else:
          raise Exception(f'Unrecognised args: {args}')
      self.this = stext_page
      self.thisown = True
      self.parent = None
  def _extractText(self, format_):
      """Render the text page into a buffer and return it as a string.

      `format_` selects the output: 1 -> HTML, 3 -> XML, 4 -> XHTML; any
      other value gives plain text via JM_print_stext_page_as_text().
      """
      stext_page = self.this
      buffer_ = mupdf.fz_new_buffer(1024)
      output = mupdf.FzOutput(buffer_)
      # fixme (kept from the original author): mupdfwrap.py thinks
      # fz_output is not copyable, possibly because no .refs member is
      # visible and there is no fz_keep_output() fn (although there is an
      # fz_drop_output()). So mupdf.fz_new_output_with_buffer() does not
      # convert the returned fz_output* into a mupdf.FzOutput.
      if format_ == 1:
          printer = mupdf.fz_print_stext_page_as_html
      elif format_ == 3:
          printer = mupdf.fz_print_stext_page_as_xml
      elif format_ == 4:
          printer = mupdf.fz_print_stext_page_as_xhtml
      else:
          printer = None
      if printer is None:
          # Plain text is written to the buffer directly, not via `output`.
          JM_print_stext_page_as_text(buffer_, stext_page)
      else:
          printer(output, stext_page, 0)
      output.fz_close_output()
      return JM_EscapeStrFromBuffer(buffer_)
  def _getNewBlockList(self, page_dict, raw):
      """Let JM_make_textpage_dict() fill `page_dict` from this text page."""
      stext_page = self.this
      JM_make_textpage_dict(stext_page, page_dict, raw)
  def _textpage_dict(self, raw=False):
      """Return the page dict: width and height plus whatever
      _getNewBlockList() adds for the given `raw` flag."""
      page_dict = dict(width=self.rect.width, height=self.rect.height)
      self._getNewBlockList(page_dict, raw)
      return page_dict
  def extractBLOCKS(self):
      """Return one tuple per block of the page whose bbox is not empty.

      Each tuple is (x0, y0, x1, y1, text, block_number, block_type). For
      a text block, `text` is the concatenation of its lines. For any
      other block it is a descriptive "<image: ...>" string. Content
      outside the page's mediabox is skipped unless the mediabox is
      infinite.
      """
      if g_use_extra:
          return extra.extractBLOCKS(self.this)
      stext_page = self.this
      clip = mupdf.FzRect(stext_page.m_internal.mediabox)
      buffer_ = mupdf.fz_new_buffer(1024)
      result = []
      for block_n, block in enumerate(stext_page):
          bbox = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
          if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_TEXT:
              mupdf.fz_clear_buffer(buffer_)  # start with an empty text buffer
              last_char = 0
              for line in block:
                  line_bbox = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
                  for ch in line:
                      char_bbox = JM_char_bbox(line, ch)
                      if not (JM_rects_overlap(clip, char_bbox)
                              or mupdf.fz_is_infinite_rect(clip)):
                          continue
                      code = ch.m_internal.c
                      JM_append_rune(buffer_, code)
                      last_char = code
                      line_bbox = mupdf.fz_union_rect(line_bbox, char_bbox)
                  # Terminate the line with a line feed (10) unless the
                  # last character appended so far already is one.
                  if not (last_char == 10 or mupdf.fz_is_empty_rect(line_bbox)):
                      mupdf.fz_append_byte(buffer_, 10)
                  bbox = mupdf.fz_union_rect(bbox, line_bbox)
              text = JM_EscapeStrFromBuffer(buffer_)
          elif (JM_rects_overlap(clip, block.m_internal.bbox)
                  or mupdf.fz_is_infinite_rect(clip)):
              img = block.i_image()
              cs = img.colorspace()
              text = "<image: %s, width: %d, height: %d, bpc: %d>" % (
                      mupdf.fz_colorspace_name(cs),
                      img.w(), img.h(), img.bpc()
                      )
              bbox = mupdf.fz_union_rect(bbox, mupdf.FzRect(block.m_internal.bbox))
          if mupdf.fz_is_empty_rect(bbox):
              continue
          result.append((
                  bbox.x0,
                  bbox.y0,
                  bbox.x1,
                  bbox.y1,
                  text,
                  block_n,
                  block.m_internal.type,
                  ))
      return result
  def extractDICT(self, cb=None, sort=False) -> dict:
      """Return page content as a Python dict of images and text spans.

      If `cb` is given, its width and height replace the page's. With
      `sort`, the blocks are ordered in place by bbox[3], then bbox[0].
      """
      def block_order(block):
          box = block["bbox"]
          return (box[3], box[0])

      page = self._textpage_dict(raw=False)
      if cb is not None:
          page["width"] = cb.width
          page["height"] = cb.height
      if sort:
          page["blocks"].sort(key=block_order)
      return page
  def extractHTML(self) -> str:
      """Return page content as a HTML string (_extractText format 1)."""
      html = self._extractText(1)
      return html
  def extractIMGINFO(self, hashes=0):
      """Return a list of dicts with meta information, one per non-text block.

      Each dict holds the block number, bbox, transformation matrix, image
      width / height, colorspace component count and name, x / y
      resolution, bpc, size and "has-mask". If `hashes` is true, a
      "digest" entry with the bytes of the image pixmap's MD5 is added.
      """
      infos = []
      for block_n, block in enumerate(self.this):
          if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_TEXT:
              continue
          img = block.i_image()
          mask = img.mask()
          has_mask = True if mask.m_internal else False
          # Size of the compressed image buffer when there is one.
          img_size = 0
          compr_buff = mupdf.fz_compressed_image_buffer(img)
          if compr_buff.m_internal:
              img_size = compr_buff.fz_compressed_buffer_size()
              compr_buff = None
          if hashes:
              everything = mupdf.FzIrect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT)
              assert mupdf.fz_is_infinite_irect(everything)
              full_size = mupdf.FzMatrix(img.w(), 0, 0, img.h(), 0, 0)
              pix, _w, _h = mupdf.fz_get_pixmap_from_image(img, everything, full_size)
              digest = bytes(mupdf.fz_md5_pixmap2(pix))
          if img_size == 0:
              # No compressed size available: fall back to w * h * n.
              img_size = img.w() * img.h() * img.n()
          cs = mupdf.FzColorspace(mupdf.ll_fz_keep_colorspace(img.m_internal.colorspace))
          info = {
                  dictkey_number: block_n,
                  dictkey_bbox: JM_py_from_rect(block.m_internal.bbox),
                  dictkey_matrix: JM_py_from_matrix(block.i_transform()),
                  dictkey_width: img.w(),
                  dictkey_height: img.h(),
                  dictkey_colorspace: mupdf.fz_colorspace_n(cs),
                  dictkey_cs_name: mupdf.fz_colorspace_name(cs),
                  dictkey_xres: img.xres(),
                  dictkey_yres: img.yres(),
                  dictkey_bpc: img.bpc(),
                  dictkey_size: img_size,
                  }
          if hashes:
              info["digest"] = digest
          info["has-mask"] = has_mask
          infos.append(info)
      return infos
  def extractJSON(self, cb=None, sort=False) -> str:
      """Return the data of 'extractDICT' serialized as JSON text.

      bytes / bytearray values are written as base64 strings. If `cb` is
      given, its width and height replace the page's. With `sort`, the
      blocks are ordered by bbox[3], then bbox[0].
      """
      import base64
      import json

      def encode_binary(obj):
          # Only exact bytes / bytearray objects are converted. Anything
          # else yields None, which the encoder writes as null.
          if type(obj) in (bytes, bytearray):
              return base64.b64encode(obj).decode()
          return None

      page = self._textpage_dict(raw=False)
      if cb is not None:
          page["width"] = cb.width
          page["height"] = cb.height
      if sort:
          page["blocks"].sort(key=lambda blk: (blk["bbox"][3], blk["bbox"][0]))
      return json.dumps(page, separators=(",", ":"), default=encode_binary, indent=1)
  def extractRAWDICT(self, cb=None, sort=False) -> dict:
      """Return page content as a Python dict of images and text characters.

      If `cb` is given, its width and height replace the page's. With
      `sort`, the blocks are ordered in place by bbox[3], then bbox[0].
      """
      def block_order(block):
          box = block["bbox"]
          return (box[3], box[0])

      page = self._textpage_dict(raw=True)
      if cb is not None:
          page["width"] = cb.width
          page["height"] = cb.height
      if sort:
          page["blocks"].sort(key=block_order)
      return page
  def extractRAWJSON(self, cb=None, sort=False) -> str:
      """Return the data of 'extractRAWDICT' serialized as JSON text.

      bytes / bytearray values are written as base64 strings. If `cb` is
      given, its width and height replace the page's. With `sort`, the
      blocks are ordered by bbox[3], then bbox[0].
      """
      import base64
      import json

      def encode_binary(obj):
          # Only exact bytes / bytearray objects are converted. Anything
          # else yields None, which the encoder writes as null.
          if type(obj) in (bytes, bytearray):
              return base64.b64encode(obj).decode()
          return None

      page = self._textpage_dict(raw=True)
      if cb is not None:
          page["width"] = cb.width
          page["height"] = cb.height
      if sort:
          page["blocks"].sort(key=lambda blk: (blk["bbox"][3], blk["bbox"][0]))
      return json.dumps(page, separators=(",", ":"), default=encode_binary, indent=1)
  def extractSelection(self, pointa, pointb):
      """Return the result of mupdf.fz_copy_selection() for the selection
      between point-likes `pointa` and `pointb`."""
      start, end = JM_point_from_py(pointa), JM_point_from_py(pointb)
      return mupdf.fz_copy_selection(self.this, start, end, 0)
  def extractText(self, sort=False) -> str:
      """Return simple, bare text on the page.

      With `sort`, the text is assembled from extractBLOCKS() with the
      blocks ordered by item 3, then item 0, of each block tuple.
      """
      if sort:
          ordered = sorted(self.extractBLOCKS(), key=lambda blk: (blk[3], blk[0]))
          return "".join(blk[4] for blk in ordered)
      return self._extractText(0)
  def extractTextbox(self, rect):
      """Return the text that JM_copy_rectangle() copies for rect-like
      `rect`, decoded with PyUnicode_DecodeRawUnicodeEscape()."""
      stext_page = self.this
      assert isinstance(stext_page, mupdf.FzStextPage)
      copied = JM_copy_rectangle(stext_page, JM_rect_from_py(rect))
      return PyUnicode_DecodeRawUnicodeEscape(copied)
  def extractWORDS(self, delimiters=None):
      """Return a list with text word information.

      Words are collected line by line from the text blocks. A word ends
      at a delimiter (as decided by JM_is_word_delimiter() with
      `delimiters`), at a change of writing direction, or at the end of
      the line. The list items are created by JM_append_word().
      Characters outside the page's mediabox are skipped unless the
      mediabox is infinite.
      """
      if g_use_extra:
          return extra.extractWORDS(self.this, delimiters)
      stext_page = self.this
      clip = mupdf.FzRect(stext_page.m_internal.mediabox)
      word_bbox = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
      word_buf = mupdf.fz_new_buffer(64)
      prev_rtl = 0
      char_count = 0
      words = []
      for block_n, block in enumerate(stext_page):
          if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
              continue
          for line_n, line in enumerate(block):
              # Every line starts with an empty word and word counter 0.
              word_n = 0
              mupdf.fz_clear_buffer(word_buf)
              char_count = 0
              for ch in line:
                  char_bbox = JM_char_bbox(line, ch)
                  if not (JM_rects_overlap(clip, char_bbox)
                          or mupdf.fz_is_infinite_rect(clip)):
                      continue
                  code = ch.m_internal.c
                  is_delimiter = JM_is_word_delimiter(code, delimiters)
                  is_rtl = JM_is_rtl_char(code)
                  if is_delimiter or is_rtl != prev_rtl:
                      if is_delimiter and char_count == 0:
                          continue  # skip delimiters at line start
                      if not mupdf.fz_is_empty_rect(word_bbox):
                          word_n, word_bbox = JM_append_word(words, word_buf, word_bbox, block_n, line_n, word_n)
                      mupdf.fz_clear_buffer(word_buf)
                      char_count = 0
                      if is_delimiter:
                          continue
                  # Add the character to the current word and grow its bbox.
                  JM_append_rune(word_buf, code)
                  prev_rtl = is_rtl
                  char_count += 1
                  word_bbox = mupdf.fz_union_rect(word_bbox, JM_char_bbox(line, ch))
              # Flush the word still pending at the end of the line.
              if char_count and not mupdf.fz_is_empty_rect(word_bbox):
                  word_n, word_bbox = JM_append_word(words, word_buf, word_bbox, block_n, line_n, word_n)
                  char_count = 0
      return words
  10996. def extractXHTML(self) -> str:
  10997. """Return page content as a XHTML string."""
  10998. return self._extractText(4)
  10999. def extractXML(self) -> str:
  11000. """Return page content as a XML string."""
  11001. return self._extractText(3)
  11002. def poolsize(self):
  11003. """TextPage current poolsize."""
  11004. tpage = self.this
  11005. pool = mupdf.Pool(tpage.m_internal.pool)
  11006. size = mupdf.fz_pool_size( pool)
  11007. pool.m_internal = None # Ensure that pool's destructor does not free the pool.
  11008. return size
  11009. @property
  11010. def rect(self):
  11011. """Page rectangle."""
  11012. this_tpage = self.this
  11013. mediabox = this_tpage.m_internal.mediabox
  11014. val = JM_py_from_rect(mediabox)
  11015. val = Rect(val)
  11016. return val
  11017. def search(self, needle, hit_max=0, quads=1):
  11018. """Locate 'needle' returning rects or quads."""
  11019. val = JM_search_stext_page(self.this, needle)
  11020. if not val:
  11021. return val
  11022. items = len(val)
  11023. for i in range(items): # change entries to quads or rects
  11024. q = Quad(val[i])
  11025. if quads:
  11026. val[i] = q
  11027. else:
  11028. val[i] = q.rect
  11029. if quads:
  11030. return val
  11031. i = 0 # join overlapping rects on the same line
  11032. while i < items - 1:
  11033. v1 = val[i]
  11034. v2 = val[i + 1]
  11035. if v1.y1 != v2.y1 or (v1 & v2).is_empty:
  11036. i += 1
  11037. continue # no overlap on same line
  11038. val[i] = v1 | v2 # join rectangles
  11039. del val[i + 1] # remove v2
  11040. items -= 1 # reduce item count
  11041. return val
# Alias: extractTEXT is bound to the very same function object as extractText.
extractTEXT = extractText
  11043. class TextWriter:
  11044. def __init__(self, page_rect, opacity=1, color=None):
  11045. """Stores text spans for later output on compatible PDF pages."""
  11046. self.this = mupdf.fz_new_text()
  11047. self.opacity = opacity
  11048. self.color = color
  11049. self.rect = Rect(page_rect)
  11050. self.ctm = Matrix(1, 0, 0, -1, 0, self.rect.height)
  11051. self.ictm = ~self.ctm
  11052. self.last_point = Point()
  11053. self.last_point.__doc__ = "Position following last text insertion."
  11054. self.text_rect = Rect()
  11055. self.text_rect.__doc__ = "Accumulated area of text spans."
  11056. self.used_fonts = set()
  11057. self.thisown = True
  11058. @property
  11059. def _bbox(self):
  11060. val = JM_py_from_rect( mupdf.fz_bound_text( self.this, mupdf.FzStrokeState(None), mupdf.FzMatrix()))
  11061. val = Rect(val)
  11062. return val
  11063. def append(self, pos, text, font=None, fontsize=11, language=None, right_to_left=0, small_caps=0):
  11064. """Store 'text' at point 'pos' using 'font' and 'fontsize'."""
  11065. pos = Point(pos) * self.ictm
  11066. #log( '{font=}')
  11067. if font is None:
  11068. font = Font("helv")
  11069. if not font.is_writable:
  11070. if 0:
  11071. log( '{font.this.m_internal.name=}')
  11072. log( '{font.this.m_internal.t3matrix=}')
  11073. log( '{font.this.m_internal.bbox=}')
  11074. log( '{font.this.m_internal.glyph_count=}')
  11075. log( '{font.this.m_internal.use_glyph_bbox=}')
  11076. log( '{font.this.m_internal.width_count=}')
  11077. log( '{font.this.m_internal.width_default=}')
  11078. log( '{font.this.m_internal.has_digest=}')
  11079. log( 'Unsupported font {font.name=}')
  11080. if mupdf_cppyy:
  11081. import cppyy
  11082. log( f'Unsupported font {cppyy.gbl.mupdf_font_name(font.this.m_internal)=}')
  11083. raise ValueError("Unsupported font '%s'." % font.name)
  11084. if right_to_left:
  11085. text = self.clean_rtl(text)
  11086. text = "".join(reversed(text))
  11087. right_to_left = 0
  11088. lang = mupdf.fz_text_language_from_string(language)
  11089. p = JM_point_from_py(pos)
  11090. trm = mupdf.fz_make_matrix(fontsize, 0, 0, fontsize, p.x, p.y)
  11091. markup_dir = 0
  11092. wmode = 0
  11093. if small_caps == 0:
  11094. trm = mupdf.fz_show_string( self.this, font.this, trm, text, wmode, right_to_left, markup_dir, lang)
  11095. else:
  11096. trm = JM_show_string_cs( self.this, font.this, trm, text, wmode, right_to_left, markup_dir, lang)
  11097. val = JM_py_from_matrix(trm)
  11098. self.last_point = Point(val[-2:]) * self.ctm
  11099. self.text_rect = self._bbox * self.ctm
  11100. val = self.text_rect, self.last_point
  11101. if font.flags["mono"] == 1:
  11102. self.used_fonts.add(font)
  11103. return val
  11104. def appendv(self, pos, text, font=None, fontsize=11, language=None, small_caps=False):
  11105. lheight = fontsize * 1.2
  11106. for c in text:
  11107. self.append(pos, c, font=font, fontsize=fontsize,
  11108. language=language, small_caps=small_caps)
  11109. pos.y += lheight
  11110. return self.text_rect, self.last_point
  11111. def clean_rtl(self, text):
  11112. """Revert the sequence of Latin text parts.
  11113. Text with right-to-left writing direction (Arabic, Hebrew) often
  11114. contains Latin parts, which are written in left-to-right: numbers, names,
  11115. etc. For output as PDF text we need *everything* in right-to-left.
  11116. E.g. an input like "<arabic> ABCDE FG HIJ <arabic> KL <arabic>" will be
  11117. converted to "<arabic> JIH GF EDCBA <arabic> LK <arabic>". The Arabic
  11118. parts remain untouched.
  11119. Args:
  11120. text: str
  11121. Returns:
  11122. Massaged string.
  11123. """
  11124. if not text:
  11125. return text
  11126. # split into words at space boundaries
  11127. words = text.split(" ")
  11128. idx = []
  11129. for i in range(len(words)):
  11130. w = words[i]
  11131. # revert character sequence for Latin only words
  11132. if not (len(w) < 2 or max([ord(c) for c in w]) > 255):
  11133. words[i] = "".join(reversed(w))
  11134. idx.append(i) # stored index of Latin word
  11135. # adjacent Latin words must revert their sequence, too
  11136. idx2 = [] # store indices of adjacent Latin words
  11137. for i in range(len(idx)):
  11138. if idx2 == []: # empty yet?
  11139. idx2.append(idx[i]) # store Latin word number
  11140. elif idx[i] > idx2[-1] + 1: # large gap to last?
  11141. if len(idx2) > 1: # at least two consecutives?
  11142. words[idx2[0] : idx2[-1] + 1] = reversed(
  11143. words[idx2[0] : idx2[-1] + 1]
  11144. ) # revert their sequence
  11145. idx2 = [idx[i]] # re-initialize
  11146. elif idx[i] == idx2[-1] + 1: # new adjacent Latin word
  11147. idx2.append(idx[i])
  11148. text = " ".join(words)
  11149. return text
  11150. def write_text(self, page, color=None, opacity=-1, overlay=1, morph=None, matrix=None, render_mode=0, oc=0):
  11151. """Write the text to a PDF page having the TextWriter's page size.
  11152. Args:
  11153. page: a PDF page having same size.
  11154. color: override text color.
  11155. opacity: override transparency.
  11156. overlay: put in foreground or background.
  11157. morph: tuple(Point, Matrix), apply a matrix with a fixpoint.
  11158. matrix: Matrix to be used instead of 'morph' argument.
  11159. render_mode: (int) PDF render mode operator 'Tr'.
  11160. """
  11161. CheckParent(page)
  11162. if abs(self.rect - page.rect) > 1e-3:
  11163. raise ValueError("incompatible page rect")
  11164. if morph is not None:
  11165. if (type(morph) not in (tuple, list)
  11166. or type(morph[0]) is not Point
  11167. or type(morph[1]) is not Matrix
  11168. ):
  11169. raise ValueError("morph must be (Point, Matrix) or None")
  11170. if matrix is not None and morph is not None:
  11171. raise ValueError("only one of matrix, morph is allowed")
  11172. if getattr(opacity, "__float__", None) is None or opacity == -1:
  11173. opacity = self.opacity
  11174. if color is None:
  11175. color = self.color
  11176. if 1:
  11177. pdfpage = page._pdf_page()
  11178. alpha = 1
  11179. if opacity >= 0 and opacity < 1:
  11180. alpha = opacity
  11181. ncol = 1
  11182. dev_color = [0, 0, 0, 0]
  11183. if color:
  11184. ncol, dev_color = JM_color_FromSequence(color)
  11185. if ncol == 3:
  11186. colorspace = mupdf.fz_device_rgb()
  11187. elif ncol == 4:
  11188. colorspace = mupdf.fz_device_cmyk()
  11189. else:
  11190. colorspace = mupdf.fz_device_gray()
  11191. resources = mupdf.pdf_new_dict(pdfpage.doc(), 5)
  11192. contents = mupdf.fz_new_buffer(1024)
  11193. dev = mupdf.pdf_new_pdf_device( pdfpage.doc(), mupdf.FzMatrix(), resources, contents)
  11194. #log( '=== {dev_color!r=}')
  11195. mupdf.fz_fill_text(
  11196. dev,
  11197. self.this,
  11198. mupdf.FzMatrix(),
  11199. colorspace,
  11200. dev_color,
  11201. alpha,
  11202. mupdf.FzColorParams(mupdf.fz_default_color_params),
  11203. )
  11204. mupdf.fz_close_device( dev)
  11205. # copy generated resources into the one of the page
  11206. max_nums = JM_merge_resources( pdfpage, resources)
  11207. cont_string = JM_EscapeStrFromBuffer( contents)
  11208. result = (max_nums, cont_string)
  11209. val = result
  11210. max_nums = val[0]
  11211. content = val[1]
  11212. max_alp, max_font = max_nums
  11213. old_cont_lines = content.splitlines()
  11214. optcont = page._get_optional_content(oc)
  11215. if optcont is not None:
  11216. bdc = "/OC /%s BDC" % optcont
  11217. emc = "EMC"
  11218. else:
  11219. bdc = emc = ""
  11220. new_cont_lines = ["q"]
  11221. if bdc:
  11222. new_cont_lines.append(bdc)
  11223. cb = page.cropbox_position
  11224. if page.rotation in (90, 270):
  11225. delta = page.rect.height - page.rect.width
  11226. else:
  11227. delta = 0
  11228. mb = page.mediabox
  11229. if bool(cb) or mb.y0 != 0 or delta != 0:
  11230. new_cont_lines.append(f"1 0 0 1 {_format_g((cb.x, cb.y + mb.y0 - delta))} cm")
  11231. if morph:
  11232. p = morph[0] * self.ictm
  11233. delta = Matrix(1, 1).pretranslate(p.x, p.y)
  11234. matrix = ~delta * morph[1] * delta
  11235. if morph or matrix:
  11236. new_cont_lines.append(_format_g(JM_TUPLE(matrix)) + " cm")
  11237. for line in old_cont_lines:
  11238. if line.endswith(" cm"):
  11239. continue
  11240. if line == "BT":
  11241. new_cont_lines.append(line)
  11242. new_cont_lines.append("%i Tr" % render_mode)
  11243. continue
  11244. if line.endswith(" gs"):
  11245. alp = int(line.split()[0][4:]) + max_alp
  11246. line = "/Alp%i gs" % alp
  11247. elif line.endswith(" Tf"):
  11248. temp = line.split()
  11249. fsize = float(temp[1])
  11250. if render_mode != 0:
  11251. w = fsize * 0.05
  11252. else:
  11253. w = 1
  11254. new_cont_lines.append(_format_g(w) + " w")
  11255. font = int(temp[0][2:]) + max_font
  11256. line = " ".join(["/F%i" % font] + temp[1:])
  11257. elif line.endswith(" rg"):
  11258. new_cont_lines.append(line.replace("rg", "RG"))
  11259. elif line.endswith(" g"):
  11260. new_cont_lines.append(line.replace(" g", " G"))
  11261. elif line.endswith(" k"):
  11262. new_cont_lines.append(line.replace(" k", " K"))
  11263. new_cont_lines.append(line)
  11264. if emc:
  11265. new_cont_lines.append(emc)
  11266. new_cont_lines.append("Q\n")
  11267. content = "\n".join(new_cont_lines).encode("utf-8")
  11268. TOOLS._insert_contents(page, content, overlay=overlay)
  11269. val = None
  11270. for font in self.used_fonts:
  11271. repair_mono_font(page, font)
  11272. return val
  11273. class IRect:
  11274. """
  11275. IRect() - all zeros
  11276. IRect(x0, y0, x1, y1) - 4 coordinates
  11277. IRect(top-left, x1, y1) - point and 2 coordinates
  11278. IRect(x0, y0, bottom-right) - 2 coordinates and point
  11279. IRect(top-left, bottom-right) - 2 points
  11280. IRect(sequ) - new from sequence or rect-like
  11281. """
  11282. def __add__(self, p):
  11283. return Rect.__add__(self, p).round()
  11284. def __and__(self, x):
  11285. return Rect.__and__(self, x).round()
  11286. def __contains__(self, x):
  11287. return Rect.__contains__(self, x)
  11288. def __eq__(self, r):
  11289. if not hasattr(r, "__len__"):
  11290. return False
  11291. return len(r) == 4 and self.x0 == r[0] and self.y0 == r[1] and self.x1 == r[2] and self.y1 == r[3]
  11292. def __getitem__(self, i):
  11293. return (self.x0, self.y0, self.x1, self.y1)[i]
  11294. def __hash__(self):
  11295. return hash(tuple(self))
  11296. def __init__(self, *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
  11297. self.x0, self.y0, self.x1, self.y1 = util_make_irect( *args, p0=p0, p1=p1, x0=x0, y0=y0, x1=x1, y1=y1)
  11298. def __len__(self):
  11299. return 4
  11300. def __mul__(self, m):
  11301. return Rect.__mul__(self, m).round()
  11302. def __neg__(self):
  11303. return IRect(-self.x0, -self.y0, -self.x1, -self.y1)
  11304. def __or__(self, x):
  11305. return Rect.__or__(self, x).round()
  11306. def __pos__(self):
  11307. return IRect(self)
  11308. def __repr__(self):
  11309. return "IRect" + str(tuple(self))
  11310. def __setitem__(self, i, v):
  11311. v = int(v)
  11312. if i == 0: self.x0 = v
  11313. elif i == 1: self.y0 = v
  11314. elif i == 2: self.x1 = v
  11315. elif i == 3: self.y1 = v
  11316. else:
  11317. raise IndexError("index out of range")
  11318. return None
  11319. def __sub__(self, p):
  11320. return Rect.__sub__(self, p).round()
  11321. def __truediv__(self, m):
  11322. return Rect.__truediv__(self, m).round()
  11323. @property
  11324. def bottom_left(self):
  11325. """Bottom-left corner."""
  11326. return Point(self.x0, self.y1)
  11327. @property
  11328. def bottom_right(self):
  11329. """Bottom-right corner."""
  11330. return Point(self.x1, self.y1)
  11331. @property
  11332. def height(self):
  11333. return max(0, self.y1 - self.y0)
  11334. def contains(self, x):
  11335. """Check if x is in the rectangle."""
  11336. return self.__contains__(x)
  11337. def include_point(self, p):
  11338. """Extend rectangle to include point p."""
  11339. rect = self.rect.include_point(p)
  11340. return rect.irect
  11341. def include_rect(self, r):
  11342. """Extend rectangle to include rectangle r."""
  11343. rect = self.rect.include_rect(r)
  11344. return rect.irect
  11345. def intersect(self, r):
  11346. """Restrict rectangle to intersection with rectangle r."""
  11347. return Rect.intersect(self, r).round()
  11348. def intersects(self, x):
  11349. return Rect.intersects(self, x)
  11350. @property
  11351. def is_empty(self):
  11352. """True if rectangle area is empty."""
  11353. return self.x0 >= self.x1 or self.y0 >= self.y1
  11354. @property
  11355. def is_infinite(self):
  11356. """True if rectangle is infinite."""
  11357. return self.x0 == self.y0 == FZ_MIN_INF_RECT and self.x1 == self.y1 == FZ_MAX_INF_RECT
  11358. @property
  11359. def is_valid(self):
  11360. """True if rectangle is valid."""
  11361. return self.x0 <= self.x1 and self.y0 <= self.y1
  11362. def morph(self, p, m):
  11363. """Morph with matrix-like m and point-like p.
  11364. Returns a new quad."""
  11365. if self.is_infinite:
  11366. return INFINITE_QUAD()
  11367. return self.quad.morph(p, m)
  11368. def norm(self):
  11369. return math.sqrt(sum([c*c for c in self]))
  11370. def normalize(self):
  11371. """Replace rectangle with its valid version."""
  11372. if self.x1 < self.x0:
  11373. self.x0, self.x1 = self.x1, self.x0
  11374. if self.y1 < self.y0:
  11375. self.y0, self.y1 = self.y1, self.y0
  11376. return self
  11377. @property
  11378. def quad(self):
  11379. """Return Quad version of rectangle."""
  11380. return Quad(self.tl, self.tr, self.bl, self.br)
  11381. @property
  11382. def rect(self):
  11383. return Rect(self)
  11384. @property
  11385. def top_left(self):
  11386. """Top-left corner."""
  11387. return Point(self.x0, self.y0)
  11388. @property
  11389. def top_right(self):
  11390. """Top-right corner."""
  11391. return Point(self.x1, self.y0)
  11392. def torect(self, r):
  11393. """Return matrix that converts to target rect."""
  11394. r = Rect(r)
  11395. if self.is_infinite or self.is_empty or r.is_infinite or r.is_empty:
  11396. raise ValueError("rectangles must be finite and not empty")
  11397. return (
  11398. Matrix(1, 0, 0, 1, -self.x0, -self.y0)
  11399. * Matrix(r.width / self.width, r.height / self.height)
  11400. * Matrix(1, 0, 0, 1, r.x0, r.y0)
  11401. )
  11402. def transform(self, m):
  11403. return Rect.transform(self, m).round()
  11404. @property
  11405. def width(self):
  11406. return max(0, self.x1 - self.x0)
  11407. br = bottom_right
  11408. bl = bottom_left
  11409. tl = top_left
  11410. tr = top_right
  11411. # Data
  11412. #
  11413. if 1:
  11414. _self = sys.modules[__name__]
  11415. if 1:
  11416. for _name, _value in mupdf.__dict__.items():
  11417. if _name.startswith(('PDF_', 'UCDN_SCRIPT_')):
  11418. if _name.startswith('PDF_ENUM_NAME_'):
  11419. # Not a simple enum.
  11420. pass
  11421. else:
  11422. #assert not inspect.isroutine(value)
  11423. #log(f'importing {_name=} {_value=}.')
  11424. setattr(_self, _name, _value)
  11425. #log(f'{getattr( self, name, None)=}')
  11426. else:
  11427. # This is slow due to importing inspect, e.g. 0.019 instead of 0.004.
  11428. for _name, _value in inspect.getmembers(mupdf):
  11429. if _name.startswith(('PDF_', 'UCDN_SCRIPT_')):
  11430. if _name.startswith('PDF_ENUM_NAME_'):
  11431. # Not a simple enum.
  11432. pass
  11433. else:
  11434. #assert not inspect.isroutine(value)
  11435. #log(f'importing {name}')
  11436. setattr(_self, _name, _value)
  11437. #log(f'{getattr( self, name, None)=}')
  11438. # This is a macro so not preserved in mupdf C++/Python bindings.
  11439. #
  11440. PDF_SIGNATURE_DEFAULT_APPEARANCE = (0
  11441. | mupdf.PDF_SIGNATURE_SHOW_LABELS
  11442. | mupdf.PDF_SIGNATURE_SHOW_DN
  11443. | mupdf.PDF_SIGNATURE_SHOW_DATE
  11444. | mupdf.PDF_SIGNATURE_SHOW_TEXT_NAME
  11445. | mupdf.PDF_SIGNATURE_SHOW_GRAPHIC_NAME
  11446. | mupdf.PDF_SIGNATURE_SHOW_LOGO
  11447. )
  11448. #UCDN_SCRIPT_ADLAM = mupdf.UCDN_SCRIPT_ADLAM
  11449. #setattr(self, 'UCDN_SCRIPT_ADLAM', mupdf.UCDN_SCRIPT_ADLAM)
  11450. assert mupdf.UCDN_EAST_ASIAN_H == 1
  11451. # Flake8 incorrectly fails next two lines because we've dynamically added
  11452. # items to self.
  11453. assert PDF_TX_FIELD_IS_MULTILINE == mupdf.PDF_TX_FIELD_IS_MULTILINE # noqa: F821
  11454. assert UCDN_SCRIPT_ADLAM == mupdf.UCDN_SCRIPT_ADLAM # noqa: F821
  11455. del _self, _name, _value
  11456. AnyType = typing.Any
  11457. Base14_fontnames = (
  11458. "Courier",
  11459. "Courier-Oblique",
  11460. "Courier-Bold",
  11461. "Courier-BoldOblique",
  11462. "Helvetica",
  11463. "Helvetica-Oblique",
  11464. "Helvetica-Bold",
  11465. "Helvetica-BoldOblique",
  11466. "Times-Roman",
  11467. "Times-Italic",
  11468. "Times-Bold",
  11469. "Times-BoldItalic",
  11470. "Symbol",
  11471. "ZapfDingbats",
  11472. )
  11473. Base14_fontdict = {}
  11474. for f in Base14_fontnames:
  11475. Base14_fontdict[f.lower()] = f
  11476. Base14_fontdict["helv"] = "Helvetica"
  11477. Base14_fontdict["heit"] = "Helvetica-Oblique"
  11478. Base14_fontdict["hebo"] = "Helvetica-Bold"
  11479. Base14_fontdict["hebi"] = "Helvetica-BoldOblique"
  11480. Base14_fontdict["cour"] = "Courier"
  11481. Base14_fontdict["coit"] = "Courier-Oblique"
  11482. Base14_fontdict["cobo"] = "Courier-Bold"
  11483. Base14_fontdict["cobi"] = "Courier-BoldOblique"
  11484. Base14_fontdict["tiro"] = "Times-Roman"
  11485. Base14_fontdict["tibo"] = "Times-Bold"
  11486. Base14_fontdict["tiit"] = "Times-Italic"
  11487. Base14_fontdict["tibi"] = "Times-BoldItalic"
  11488. Base14_fontdict["symb"] = "Symbol"
  11489. Base14_fontdict["zadb"] = "ZapfDingbats"
  11490. EPSILON = 1e-5
  11491. FLT_EPSILON = 1e-5
  11492. # largest 32bit integers surviving C float conversion roundtrips
  11493. # used by MuPDF to define infinite rectangles
  11494. FZ_MIN_INF_RECT = -0x80000000
  11495. FZ_MAX_INF_RECT = 0x7fffff80
  11496. JM_annot_id_stem = "fitz"
  11497. JM_mupdf_warnings_store = []
  11498. JM_mupdf_show_errors = 1
  11499. JM_mupdf_show_warnings = 0
# ------------------------------------------------------------------------------
# Image recompression constants
# ------------------------------------------------------------------------------
# Each name re-exports the mupdf attribute of the same name.
FZ_RECOMPRESS_NEVER = mupdf.FZ_RECOMPRESS_NEVER
FZ_RECOMPRESS_SAME = mupdf.FZ_RECOMPRESS_SAME
FZ_RECOMPRESS_LOSSLESS = mupdf.FZ_RECOMPRESS_LOSSLESS
FZ_RECOMPRESS_JPEG = mupdf.FZ_RECOMPRESS_JPEG
FZ_RECOMPRESS_J2K = mupdf.FZ_RECOMPRESS_J2K
FZ_RECOMPRESS_FAX = mupdf.FZ_RECOMPRESS_FAX
FZ_SUBSAMPLE_AVERAGE = mupdf.FZ_SUBSAMPLE_AVERAGE
FZ_SUBSAMPLE_BICUBIC = mupdf.FZ_SUBSAMPLE_BICUBIC
  11511. # ------------------------------------------------------------------------------
  11512. # Various PDF Optional Content Flags
  11513. # ------------------------------------------------------------------------------
  11514. PDF_OC_ON = 0
  11515. PDF_OC_TOGGLE = 1
  11516. PDF_OC_OFF = 2
  11517. # ------------------------------------------------------------------------------
  11518. # link kinds and link flags
  11519. # ------------------------------------------------------------------------------
  11520. LINK_NONE = 0
  11521. LINK_GOTO = 1
  11522. LINK_URI = 2
  11523. LINK_LAUNCH = 3
  11524. LINK_NAMED = 4
  11525. LINK_GOTOR = 5
  11526. LINK_FLAG_L_VALID = 1
  11527. LINK_FLAG_T_VALID = 2
  11528. LINK_FLAG_R_VALID = 4
  11529. LINK_FLAG_B_VALID = 8
  11530. LINK_FLAG_FIT_H = 16
  11531. LINK_FLAG_FIT_V = 32
  11532. LINK_FLAG_R_IS_ZOOM = 64
  11533. SigFlag_SignaturesExist = 1
  11534. SigFlag_AppendOnly = 2
  11535. STAMP_Approved = 0
  11536. STAMP_AsIs = 1
  11537. STAMP_Confidential = 2
  11538. STAMP_Departmental = 3
  11539. STAMP_Experimental = 4
  11540. STAMP_Expired = 5
  11541. STAMP_Final = 6
  11542. STAMP_ForComment = 7
  11543. STAMP_ForPublicRelease = 8
  11544. STAMP_NotApproved = 9
  11545. STAMP_NotForPublicRelease = 10
  11546. STAMP_Sold = 11
  11547. STAMP_TopSecret = 12
  11548. STAMP_Draft = 13
  11549. TEXT_ALIGN_LEFT = 0
  11550. TEXT_ALIGN_CENTER = 1
  11551. TEXT_ALIGN_RIGHT = 2
  11552. TEXT_ALIGN_JUSTIFY = 3
  11553. TEXT_FONT_SUPERSCRIPT = 1
  11554. TEXT_FONT_ITALIC = 2
  11555. TEXT_FONT_SERIFED = 4
  11556. TEXT_FONT_MONOSPACED = 8
  11557. TEXT_FONT_BOLD = 16
  11558. TEXT_OUTPUT_TEXT = 0
  11559. TEXT_OUTPUT_HTML = 1
  11560. TEXT_OUTPUT_JSON = 2
  11561. TEXT_OUTPUT_XML = 3
  11562. TEXT_OUTPUT_XHTML = 4
# Text extraction flags: each TEXT_<X> re-exports mupdf.FZ_STEXT_<X>.
TEXT_PRESERVE_LIGATURES = mupdf.FZ_STEXT_PRESERVE_LIGATURES
TEXT_PRESERVE_WHITESPACE = mupdf.FZ_STEXT_PRESERVE_WHITESPACE
TEXT_PRESERVE_IMAGES = mupdf.FZ_STEXT_PRESERVE_IMAGES
TEXT_INHIBIT_SPACES = mupdf.FZ_STEXT_INHIBIT_SPACES
TEXT_DEHYPHENATE = mupdf.FZ_STEXT_DEHYPHENATE
TEXT_PRESERVE_SPANS = mupdf.FZ_STEXT_PRESERVE_SPANS
TEXT_MEDIABOX_CLIP = mupdf.FZ_STEXT_MEDIABOX_CLIP
TEXT_USE_CID_FOR_UNKNOWN_UNICODE = mupdf.FZ_STEXT_USE_CID_FOR_UNKNOWN_UNICODE
TEXT_COLLECT_STRUCTURE = mupdf.FZ_STEXT_COLLECT_STRUCTURE
TEXT_ACCURATE_BBOXES = mupdf.FZ_STEXT_ACCURATE_BBOXES
TEXT_COLLECT_VECTORS = mupdf.FZ_STEXT_COLLECT_VECTORS
TEXT_IGNORE_ACTUALTEXT = mupdf.FZ_STEXT_IGNORE_ACTUALTEXT
TEXT_SEGMENT = mupdf.FZ_STEXT_SEGMENT

# Defined only when the MuPDF version is at least 1.26.
# NOTE(review): the extent of this 'if' is reconstructed from flattened
# text - confirm that exactly these seven assignments belong to it.
if mupdf_version_tuple >= (1, 26):
    TEXT_PARAGRAPH_BREAK = mupdf.FZ_STEXT_PARAGRAPH_BREAK
    TEXT_TABLE_HUNT = mupdf.FZ_STEXT_TABLE_HUNT
    TEXT_COLLECT_STYLES = mupdf.FZ_STEXT_COLLECT_STYLES
    TEXT_USE_GID_FOR_UNKNOWN_UNICODE = mupdf.FZ_STEXT_USE_GID_FOR_UNKNOWN_UNICODE
    TEXT_CLIP_RECT = mupdf.FZ_STEXT_CLIP_RECT
    TEXT_ACCURATE_ASCENDERS = mupdf.FZ_STEXT_ACCURATE_ASCENDERS
    TEXT_ACCURATE_SIDE_BEARINGS = mupdf.FZ_STEXT_ACCURATE_SIDE_BEARINGS

# 2025-05-07: Non-standard names preserved for backwards compatibility.
TEXT_STEXT_SEGMENT = TEXT_SEGMENT
TEXT_CID_FOR_UNKNOWN_UNICODE = TEXT_USE_CID_FOR_UNKNOWN_UNICODE
  11587. TEXTFLAGS_WORDS = (0
  11588. | TEXT_PRESERVE_LIGATURES
  11589. | TEXT_PRESERVE_WHITESPACE
  11590. | TEXT_MEDIABOX_CLIP
  11591. | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
  11592. )
  11593. TEXTFLAGS_BLOCKS = (0
  11594. | TEXT_PRESERVE_LIGATURES
  11595. | TEXT_PRESERVE_WHITESPACE
  11596. | TEXT_MEDIABOX_CLIP
  11597. | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
  11598. )
  11599. TEXTFLAGS_DICT = (0
  11600. | TEXT_PRESERVE_LIGATURES
  11601. | TEXT_PRESERVE_WHITESPACE
  11602. | TEXT_MEDIABOX_CLIP
  11603. | TEXT_PRESERVE_IMAGES
  11604. | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
  11605. )
  11606. TEXTFLAGS_RAWDICT = TEXTFLAGS_DICT
  11607. TEXTFLAGS_SEARCH = (0
  11608. | TEXT_PRESERVE_WHITESPACE
  11609. | TEXT_MEDIABOX_CLIP
  11610. | TEXT_DEHYPHENATE
  11611. | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
  11612. )
  11613. TEXTFLAGS_HTML = (0
  11614. | TEXT_PRESERVE_LIGATURES
  11615. | TEXT_PRESERVE_WHITESPACE
  11616. | TEXT_MEDIABOX_CLIP
  11617. | TEXT_PRESERVE_IMAGES
  11618. | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
  11619. )
  11620. TEXTFLAGS_XHTML = (0
  11621. | TEXT_PRESERVE_LIGATURES
  11622. | TEXT_PRESERVE_WHITESPACE
  11623. | TEXT_MEDIABOX_CLIP
  11624. | TEXT_PRESERVE_IMAGES
  11625. | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
  11626. )
  11627. TEXTFLAGS_XML = (0
  11628. | TEXT_PRESERVE_LIGATURES
  11629. | TEXT_PRESERVE_WHITESPACE
  11630. | TEXT_MEDIABOX_CLIP
  11631. | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
  11632. )
  11633. TEXTFLAGS_TEXT = (0
  11634. | TEXT_PRESERVE_LIGATURES
  11635. | TEXT_PRESERVE_WHITESPACE
  11636. | TEXT_MEDIABOX_CLIP
  11637. | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
  11638. )
  11639. # Simple text encoding options
  11640. TEXT_ENCODING_LATIN = 0
  11641. TEXT_ENCODING_GREEK = 1
  11642. TEXT_ENCODING_CYRILLIC = 2
  11643. TOOLS_JM_UNIQUE_ID = 0
  11644. # colorspace identifiers
  11645. CS_RGB = 1
  11646. CS_GRAY = 2
  11647. CS_CMYK = 3
  11648. # PDF Blend Modes
  11649. PDF_BM_Color = "Color"
  11650. PDF_BM_ColorBurn = "ColorBurn"
  11651. PDF_BM_ColorDodge = "ColorDodge"
  11652. PDF_BM_Darken = "Darken"
  11653. PDF_BM_Difference = "Difference"
  11654. PDF_BM_Exclusion = "Exclusion"
  11655. PDF_BM_HardLight = "HardLight"
  11656. PDF_BM_Hue = "Hue"
  11657. PDF_BM_Lighten = "Lighten"
  11658. PDF_BM_Luminosity = "Luminosity"
  11659. PDF_BM_Multiply = "Multiply"
  11660. PDF_BM_Normal = "Normal"
  11661. PDF_BM_Overlay = "Overlay"
  11662. PDF_BM_Saturation = "Saturation"
  11663. PDF_BM_Screen = "Screen"
  11664. PDF_BM_SoftLight = "Softlight"
# Templates for the PDF source of link annotations, keyed by link type.
# Each value is a function that fills its arguments into the object text;
# the last argument always goes into /Rect[...].
annot_skel = {
    # a: inserted before " 0 R" in the destination; b, c, d: formatted by
    # _format_g() after /XYZ; e: rect
    "goto1": lambda a, b, c, d, e: f"<</A<</S/GoTo/D[{a} 0 R/XYZ {_format_g((b, c, d))}]>>/Rect[{e}]/BS<</W 0>>/Subtype/Link>>",
    # a: inserted as is after /D; b: rect
    "goto2": lambda a, b: f"<</A<</S/GoTo/D{a}>>/Rect[{b}]/BS<</W 0>>/Subtype/Link>>",
    # a: first item of the destination array; b, c, d: formatted by
    # _format_g() after /XYZ; e: /F string; f: /UF string; g: rect
    "gotor1": lambda a, b, c, d, e, f, g: f"<</A<</S/GoToR/D[{a} /XYZ {_format_g((b, c, d))}]/F<</F({e})/UF({f})/Type/Filespec>>>>/Rect[{g}]/BS<</W 0>>/Subtype/Link>>",
    # a: inserted as is after /D; b: /F string; c: rect
    "gotor2": lambda a, b, c: f"<</A<</S/GoToR/D{a}/F({b})>>/Rect[{c}]/BS<</W 0>>/Subtype/Link>>",
    # a: /F string; b: /UF string; c: rect
    "launch": lambda a, b, c: f"<</A<</S/Launch/F<</F({a})/UF({b})/Type/Filespec>>>>/Rect[{c}]/BS<</W 0>>/Subtype/Link>>",
    # a: /URI string; b: rect
    "uri": lambda a, b: f"<</A<</S/URI/URI({a})>>/Rect[{b}]/BS<</W 0>>/Subtype/Link>>",
    # a: /D string of a /GoTo action; b: rect
    "named": lambda a, b: f"<</A<</S/GoTo/D({a})/Type/Action>>/Rect[{b}]/BS<</W 0>>/Subtype/Link>>",
    }
  11674. class FileDataError(RuntimeError):
  11675. """Raised for documents with file structure issues."""
  11676. pass
  11677. class FileNotFoundError(RuntimeError):
  11678. """Raised if file does not exist."""
  11679. pass
  11680. class EmptyFileError(FileDataError):
  11681. """Raised when creating documents from zero-length data."""
  11682. pass
  11683. # propagate exception class to C-level code
  11684. #_set_FileDataError(FileDataError)
# Module-level Colorspace objects, one per CS_* identifier.
csRGB = Colorspace(CS_RGB)
csGRAY = Colorspace(CS_GRAY)
csCMYK = Colorspace(CS_CMYK)
# These don't appear to be visible in classic, but are used
# internally.
#
# String constants for dictionary keys. Mostly the value repeats the name
# part after "dictkey_"; the exceptions are marked below.
dictkey_align = "align"
dictkey_asc = "ascender"        # value differs from the name suffix
dictkey_bidi = "bidi"
dictkey_bbox = "bbox"
dictkey_blocks = "blocks"
dictkey_bpc = "bpc"
dictkey_c = "c"
dictkey_chars = "chars"
dictkey_color = "color"
dictkey_colorspace = "colorspace"
dictkey_content = "content"
dictkey_creationDate = "creationDate"
dictkey_cs_name = "cs-name"     # hyphen in the value, underscore in the name
dictkey_da = "da"
dictkey_dashes = "dashes"
dictkey_descr = "description"   # not to be confused with dictkey_desc
dictkey_desc = "descender"      # not to be confused with dictkey_descr
dictkey_dir = "dir"
dictkey_effect = "effect"
dictkey_ext = "ext"
dictkey_filename = "filename"
dictkey_fill = "fill"
dictkey_flags = "flags"
dictkey_char_flags = "char_flags"
dictkey_font = "font"
dictkey_glyph = "glyph"
dictkey_height = "height"
dictkey_id = "id"
dictkey_image = "image"
dictkey_items = "items"
dictkey_length = "length"
dictkey_lines = "lines"
dictkey_matrix = "transform"    # value differs from the name suffix
dictkey_modDate = "modDate"
dictkey_name = "name"
dictkey_number = "number"
dictkey_origin = "origin"
dictkey_rect = "rect"
dictkey_size = "size"
dictkey_smask = "smask"
dictkey_spans = "spans"
dictkey_stroke = "stroke"
dictkey_style = "style"
dictkey_subject = "subject"
dictkey_text = "text"
dictkey_title = "title"
dictkey_type = "type"
dictkey_ufilename = "ufilename"
dictkey_width = "width"
dictkey_wmode = "wmode"
dictkey_xref = "xref"
dictkey_xres = "xres"
dictkey_yres = "yres"
  11744. try:
  11745. from pymupdf_fonts import fontdescriptors, fontbuffers
  11746. fitz_fontdescriptors = fontdescriptors.copy()
  11747. for k in fitz_fontdescriptors.keys():
  11748. fitz_fontdescriptors[k]["loader"] = fontbuffers[k]
  11749. del fontdescriptors, fontbuffers
  11750. except ImportError:
  11751. fitz_fontdescriptors = {}
  11752. symbol_glyphs = ( # Glyph list for the built-in font 'Symbol'
  11753. (183, 0.46),
  11754. (183, 0.46),
  11755. (183, 0.46),
  11756. (183, 0.46),
  11757. (183, 0.46),
  11758. (183, 0.46),
  11759. (183, 0.46),
  11760. (183, 0.46),
  11761. (183, 0.46),
  11762. (183, 0.46),
  11763. (183, 0.46),
  11764. (183, 0.46),
  11765. (183, 0.46),
  11766. (183, 0.46),
  11767. (183, 0.46),
  11768. (183, 0.46),
  11769. (183, 0.46),
  11770. (183, 0.46),
  11771. (183, 0.46),
  11772. (183, 0.46),
  11773. (183, 0.46),
  11774. (183, 0.46),
  11775. (183, 0.46),
  11776. (183, 0.46),
  11777. (183, 0.46),
  11778. (183, 0.46),
  11779. (183, 0.46),
  11780. (183, 0.46),
  11781. (183, 0.46),
  11782. (183, 0.46),
  11783. (183, 0.46),
  11784. (183, 0.46),
  11785. (32, 0.25),
  11786. (33, 0.333),
  11787. (34, 0.713),
  11788. (35, 0.5),
  11789. (36, 0.549),
  11790. (37, 0.833),
  11791. (38, 0.778),
  11792. (39, 0.439),
  11793. (40, 0.333),
  11794. (41, 0.333),
  11795. (42, 0.5),
  11796. (43, 0.549),
  11797. (44, 0.25),
  11798. (45, 0.549),
  11799. (46, 0.25),
  11800. (47, 0.278),
  11801. (48, 0.5),
  11802. (49, 0.5),
  11803. (50, 0.5),
  11804. (51, 0.5),
  11805. (52, 0.5),
  11806. (53, 0.5),
  11807. (54, 0.5),
  11808. (55, 0.5),
  11809. (56, 0.5),
  11810. (57, 0.5),
  11811. (58, 0.278),
  11812. (59, 0.278),
  11813. (60, 0.549),
  11814. (61, 0.549),
  11815. (62, 0.549),
  11816. (63, 0.444),
  11817. (64, 0.549),
  11818. (65, 0.722),
  11819. (66, 0.667),
  11820. (67, 0.722),
  11821. (68, 0.612),
  11822. (69, 0.611),
  11823. (70, 0.763),
  11824. (71, 0.603),
  11825. (72, 0.722),
  11826. (73, 0.333),
  11827. (74, 0.631),
  11828. (75, 0.722),
  11829. (76, 0.686),
  11830. (77, 0.889),
  11831. (78, 0.722),
  11832. (79, 0.722),
  11833. (80, 0.768),
  11834. (81, 0.741),
  11835. (82, 0.556),
  11836. (83, 0.592),
  11837. (84, 0.611),
  11838. (85, 0.69),
  11839. (86, 0.439),
  11840. (87, 0.768),
  11841. (88, 0.645),
  11842. (89, 0.795),
  11843. (90, 0.611),
  11844. (91, 0.333),
  11845. (92, 0.863),
  11846. (93, 0.333),
  11847. (94, 0.658),
  11848. (95, 0.5),
  11849. (96, 0.5),
  11850. (97, 0.631),
  11851. (98, 0.549),
  11852. (99, 0.549),
  11853. (100, 0.494),
  11854. (101, 0.439),
  11855. (102, 0.521),
  11856. (103, 0.411),
  11857. (104, 0.603),
  11858. (105, 0.329),
  11859. (106, 0.603),
  11860. (107, 0.549),
  11861. (108, 0.549),
  11862. (109, 0.576),
  11863. (110, 0.521),
  11864. (111, 0.549),
  11865. (112, 0.549),
  11866. (113, 0.521),
  11867. (114, 0.549),
  11868. (115, 0.603),
  11869. (116, 0.439),
  11870. (117, 0.576),
  11871. (118, 0.713),
  11872. (119, 0.686),
  11873. (120, 0.493),
  11874. (121, 0.686),
  11875. (122, 0.494),
  11876. (123, 0.48),
  11877. (124, 0.2),
  11878. (125, 0.48),
  11879. (126, 0.549),
  11880. (183, 0.46),
  11881. (183, 0.46),
  11882. (183, 0.46),
  11883. (183, 0.46),
  11884. (183, 0.46),
  11885. (183, 0.46),
  11886. (183, 0.46),
  11887. (183, 0.46),
  11888. (183, 0.46),
  11889. (183, 0.46),
  11890. (183, 0.46),
  11891. (183, 0.46),
  11892. (183, 0.46),
  11893. (183, 0.46),
  11894. (183, 0.46),
  11895. (183, 0.46),
  11896. (183, 0.46),
  11897. (183, 0.46),
  11898. (183, 0.46),
  11899. (183, 0.46),
  11900. (183, 0.46),
  11901. (183, 0.46),
  11902. (183, 0.46),
  11903. (183, 0.46),
  11904. (183, 0.46),
  11905. (183, 0.46),
  11906. (183, 0.46),
  11907. (183, 0.46),
  11908. (183, 0.46),
  11909. (183, 0.46),
  11910. (183, 0.46),
  11911. (183, 0.46),
  11912. (183, 0.46),
  11913. (160, 0.25),
  11914. (161, 0.62),
  11915. (162, 0.247),
  11916. (163, 0.549),
  11917. (164, 0.167),
  11918. (165, 0.713),
  11919. (166, 0.5),
  11920. (167, 0.753),
  11921. (168, 0.753),
  11922. (169, 0.753),
  11923. (170, 0.753),
  11924. (171, 1.042),
  11925. (172, 0.713),
  11926. (173, 0.603),
  11927. (174, 0.987),
  11928. (175, 0.603),
  11929. (176, 0.4),
  11930. (177, 0.549),
  11931. (178, 0.411),
  11932. (179, 0.549),
  11933. (180, 0.549),
  11934. (181, 0.576),
  11935. (182, 0.494),
  11936. (183, 0.46),
  11937. (184, 0.549),
  11938. (185, 0.549),
  11939. (186, 0.549),
  11940. (187, 0.549),
  11941. (188, 1),
  11942. (189, 0.603),
  11943. (190, 1),
  11944. (191, 0.658),
  11945. (192, 0.823),
  11946. (193, 0.686),
  11947. (194, 0.795),
  11948. (195, 0.987),
  11949. (196, 0.768),
  11950. (197, 0.768),
  11951. (198, 0.823),
  11952. (199, 0.768),
  11953. (200, 0.768),
  11954. (201, 0.713),
  11955. (202, 0.713),
  11956. (203, 0.713),
  11957. (204, 0.713),
  11958. (205, 0.713),
  11959. (206, 0.713),
  11960. (207, 0.713),
  11961. (208, 0.768),
  11962. (209, 0.713),
  11963. (210, 0.79),
  11964. (211, 0.79),
  11965. (212, 0.89),
  11966. (213, 0.823),
  11967. (214, 0.549),
  11968. (215, 0.549),
  11969. (216, 0.713),
  11970. (217, 0.603),
  11971. (218, 0.603),
  11972. (219, 1.042),
  11973. (220, 0.987),
  11974. (221, 0.603),
  11975. (222, 0.987),
  11976. (223, 0.603),
  11977. (224, 0.494),
  11978. (225, 0.329),
  11979. (226, 0.79),
  11980. (227, 0.79),
  11981. (228, 0.786),
  11982. (229, 0.713),
  11983. (230, 0.384),
  11984. (231, 0.384),
  11985. (232, 0.384),
  11986. (233, 0.384),
  11987. (234, 0.384),
  11988. (235, 0.384),
  11989. (236, 0.494),
  11990. (237, 0.494),
  11991. (238, 0.494),
  11992. (239, 0.494),
  11993. (183, 0.46),
  11994. (241, 0.329),
  11995. (242, 0.274),
  11996. (243, 0.686),
  11997. (244, 0.686),
  11998. (245, 0.686),
  11999. (246, 0.384),
  12000. (247, 0.549),
  12001. (248, 0.384),
  12002. (249, 0.384),
  12003. (250, 0.384),
  12004. (251, 0.384),
  12005. (252, 0.494),
  12006. (253, 0.494),
  12007. (254, 0.494),
  12008. (183, 0.46),
  12009. )
  12010. zapf_glyphs = ( # Glyph list for the built-in font 'ZapfDingbats'
  12011. (183, 0.788),
  12012. (183, 0.788),
  12013. (183, 0.788),
  12014. (183, 0.788),
  12015. (183, 0.788),
  12016. (183, 0.788),
  12017. (183, 0.788),
  12018. (183, 0.788),
  12019. (183, 0.788),
  12020. (183, 0.788),
  12021. (183, 0.788),
  12022. (183, 0.788),
  12023. (183, 0.788),
  12024. (183, 0.788),
  12025. (183, 0.788),
  12026. (183, 0.788),
  12027. (183, 0.788),
  12028. (183, 0.788),
  12029. (183, 0.788),
  12030. (183, 0.788),
  12031. (183, 0.788),
  12032. (183, 0.788),
  12033. (183, 0.788),
  12034. (183, 0.788),
  12035. (183, 0.788),
  12036. (183, 0.788),
  12037. (183, 0.788),
  12038. (183, 0.788),
  12039. (183, 0.788),
  12040. (183, 0.788),
  12041. (183, 0.788),
  12042. (183, 0.788),
  12043. (32, 0.278),
  12044. (33, 0.974),
  12045. (34, 0.961),
  12046. (35, 0.974),
  12047. (36, 0.98),
  12048. (37, 0.719),
  12049. (38, 0.789),
  12050. (39, 0.79),
  12051. (40, 0.791),
  12052. (41, 0.69),
  12053. (42, 0.96),
  12054. (43, 0.939),
  12055. (44, 0.549),
  12056. (45, 0.855),
  12057. (46, 0.911),
  12058. (47, 0.933),
  12059. (48, 0.911),
  12060. (49, 0.945),
  12061. (50, 0.974),
  12062. (51, 0.755),
  12063. (52, 0.846),
  12064. (53, 0.762),
  12065. (54, 0.761),
  12066. (55, 0.571),
  12067. (56, 0.677),
  12068. (57, 0.763),
  12069. (58, 0.76),
  12070. (59, 0.759),
  12071. (60, 0.754),
  12072. (61, 0.494),
  12073. (62, 0.552),
  12074. (63, 0.537),
  12075. (64, 0.577),
  12076. (65, 0.692),
  12077. (66, 0.786),
  12078. (67, 0.788),
  12079. (68, 0.788),
  12080. (69, 0.79),
  12081. (70, 0.793),
  12082. (71, 0.794),
  12083. (72, 0.816),
  12084. (73, 0.823),
  12085. (74, 0.789),
  12086. (75, 0.841),
  12087. (76, 0.823),
  12088. (77, 0.833),
  12089. (78, 0.816),
  12090. (79, 0.831),
  12091. (80, 0.923),
  12092. (81, 0.744),
  12093. (82, 0.723),
  12094. (83, 0.749),
  12095. (84, 0.79),
  12096. (85, 0.792),
  12097. (86, 0.695),
  12098. (87, 0.776),
  12099. (88, 0.768),
  12100. (89, 0.792),
  12101. (90, 0.759),
  12102. (91, 0.707),
  12103. (92, 0.708),
  12104. (93, 0.682),
  12105. (94, 0.701),
  12106. (95, 0.826),
  12107. (96, 0.815),
  12108. (97, 0.789),
  12109. (98, 0.789),
  12110. (99, 0.707),
  12111. (100, 0.687),
  12112. (101, 0.696),
  12113. (102, 0.689),
  12114. (103, 0.786),
  12115. (104, 0.787),
  12116. (105, 0.713),
  12117. (106, 0.791),
  12118. (107, 0.785),
  12119. (108, 0.791),
  12120. (109, 0.873),
  12121. (110, 0.761),
  12122. (111, 0.762),
  12123. (112, 0.762),
  12124. (113, 0.759),
  12125. (114, 0.759),
  12126. (115, 0.892),
  12127. (116, 0.892),
  12128. (117, 0.788),
  12129. (118, 0.784),
  12130. (119, 0.438),
  12131. (120, 0.138),
  12132. (121, 0.277),
  12133. (122, 0.415),
  12134. (123, 0.392),
  12135. (124, 0.392),
  12136. (125, 0.668),
  12137. (126, 0.668),
  12138. (183, 0.788),
  12139. (183, 0.788),
  12140. (183, 0.788),
  12141. (183, 0.788),
  12142. (183, 0.788),
  12143. (183, 0.788),
  12144. (183, 0.788),
  12145. (183, 0.788),
  12146. (183, 0.788),
  12147. (183, 0.788),
  12148. (183, 0.788),
  12149. (183, 0.788),
  12150. (183, 0.788),
  12151. (183, 0.788),
  12152. (183, 0.788),
  12153. (183, 0.788),
  12154. (183, 0.788),
  12155. (183, 0.788),
  12156. (183, 0.788),
  12157. (183, 0.788),
  12158. (183, 0.788),
  12159. (183, 0.788),
  12160. (183, 0.788),
  12161. (183, 0.788),
  12162. (183, 0.788),
  12163. (183, 0.788),
  12164. (183, 0.788),
  12165. (183, 0.788),
  12166. (183, 0.788),
  12167. (183, 0.788),
  12168. (183, 0.788),
  12169. (183, 0.788),
  12170. (183, 0.788),
  12171. (183, 0.788),
  12172. (161, 0.732),
  12173. (162, 0.544),
  12174. (163, 0.544),
  12175. (164, 0.91),
  12176. (165, 0.667),
  12177. (166, 0.76),
  12178. (167, 0.76),
  12179. (168, 0.776),
  12180. (169, 0.595),
  12181. (170, 0.694),
  12182. (171, 0.626),
  12183. (172, 0.788),
  12184. (173, 0.788),
  12185. (174, 0.788),
  12186. (175, 0.788),
  12187. (176, 0.788),
  12188. (177, 0.788),
  12189. (178, 0.788),
  12190. (179, 0.788),
  12191. (180, 0.788),
  12192. (181, 0.788),
  12193. (182, 0.788),
  12194. (183, 0.788),
  12195. (184, 0.788),
  12196. (185, 0.788),
  12197. (186, 0.788),
  12198. (187, 0.788),
  12199. (188, 0.788),
  12200. (189, 0.788),
  12201. (190, 0.788),
  12202. (191, 0.788),
  12203. (192, 0.788),
  12204. (193, 0.788),
  12205. (194, 0.788),
  12206. (195, 0.788),
  12207. (196, 0.788),
  12208. (197, 0.788),
  12209. (198, 0.788),
  12210. (199, 0.788),
  12211. (200, 0.788),
  12212. (201, 0.788),
  12213. (202, 0.788),
  12214. (203, 0.788),
  12215. (204, 0.788),
  12216. (205, 0.788),
  12217. (206, 0.788),
  12218. (207, 0.788),
  12219. (208, 0.788),
  12220. (209, 0.788),
  12221. (210, 0.788),
  12222. (211, 0.788),
  12223. (212, 0.894),
  12224. (213, 0.838),
  12225. (214, 1.016),
  12226. (215, 0.458),
  12227. (216, 0.748),
  12228. (217, 0.924),
  12229. (218, 0.748),
  12230. (219, 0.918),
  12231. (220, 0.927),
  12232. (221, 0.928),
  12233. (222, 0.928),
  12234. (223, 0.834),
  12235. (224, 0.873),
  12236. (225, 0.828),
  12237. (226, 0.924),
  12238. (227, 0.924),
  12239. (228, 0.917),
  12240. (229, 0.93),
  12241. (230, 0.931),
  12242. (231, 0.463),
  12243. (232, 0.883),
  12244. (233, 0.836),
  12245. (234, 0.836),
  12246. (235, 0.867),
  12247. (236, 0.867),
  12248. (237, 0.696),
  12249. (238, 0.696),
  12250. (239, 0.874),
  12251. (183, 0.788),
  12252. (241, 0.874),
  12253. (242, 0.76),
  12254. (243, 0.946),
  12255. (244, 0.771),
  12256. (245, 0.865),
  12257. (246, 0.771),
  12258. (247, 0.888),
  12259. (248, 0.967),
  12260. (249, 0.888),
  12261. (250, 0.831),
  12262. (251, 0.873),
  12263. (252, 0.927),
  12264. (253, 0.97),
  12265. (183, 0.788),
  12266. (183, 0.788),
  12267. )
  12268. # Functions
  12269. #
  12270. def _read_samples( pixmap, offset, n):
  12271. # fixme: need to be able to get a sample in one call, as a Python
  12272. # bytes or similar.
  12273. ret = []
  12274. if not pixmap.samples():
  12275. # mupdf.fz_samples_get() gives a segv if pixmap->samples is null.
  12276. return ret
  12277. for i in range( n):
  12278. ret.append( mupdf.fz_samples_get( pixmap, offset + i))
  12279. return bytes( ret)
  12280. def _INRANGE(v, low, high):
  12281. return low <= v and v <= high
  12282. def _remove_dest_range(pdf, numbers):
  12283. pagecount = mupdf.pdf_count_pages(pdf)
  12284. for i in range(pagecount):
  12285. n1 = i
  12286. if n1 in numbers:
  12287. continue
  12288. pageref = mupdf.pdf_lookup_page_obj( pdf, i)
  12289. annots = mupdf.pdf_dict_get( pageref, PDF_NAME('Annots'))
  12290. if not annots.m_internal:
  12291. continue
  12292. len_ = mupdf.pdf_array_len(annots)
  12293. for j in range(len_ - 1, -1, -1):
  12294. o = mupdf.pdf_array_get( annots, j)
  12295. if not mupdf.pdf_name_eq( mupdf.pdf_dict_get( o, PDF_NAME('Subtype')), PDF_NAME('Link')):
  12296. continue
  12297. action = mupdf.pdf_dict_get( o, PDF_NAME('A'))
  12298. dest = mupdf.pdf_dict_get( o, PDF_NAME('Dest'))
  12299. if action.m_internal:
  12300. if not mupdf.pdf_name_eq( mupdf.pdf_dict_get( action, PDF_NAME('S')), PDF_NAME('GoTo')):
  12301. continue
  12302. dest = mupdf.pdf_dict_get( action, PDF_NAME('D'))
  12303. pno = -1
  12304. if mupdf.pdf_is_array( dest):
  12305. target = mupdf.pdf_array_get( dest, 0)
  12306. pno = mupdf.pdf_lookup_page_number( pdf, target)
  12307. elif mupdf.pdf_is_string( dest):
  12308. location, _, _ = mupdf.fz_resolve_link( pdf.super(), mupdf.pdf_to_text_string( dest))
  12309. pno = location.page
  12310. if pno < 0: # page number lookup did not work
  12311. continue
  12312. n1 = pno
  12313. if n1 in numbers:
  12314. mupdf.pdf_array_delete( annots, j)
  12315. def ASSERT_PDF(cond):
  12316. assert isinstance(cond, (mupdf.PdfPage, mupdf.PdfDocument)), f'{type(cond)=} {cond=}'
  12317. if not cond.m_internal:
  12318. raise Exception(MSG_IS_NO_PDF)
  12319. def EMPTY_IRECT():
  12320. return IRect(FZ_MAX_INF_RECT, FZ_MAX_INF_RECT, FZ_MIN_INF_RECT, FZ_MIN_INF_RECT)
  12321. def EMPTY_QUAD():
  12322. return EMPTY_RECT().quad
  12323. def EMPTY_RECT():
  12324. return Rect(FZ_MAX_INF_RECT, FZ_MAX_INF_RECT, FZ_MIN_INF_RECT, FZ_MIN_INF_RECT)
  12325. def ENSURE_OPERATION(pdf):
  12326. if not JM_have_operation(pdf):
  12327. raise Exception("No journalling operation started")
  12328. def INFINITE_IRECT():
  12329. return IRect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT)
  12330. def INFINITE_QUAD():
  12331. return INFINITE_RECT().quad
  12332. def INFINITE_RECT():
  12333. return Rect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT)
  12334. def JM_BinFromBuffer(buffer_):
  12335. '''
  12336. Turn fz_buffer into a Python bytes object
  12337. '''
  12338. assert isinstance(buffer_, mupdf.FzBuffer)
  12339. ret = mupdf.fz_buffer_extract_copy(buffer_)
  12340. return ret
  12341. def JM_EscapeStrFromStr(c):
  12342. # `c` is typically from SWIG which will have converted a `const char*` from
  12343. # C into a Python `str` using `PyUnicode_DecodeUTF8(carray, static_cast<
  12344. # Py_ssize_t >(size), "surrogateescape")`. This gives us a Python `str`
  12345. # with some characters encoded as a \0xdcXY sequence, where `XY` are hex
  12346. # digits for an invalid byte in the original `const char*`.
  12347. #
  12348. # This is actually a reasonable way of representing arbitrary
  12349. # strings from C, but we want to mimic what PyMuPDF does. It uses
  12350. # `PyUnicode_DecodeRawUnicodeEscape(c, (Py_ssize_t) strlen(c), "replace")`
  12351. # which gives a string containing actual unicode characters for any invalid
  12352. # bytes.
  12353. #
  12354. # We mimic this by converting the `str` to a `bytes` with 'surrogateescape'
  12355. # to recognise \0xdcXY sequences, then convert the individual bytes into a
  12356. # `str` using `chr()`.
  12357. #
  12358. # Would be good to have a more efficient way to do this.
  12359. #
  12360. if c is None:
  12361. return ''
  12362. assert isinstance(c, str), f'{type(c)=}'
  12363. b = c.encode('utf8', 'surrogateescape')
  12364. ret = ''
  12365. for bb in b:
  12366. ret += chr(bb)
  12367. return ret
  12368. def JM_BufferFromBytes(stream):
  12369. '''
  12370. Make fz_buffer from a PyBytes, PyByteArray or io.BytesIO object. If a text
  12371. io.BytesIO, we convert to binary by encoding as utf8.
  12372. '''
  12373. if isinstance(stream, (bytes, bytearray)):
  12374. data = stream
  12375. elif hasattr(stream, 'getvalue'):
  12376. data = stream.getvalue()
  12377. if isinstance(data, str):
  12378. data = data.encode('utf-8')
  12379. if not isinstance(data, (bytes, bytearray)):
  12380. raise Exception(f'.getvalue() returned unexpected type: {type(data)}')
  12381. else:
  12382. return mupdf.FzBuffer()
  12383. return mupdf.fz_new_buffer_from_copied_data(data)
  12384. def JM_FLOAT_ITEM(obj, idx):
  12385. if not PySequence_Check(obj):
  12386. return None
  12387. return float(obj[idx])
  12388. def JM_INT_ITEM(obj, idx):
  12389. if idx < len(obj):
  12390. temp = obj[idx]
  12391. if isinstance(temp, (int, float)):
  12392. return 0, temp
  12393. return 1, None
def JM_pixmap_from_page(doc, page, ctm, cs, alpha, annots, clip):
    '''
    Pixmap creation directly using a short-lived displaylist, so we can support
    separations.

    Args:
        doc: document; only queried for its output intent.
        page: page to render.
        ctm: matrix-like value, converted with JM_matrix_from_py().
        cs: requested colorspace; replaced by the document's output intent
            if that exists and has the same number of components.
        alpha: passed to fz_new_pixmap_with_bbox(); also selects how the
            pixmap is cleared before drawing.
        annots: if true the whole page is run (fz_run_page), otherwise only
            its contents (fz_run_page_contents).
        clip: rect-like value, converted with JM_rect_from_py() and
            intersected with the page bounds.

    Returns:
        The rendered pixmap.
    '''
    SPOTS_NONE = 0
    SPOTS_OVERPRINT_SIM = 1
    SPOTS_FULL = 2
    FZ_ENABLE_SPOT_RENDERING = True # fixme: this is a build-time setting in MuPDF's config.h.
    # With the flag hard-coded to True, `spots` is always SPOTS_OVERPRINT_SIM.
    if FZ_ENABLE_SPOT_RENDERING:
        spots = SPOTS_OVERPRINT_SIM
    else:
        spots = SPOTS_NONE
    seps = None
    colorspace = cs
    # Target area: page bounds, clipped, transformed by the matrix and rounded.
    matrix = JM_matrix_from_py(ctm)
    rect = mupdf.fz_bound_page(page)
    rclip = JM_rect_from_py(clip)
    rect = mupdf.fz_intersect_rect(rect, rclip) # no-op if clip is not given
    rect = mupdf.fz_transform_rect(rect, matrix)
    bbox = mupdf.fz_round_rect(rect)
    # Output intent of the document (/OutputIntents); used below as a
    # colorspace.
    oi = mupdf.fz_document_output_intent(doc)
    # if present and compatible, use it instead of the parameter
    if oi.m_internal:
        if mupdf.fz_colorspace_n(oi) == mupdf.fz_colorspace_n(cs):
            colorspace = mupdf.fz_keep_colorspace(oi)
    # check if spots rendering is available and if so use separations
    if spots != SPOTS_NONE:
        seps = mupdf.fz_page_separations(page)
        if seps.m_internal:
            n = mupdf.fz_count_separations(seps)
            if spots == SPOTS_FULL:
                for i in range(n):
                    mupdf.fz_set_separation_behavior(seps, i, mupdf.FZ_SEPARATION_SPOT)
            else:
                for i in range(n):
                    mupdf.fz_set_separation_behavior(seps, i, mupdf.FZ_SEPARATION_COMPOSITE)
        elif mupdf.fz_page_uses_overprint(page):
            # This page uses overprint, so we need an empty
            # sep object to force the overprint simulation on.
            seps = mupdf.fz_new_separations(0)
        elif oi.m_internal and mupdf.fz_colorspace_n(oi) != mupdf.fz_colorspace_n(colorspace):
            # We have an output intent, and it's incompatible
            # with the colorspace our device needs. Force the
            # overprint simulation on, because this ensures that
            # we 'simulate' the output intent too.
            seps = mupdf.fz_new_separations(0)
    pix = mupdf.fz_new_pixmap_with_bbox(colorspace, bbox, seps, alpha)
    # Initialise the pixmap before drawing: plain clear with alpha, otherwise
    # every sample is set to 0xFF (presumably a white background).
    if alpha:
        mupdf.fz_clear_pixmap(pix)
    else:
        mupdf.fz_clear_pixmap_with_value(pix, 0xFF)
    dev = mupdf.fz_new_draw_device(matrix, pix)
    if annots:
        mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
    else:
        mupdf.fz_run_page_contents(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
    mupdf.fz_close_device(dev)
    return pix
  12454. def JM_StrAsChar(x):
  12455. # fixme: should encode, but swig doesn't pass bytes to C as const char*.
  12456. return x
  12457. #return x.encode('utf8')
  12458. def JM_TUPLE(o: typing.Sequence) -> tuple:
  12459. return tuple(map(lambda x: round(x, 5) if abs(x) >= 1e-4 else 0, o))
  12460. def JM_TUPLE3(o: typing.Sequence) -> tuple:
  12461. return tuple(map(lambda x: round(x, 3) if abs(x) >= 1e-3 else 0, o))
  12462. def JM_UnicodeFromStr(s):
  12463. if s is None:
  12464. return ''
  12465. if isinstance(s, bytes):
  12466. s = s.decode('utf8')
  12467. assert isinstance(s, str), f'{type(s)=} {s=}'
  12468. return s
  12469. def JM_add_annot_id(annot, stem):
  12470. '''
  12471. Add a unique /NM key to an annotation or widget.
  12472. Append a number to 'stem' such that the result is a unique name.
  12473. '''
  12474. assert isinstance(annot, mupdf.PdfAnnot)
  12475. page = _pdf_annot_page(annot)
  12476. annot_obj = mupdf.pdf_annot_obj( annot)
  12477. names = JM_get_annot_id_list(page)
  12478. i = 0
  12479. while 1:
  12480. stem_id = f'{JM_annot_id_stem}-{stem}{i}'
  12481. if stem_id not in names:
  12482. break
  12483. i += 1
  12484. response = JM_StrAsChar(stem_id)
  12485. name = mupdf.pdf_new_string( response, len(response))
  12486. mupdf.pdf_dict_puts(annot_obj, "NM", name)
  12487. page.doc().m_internal.resynth_required = 0
  12488. def JM_add_oc_object(pdf, ref, xref):
  12489. '''
  12490. Add OC object reference to a dictionary
  12491. '''
  12492. indobj = mupdf.pdf_new_indirect(pdf, xref, 0)
  12493. if not mupdf.pdf_is_dict(indobj):
  12494. RAISEPY(MSG_BAD_OC_REF, PyExc_ValueError)
  12495. type_ = mupdf.pdf_dict_get(indobj, PDF_NAME('Type'))
  12496. if (mupdf.pdf_objcmp(type_, PDF_NAME('OCG')) == 0
  12497. or mupdf.pdf_objcmp(type_, PDF_NAME('OCMD')) == 0
  12498. ):
  12499. mupdf.pdf_dict_put(ref, PDF_NAME('OC'), indobj)
  12500. else:
  12501. RAISEPY(MSG_BAD_OC_REF, PyExc_ValueError)
  12502. def JM_annot_border(annot_obj):
  12503. dash_py = list()
  12504. style = None
  12505. width = -1
  12506. clouds = -1
  12507. obj = None
  12508. obj = mupdf.pdf_dict_get( annot_obj, PDF_NAME('Border'))
  12509. if mupdf.pdf_is_array( obj):
  12510. width = mupdf.pdf_to_real( mupdf.pdf_array_get( obj, 2))
  12511. if mupdf.pdf_array_len( obj) == 4:
  12512. dash = mupdf.pdf_array_get( obj, 3)
  12513. for i in range( mupdf.pdf_array_len( dash)):
  12514. val = mupdf.pdf_to_int( mupdf.pdf_array_get( dash, i))
  12515. dash_py.append( val)
  12516. bs_o = mupdf.pdf_dict_get( annot_obj, PDF_NAME('BS'))
  12517. if bs_o.m_internal:
  12518. width = mupdf.pdf_to_real( mupdf.pdf_dict_get( bs_o, PDF_NAME('W')))
  12519. style = mupdf.pdf_to_name( mupdf.pdf_dict_get( bs_o, PDF_NAME('S')))
  12520. if style == '':
  12521. style = None
  12522. obj = mupdf.pdf_dict_get( bs_o, PDF_NAME('D'))
  12523. if obj.m_internal:
  12524. for i in range( mupdf.pdf_array_len( obj)):
  12525. val = mupdf.pdf_to_int( mupdf.pdf_array_get( obj, i))
  12526. dash_py.append( val)
  12527. obj = mupdf.pdf_dict_get( annot_obj, PDF_NAME('BE'))
  12528. if obj.m_internal:
  12529. clouds = mupdf.pdf_to_int( mupdf.pdf_dict_get( obj, PDF_NAME('I')))
  12530. res = dict()
  12531. res[ dictkey_width] = width
  12532. res[ dictkey_dashes] = tuple( dash_py)
  12533. res[ dictkey_style] = style
  12534. res[ 'clouds'] = clouds
  12535. return res
  12536. def JM_annot_colors(annot_obj):
  12537. res = dict()
  12538. bc = list() # stroke colors
  12539. fc =list() # fill colors
  12540. o = mupdf.pdf_dict_get(annot_obj, mupdf.PDF_ENUM_NAME_C)
  12541. if mupdf.pdf_is_array(o):
  12542. n = mupdf.pdf_array_len(o)
  12543. for i in range(n):
  12544. col = mupdf.pdf_to_real( mupdf.pdf_array_get(o, i))
  12545. bc.append(col)
  12546. res[dictkey_stroke] = bc
  12547. o = mupdf.pdf_dict_gets(annot_obj, "IC")
  12548. if mupdf.pdf_is_array(o):
  12549. n = mupdf.pdf_array_len(o)
  12550. for i in range(n):
  12551. col = mupdf.pdf_to_real( mupdf.pdf_array_get(o, i))
  12552. fc.append(col)
  12553. res[dictkey_fill] = fc
  12554. return res
  12555. def JM_annot_set_border( border, doc, annot_obj):
  12556. assert isinstance(border, dict)
  12557. obj = None
  12558. dashlen = 0
  12559. nwidth = border.get( dictkey_width) # new width
  12560. ndashes = border.get( dictkey_dashes) # new dashes
  12561. nstyle = border.get( dictkey_style) # new style
  12562. nclouds = border.get( 'clouds', -1) # new clouds value
  12563. # get old border properties
  12564. oborder = JM_annot_border( annot_obj)
  12565. # delete border-related entries
  12566. mupdf.pdf_dict_del( annot_obj, PDF_NAME('BS'))
  12567. mupdf.pdf_dict_del( annot_obj, PDF_NAME('BE'))
  12568. mupdf.pdf_dict_del( annot_obj, PDF_NAME('Border'))
  12569. # populate border items: keep old values for any omitted new ones
  12570. if nwidth < 0:
  12571. nwidth = oborder.get( dictkey_width) # no new width: keep current
  12572. if ndashes is None:
  12573. ndashes = oborder.get( dictkey_dashes) # no new dashes: keep old
  12574. if nstyle is None:
  12575. nstyle = oborder.get( dictkey_style) # no new style: keep old
  12576. if nclouds < 0:
  12577. nclouds = oborder.get( "clouds", -1) # no new clouds: keep old
  12578. if isinstance( ndashes, tuple) and len( ndashes) > 0:
  12579. dashlen = len( ndashes)
  12580. darr = mupdf.pdf_new_array( doc, dashlen)
  12581. for d in ndashes:
  12582. mupdf.pdf_array_push_int( darr, d)
  12583. mupdf.pdf_dict_putl( annot_obj, darr, PDF_NAME('BS'), PDF_NAME('D'))
  12584. mupdf.pdf_dict_putl(
  12585. annot_obj,
  12586. mupdf.pdf_new_real( nwidth),
  12587. PDF_NAME('BS'),
  12588. PDF_NAME('W'),
  12589. )
  12590. if dashlen == 0:
  12591. obj = JM_get_border_style( nstyle)
  12592. else:
  12593. obj = PDF_NAME('D')
  12594. mupdf.pdf_dict_putl( annot_obj, obj, PDF_NAME('BS'), PDF_NAME('S'))
  12595. if nclouds > 0:
  12596. mupdf.pdf_dict_put_dict( annot_obj, PDF_NAME('BE'), 2)
  12597. obj = mupdf.pdf_dict_get( annot_obj, PDF_NAME('BE'))
  12598. mupdf.pdf_dict_put( obj, PDF_NAME('S'), PDF_NAME('C'))
  12599. mupdf.pdf_dict_put_int( obj, PDF_NAME('I'), nclouds)
  12600. def make_escape(ch):
  12601. if ch == 92:
  12602. return "\\u005c"
  12603. elif 32 <= ch <= 127 or ch == 10:
  12604. return chr(ch)
  12605. elif 0xd800 <= ch <= 0xdfff: # orphaned surrogate
  12606. return "\\ufffd"
  12607. elif ch <= 0xffff:
  12608. return "\\u%04x" % ch
  12609. else:
  12610. return "\\U%08x" % ch
  12611. def JM_append_rune(buff, ch):
  12612. """
  12613. APPEND non-ascii runes in unicode escape format to fz_buffer.
  12614. """
  12615. mupdf.fz_append_string(buff, make_escape(ch))
  12616. def JM_append_word(lines, buff, wbbox, block_n, line_n, word_n):
  12617. '''
  12618. Functions for wordlist output
  12619. '''
  12620. s = JM_EscapeStrFromBuffer(buff)
  12621. litem = (
  12622. wbbox.x0,
  12623. wbbox.y0,
  12624. wbbox.x1,
  12625. wbbox.y1,
  12626. s,
  12627. block_n,
  12628. line_n,
  12629. word_n,
  12630. )
  12631. lines.append(litem)
  12632. return word_n + 1, mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY) # word counter
  12633. def JM_add_layer_config( pdf, name, creator, ON):
  12634. '''
  12635. Add OC configuration to the PDF catalog
  12636. '''
  12637. ocp = JM_ensure_ocproperties( pdf)
  12638. configs = mupdf.pdf_dict_get( ocp, PDF_NAME('Configs'))
  12639. if not mupdf.pdf_is_array( configs):
  12640. configs = mupdf.pdf_dict_put_array( ocp, PDF_NAME('Configs'), 1)
  12641. D = mupdf.pdf_new_dict( pdf, 5)
  12642. mupdf.pdf_dict_put_text_string( D, PDF_NAME('Name'), name)
  12643. if creator is not None:
  12644. mupdf.pdf_dict_put_text_string( D, PDF_NAME('Creator'), creator)
  12645. mupdf.pdf_dict_put( D, PDF_NAME('BaseState'), PDF_NAME('OFF'))
  12646. onarray = mupdf.pdf_dict_put_array( D, PDF_NAME('ON'), 5)
  12647. if not ON:
  12648. pass
  12649. else:
  12650. ocgs = mupdf.pdf_dict_get( ocp, PDF_NAME('OCGs'))
  12651. n = len(ON)
  12652. for i in range(n):
  12653. xref = 0
  12654. e, xref = JM_INT_ITEM(ON, i)
  12655. if e == 1:
  12656. continue
  12657. ind = mupdf.pdf_new_indirect( pdf, xref, 0)
  12658. if mupdf.pdf_array_contains( ocgs, ind):
  12659. mupdf.pdf_array_push( onarray, ind)
  12660. mupdf.pdf_array_push( configs, D)
  12661. def JM_char_bbox(line, ch):
  12662. '''
  12663. return rect of char quad
  12664. '''
  12665. q = JM_char_quad(line, ch)
  12666. r = mupdf.fz_rect_from_quad(q)
  12667. if not line.m_internal.wmode:
  12668. return r
  12669. if r.y1 < r.y0 + ch.m_internal.size:
  12670. r.y0 = r.y1 - ch.m_internal.size
  12671. return r
  12672. def JM_char_font_flags(font, line, ch):
  12673. flags = 0
  12674. if line and ch:
  12675. flags += detect_super_script(line, ch)
  12676. flags += mupdf.fz_font_is_italic(font) * TEXT_FONT_ITALIC
  12677. flags += mupdf.fz_font_is_serif(font) * TEXT_FONT_SERIFED
  12678. flags += mupdf.fz_font_is_monospaced(font) * TEXT_FONT_MONOSPACED
  12679. flags += mupdf.fz_font_is_bold(font) * TEXT_FONT_BOLD
  12680. return flags
def JM_char_quad(line, ch):
    '''
    re-compute char quad if ascender/descender values make no sense

    Args:
        line: mupdf.FzStextLine containing the character.
        ch: mupdf.FzStextChar.

    Returns:
        The character's quad: either the stored one (corrections skipped,
        vertical write mode, or plausible font metrics) or a quad rebuilt
        from adjusted ascender / descender values.
    '''
    if 1 and g_use_extra:
        # This reduces time taken to extract text from PyMuPDF.pdf from 20s to
        # 15s.
        return mupdf.FzQuad(extra.JM_char_quad( line.m_internal, ch.m_internal))
    assert isinstance(line, mupdf.FzStextLine)
    assert isinstance(ch, mupdf.FzStextChar)
    if _globals.skip_quad_corrections: # no special handling
        return ch.quad
    if line.m_internal.wmode: # never touch vertical write mode
        return ch.quad
    font = mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font))
    asc = JM_font_ascender(font)
    dsc = JM_font_descender(font)
    fsize = ch.m_internal.size
    # FLT_EPSILON lets a height of (almost) exactly 1 pass the test below
    asc_dsc = asc - dsc + FLT_EPSILON
    if asc_dsc >= 1 and _globals.small_glyph_heights == 0: # no problem
        return mupdf.FzQuad(ch.m_internal.quad)
    # Re-compute quad with adjusted ascender / descender values:
    # Move ch->origin to (0,0) and de-rotate quad, then adjust the corners,
    # re-rotate and move back to ch->origin location.
    fsize = ch.m_internal.size
    bbox = mupdf.fz_font_bbox(font)
    fwidth = bbox.x1 - bbox.x0
    if asc < 1e-3: # probably Tesseract glyphless font
        dsc = -0.1
        asc = 0.9
        asc_dsc = 1.0
    if _globals.small_glyph_heights or asc_dsc < 1:
        dsc = dsc / asc_dsc
        asc = asc / asc_dsc
    # scale both values so that asc - dsc equals the font size
    asc_dsc = asc - dsc
    asc = asc * fsize / asc_dsc
    dsc = dsc * fsize / asc_dsc
    # Re-compute quad with the adjusted ascender / descender values:
    # Move ch->origin to (0,0) and de-rotate quad, then adjust the corners,
    # re-rotate and move back to ch->origin location.
    c = line.m_internal.dir.x # cosine
    s = line.m_internal.dir.y # sine
    trm1 = mupdf.fz_make_matrix(c, -s, s, c, 0, 0) # derotate
    trm2 = mupdf.fz_make_matrix(c, s, -s, c, 0, 0) # rotate
    if (c == -1): # left-right flip
        trm1.d = 1
        trm2.d = 1
    xlate1 = mupdf.fz_make_matrix(1, 0, 0, 1, -ch.m_internal.origin.x, -ch.m_internal.origin.y)
    xlate2 = mupdf.fz_make_matrix(1, 0, 0, 1, ch.m_internal.origin.x, ch.m_internal.origin.y)
    quad = mupdf.fz_transform_quad(mupdf.FzQuad(ch.m_internal.quad), xlate1) # move origin to (0,0)
    quad = mupdf.fz_transform_quad(quad, trm1) # de-rotate corners
    # adjust vertical coordinates
    if c == 1 and quad.ul.y > 0: # up-down flip
        quad.ul.y = asc
        quad.ur.y = asc
        quad.ll.y = dsc
        quad.lr.y = dsc
    else:
        quad.ul.y = -asc
        quad.ur.y = -asc
        quad.ll.y = -dsc
        quad.lr.y = -dsc
    # adjust horizontal coordinates that are too crazy:
    # (1) left x must be >= 0
    # (2) if bbox width is 0, lookup char advance in font.
    if quad.ll.x < 0:
        quad.ll.x = 0
        quad.ul.x = 0
    cwidth = quad.lr.x - quad.ll.x
    if cwidth < FLT_EPSILON:
        glyph = mupdf.fz_encode_character( font, ch.m_internal.c)
        if glyph:
            fwidth = mupdf.fz_advance_glyph( font, glyph, line.m_internal.wmode)
            quad.lr.x = quad.ll.x + fwidth * fsize
            quad.ur.x = quad.lr.x
    quad = mupdf.fz_transform_quad(quad, trm2) # rotate back
    quad = mupdf.fz_transform_quad(quad, xlate2) # translate back
    return quad
  12759. def JM_choice_options(annot):
  12760. '''
  12761. return list of choices for list or combo boxes
  12762. '''
  12763. annot_obj = mupdf.pdf_annot_obj( annot.this)
  12764. opts = mupdf.pdf_choice_widget_options2( annot, 0)
  12765. n = len( opts)
  12766. if n == 0:
  12767. return # wrong widget type
  12768. optarr = mupdf.pdf_dict_get( annot_obj, PDF_NAME('Opt'))
  12769. liste = []
  12770. for i in range( n):
  12771. m = mupdf.pdf_array_len( mupdf.pdf_array_get( optarr, i))
  12772. if m == 2:
  12773. val = (
  12774. mupdf.pdf_to_text_string( mupdf.pdf_array_get( mupdf.pdf_array_get( optarr, i), 0)),
  12775. mupdf.pdf_to_text_string( mupdf.pdf_array_get( mupdf.pdf_array_get( optarr, i), 1)),
  12776. )
  12777. liste.append( val)
  12778. else:
  12779. val = mupdf.pdf_to_text_string( mupdf.pdf_array_get( optarr, i))
  12780. liste.append( val)
  12781. return liste
  12782. def JM_clear_pixmap_rect_with_value(dest, value, b):
  12783. '''
  12784. Clear a pixmap rectangle - my version also supports non-alpha pixmaps
  12785. '''
  12786. b = mupdf.fz_intersect_irect(b, mupdf.fz_pixmap_bbox(dest))
  12787. w = b.x1 - b.x0
  12788. y = b.y1 - b.y0
  12789. if w <= 0 or y <= 0:
  12790. return 0
  12791. destspan = dest.stride()
  12792. destp = destspan * (b.y0 - dest.y()) + dest.n() * (b.x0 - dest.x())
  12793. # CMYK needs special handling (and potentially any other subtractive colorspaces)
  12794. if mupdf.fz_colorspace_n(dest.colorspace()) == 4:
  12795. value = 255 - value
  12796. while 1:
  12797. s = destp
  12798. for x in range(0, w):
  12799. mupdf.fz_samples_set(dest, s, 0)
  12800. s += 1
  12801. mupdf.fz_samples_set(dest, s, 0)
  12802. s += 1
  12803. mupdf.fz_samples_set(dest, s, 0)
  12804. s += 1
  12805. mupdf.fz_samples_set(dest, s, value)
  12806. s += 1
  12807. if dest.alpha():
  12808. mupdf.fz_samples_set(dest, s, 255)
  12809. s += 1
  12810. destp += destspan
  12811. if y == 0:
  12812. break
  12813. y -= 1
  12814. return 1
  12815. while 1:
  12816. s = destp
  12817. for x in range(w):
  12818. for k in range(dest.n()-1):
  12819. mupdf.fz_samples_set(dest, s, value)
  12820. s += 1
  12821. if dest.alpha():
  12822. mupdf.fz_samples_set(dest, s, 255)
  12823. s += 1
  12824. else:
  12825. mupdf.fz_samples_set(dest, s, value)
  12826. s += 1
  12827. destp += destspan
  12828. if y == 0:
  12829. break
  12830. y -= 1
  12831. return 1
  12832. def JM_color_FromSequence(color):
  12833. if isinstance(color, (int, float)): # maybe just a single float
  12834. color = [color]
  12835. if not isinstance( color, (list, tuple)):
  12836. return -1, []
  12837. if len(color) not in (0, 1, 3, 4):
  12838. return -1, []
  12839. ret = color[:]
  12840. for i in range(len(ret)):
  12841. if ret[i] < 0 or ret[i] > 1:
  12842. ret[i] = 1
  12843. return len(ret), ret
  12844. def JM_color_count( pm, clip):
  12845. if g_use_extra:
  12846. return extra.ll_JM_color_count(pm.m_internal, clip)
  12847. rc = dict()
  12848. cnt = 0
  12849. irect = mupdf.fz_pixmap_bbox( pm)
  12850. irect = mupdf.fz_intersect_irect(irect, mupdf.fz_round_rect(JM_rect_from_py(clip)))
  12851. stride = pm.stride()
  12852. width = irect.x1 - irect.x0
  12853. height = irect.y1 - irect.y0
  12854. n = pm.n()
  12855. substride = width * n
  12856. s = stride * (irect.y0 - pm.y()) + (irect.x0 - pm.x()) * n
  12857. oldpix = _read_samples( pm, s, n)
  12858. cnt = 0
  12859. if mupdf.fz_is_empty_irect(irect):
  12860. return rc
  12861. for i in range( height):
  12862. for j in range( 0, substride, n):
  12863. newpix = _read_samples( pm, s + j, n)
  12864. if newpix != oldpix:
  12865. pixel = oldpix
  12866. c = rc.get( pixel, None)
  12867. if c is not None:
  12868. cnt += c
  12869. rc[ pixel] = cnt
  12870. cnt = 1
  12871. oldpix = newpix
  12872. else:
  12873. cnt += 1
  12874. s += stride
  12875. pixel = oldpix
  12876. c = rc.get( pixel)
  12877. if c is not None:
  12878. cnt += c
  12879. rc[ pixel] = cnt
  12880. return rc
  12881. def JM_compress_buffer(inbuffer):
  12882. '''
  12883. compress char* into a new buffer
  12884. '''
  12885. data, compressed_length = mupdf.fz_new_deflated_data_from_buffer(
  12886. inbuffer,
  12887. mupdf.FZ_DEFLATE_BEST,
  12888. )
  12889. #log( '{=data compressed_length}')
  12890. if not data or compressed_length == 0:
  12891. return None
  12892. buf = mupdf.FzBuffer(mupdf.fz_new_buffer_from_data(data, compressed_length))
  12893. mupdf.fz_resize_buffer(buf, compressed_length)
  12894. return buf
  12895. def JM_copy_rectangle(page, area):
  12896. need_new_line = 0
  12897. buffer = io.StringIO()
  12898. for block in page:
  12899. if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
  12900. continue
  12901. for line in block:
  12902. line_had_text = 0
  12903. for ch in line:
  12904. r = JM_char_bbox(line, ch)
  12905. if JM_rects_overlap(area, r):
  12906. line_had_text = 1
  12907. if need_new_line:
  12908. buffer.write("\n")
  12909. need_new_line = 0
  12910. buffer.write(make_escape(ch.m_internal.c))
  12911. if line_had_text:
  12912. need_new_line = 1
  12913. s = buffer.getvalue() # take over the data
  12914. return s
  12915. def JM_convert_to_pdf(doc, fp, tp, rotate):
  12916. '''
  12917. Convert any MuPDF document to a PDF
  12918. Returns bytes object containing the PDF, created via 'write' function.
  12919. '''
  12920. pdfout = mupdf.PdfDocument()
  12921. incr = 1
  12922. s = fp
  12923. e = tp
  12924. if fp > tp:
  12925. incr = -1 # count backwards
  12926. s = tp # adjust ...
  12927. e = fp # ... range
  12928. rot = JM_norm_rotation(rotate)
  12929. i = fp
  12930. while 1: # interpret & write document pages as PDF pages
  12931. if not _INRANGE(i, s, e):
  12932. break
  12933. page = mupdf.fz_load_page(doc, i)
  12934. mediabox = mupdf.fz_bound_page(page)
  12935. dev, resources, contents = mupdf.pdf_page_write(pdfout, mediabox)
  12936. mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
  12937. mupdf.fz_close_device(dev)
  12938. dev = None
  12939. page_obj = mupdf.pdf_add_page(pdfout, mediabox, rot, resources, contents)
  12940. mupdf.pdf_insert_page(pdfout, -1, page_obj)
  12941. i += incr
  12942. # PDF created - now write it to Python bytearray
  12943. # prepare write options structure
  12944. opts = mupdf.PdfWriteOptions()
  12945. opts.do_garbage = 4
  12946. opts.do_compress = 1
  12947. opts.do_compress_images = 1
  12948. opts.do_compress_fonts = 1
  12949. opts.do_sanitize = 1
  12950. opts.do_incremental = 0
  12951. opts.do_ascii = 0
  12952. opts.do_decompress = 0
  12953. opts.do_linear = 0
  12954. opts.do_clean = 1
  12955. opts.do_pretty = 0
  12956. res = mupdf.fz_new_buffer(8192)
  12957. out = mupdf.FzOutput(res)
  12958. mupdf.pdf_write_document(pdfout, out, opts)
  12959. out.fz_close_output()
  12960. c = mupdf.fz_buffer_extract_copy(res)
  12961. assert isinstance(c, bytes)
  12962. return c
  12963. # Copied from MuPDF v1.14
  12964. # Create widget
  12965. def JM_create_widget(doc, page, type, fieldname):
  12966. old_sigflags = mupdf.pdf_to_int(mupdf.pdf_dict_getp(mupdf.pdf_trailer(doc), "Root/AcroForm/SigFlags"))
  12967. #log( '*** JM_create_widget()')
  12968. #log( f'{mupdf.pdf_create_annot_raw=}')
  12969. #log( f'{page=}')
  12970. #log( f'{mupdf.PDF_ANNOT_WIDGET=}')
  12971. annot = mupdf.pdf_create_annot_raw(page, mupdf.PDF_ANNOT_WIDGET)
  12972. annot_obj = mupdf.pdf_annot_obj(annot)
  12973. try:
  12974. JM_set_field_type(doc, annot_obj, type)
  12975. mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('T'), fieldname)
  12976. if type == mupdf.PDF_WIDGET_TYPE_SIGNATURE:
  12977. sigflags = old_sigflags | (SigFlag_SignaturesExist | SigFlag_AppendOnly)
  12978. mupdf.pdf_dict_putl(
  12979. mupdf.pdf_trailer(doc),
  12980. mupdf.pdf_new_int(sigflags),
  12981. PDF_NAME('Root'),
  12982. PDF_NAME('AcroForm'),
  12983. PDF_NAME('SigFlags'),
  12984. )
  12985. # pdf_create_annot will have linked the new widget into the page's
  12986. # annot array. We also need it linked into the document's form
  12987. form = mupdf.pdf_dict_getp(mupdf.pdf_trailer(doc), "Root/AcroForm/Fields")
  12988. if not form.m_internal:
  12989. form = mupdf.pdf_new_array(doc, 1)
  12990. mupdf.pdf_dict_putl(
  12991. mupdf.pdf_trailer(doc),
  12992. form,
  12993. PDF_NAME('Root'),
  12994. PDF_NAME('AcroForm'),
  12995. PDF_NAME('Fields'),
  12996. )
  12997. mupdf.pdf_array_push(form, annot_obj) # Cleanup relies on this statement being last
  12998. except Exception:
  12999. if g_exceptions_verbose: exception_info()
  13000. mupdf.pdf_delete_annot(page, annot)
  13001. if type == mupdf.PDF_WIDGET_TYPE_SIGNATURE:
  13002. mupdf.pdf_dict_putl(
  13003. mupdf.pdf_trailer(doc),
  13004. mupdf.pdf_new_int(old_sigflags),
  13005. PDF_NAME('Root'),
  13006. PDF_NAME('AcroForm'),
  13007. PDF_NAME('SigFlags'),
  13008. )
  13009. raise
  13010. return annot
  13011. def JM_cropbox(page_obj):
  13012. '''
  13013. return a PDF page's CropBox
  13014. '''
  13015. if g_use_extra:
  13016. return extra.JM_cropbox(page_obj)
  13017. mediabox = JM_mediabox(page_obj)
  13018. cropbox = mupdf.pdf_to_rect(
  13019. mupdf.pdf_dict_get_inheritable(page_obj, PDF_NAME('CropBox'))
  13020. )
  13021. if mupdf.fz_is_infinite_rect(cropbox) or mupdf.fz_is_empty_rect(cropbox):
  13022. cropbox = mediabox
  13023. y0 = mediabox.y1 - cropbox.y1
  13024. y1 = mediabox.y1 - cropbox.y0
  13025. cropbox.y0 = y0
  13026. cropbox.y1 = y1
  13027. return cropbox
  13028. def JM_cropbox_size(page_obj):
  13029. rect = JM_cropbox(page_obj)
  13030. w = abs(rect.x1 - rect.x0)
  13031. h = abs(rect.y1 - rect.y0)
  13032. size = mupdf.fz_make_point(w, h)
  13033. return size
  13034. def JM_derotate_page_matrix(page):
  13035. '''
  13036. just the inverse of rotation
  13037. '''
  13038. mp = JM_rotate_page_matrix(page)
  13039. return mupdf.fz_invert_matrix(mp)
  13040. def JM_embed_file(
  13041. pdf,
  13042. buf,
  13043. filename,
  13044. ufilename,
  13045. desc,
  13046. compress,
  13047. ):
  13048. '''
  13049. embed a new file in a PDF (not only /EmbeddedFiles entries)
  13050. '''
  13051. len_ = 0
  13052. val = mupdf.pdf_new_dict(pdf, 6)
  13053. mupdf.pdf_dict_put_dict(val, PDF_NAME('CI'), 4)
  13054. ef = mupdf.pdf_dict_put_dict(val, PDF_NAME('EF'), 4)
  13055. mupdf.pdf_dict_put_text_string(val, PDF_NAME('F'), filename)
  13056. mupdf.pdf_dict_put_text_string(val, PDF_NAME('UF'), ufilename)
  13057. mupdf.pdf_dict_put_text_string(val, PDF_NAME('Desc'), desc)
  13058. mupdf.pdf_dict_put(val, PDF_NAME('Type'), PDF_NAME('Filespec'))
  13059. bs = b' '
  13060. f = mupdf.pdf_add_stream(
  13061. pdf,
  13062. #mupdf.fz_fz_new_buffer_from_copied_data(bs),
  13063. mupdf.fz_new_buffer_from_copied_data(bs),
  13064. mupdf.PdfObj(),
  13065. 0,
  13066. )
  13067. mupdf.pdf_dict_put(ef, PDF_NAME('F'), f)
  13068. JM_update_stream(pdf, f, buf, compress)
  13069. len_, _ = mupdf.fz_buffer_storage(buf)
  13070. mupdf.pdf_dict_put_int(f, PDF_NAME('DL'), len_)
  13071. mupdf.pdf_dict_put_int(f, PDF_NAME('Length'), len_)
  13072. params = mupdf.pdf_dict_put_dict(f, PDF_NAME('Params'), 4)
  13073. mupdf.pdf_dict_put_int(params, PDF_NAME('Size'), len_)
  13074. return val
  13075. def JM_embedded_clean(pdf):
  13076. '''
  13077. perform some cleaning if we have /EmbeddedFiles:
  13078. (1) remove any /Limits if /Names exists
  13079. (2) remove any empty /Collection
  13080. (3) set /PageMode/UseAttachments
  13081. '''
  13082. root = mupdf.pdf_dict_get( mupdf.pdf_trailer( pdf), PDF_NAME('Root'))
  13083. # remove any empty /Collection entry
  13084. coll = mupdf.pdf_dict_get(root, PDF_NAME('Collection'))
  13085. if coll.m_internal and mupdf.pdf_dict_len(coll) == 0:
  13086. mupdf.pdf_dict_del(root, PDF_NAME('Collection'))
  13087. efiles = mupdf.pdf_dict_getl(
  13088. root,
  13089. PDF_NAME('Names'),
  13090. PDF_NAME('EmbeddedFiles'),
  13091. PDF_NAME('Names'),
  13092. )
  13093. if efiles.m_internal:
  13094. mupdf.pdf_dict_put_name(root, PDF_NAME('PageMode'), "UseAttachments")
  13095. def JM_EscapeStrFromBuffer(buff):
  13096. if not buff.m_internal:
  13097. return ''
  13098. s = mupdf.fz_buffer_extract_copy(buff)
  13099. val = PyUnicode_DecodeRawUnicodeEscape(s, errors='replace')
  13100. return val
  13101. def JM_ensure_identity(pdf):
  13102. '''
  13103. Store ID in PDF trailer
  13104. '''
  13105. id_ = mupdf.pdf_dict_get( mupdf.pdf_trailer(pdf), PDF_NAME('ID'))
  13106. if not id_.m_internal:
  13107. rnd0 = mupdf.fz_memrnd2(16)
  13108. # Need to convert raw bytes into a str to send to
  13109. # mupdf.pdf_new_string(). chr() seems to work for this.
  13110. rnd = ''
  13111. for i in rnd0:
  13112. rnd += chr(i)
  13113. id_ = mupdf.pdf_dict_put_array( mupdf.pdf_trailer( pdf), PDF_NAME('ID'), 2)
  13114. mupdf.pdf_array_push( id_, mupdf.pdf_new_string( rnd, len(rnd)))
  13115. mupdf.pdf_array_push( id_, mupdf.pdf_new_string( rnd, len(rnd)))
  13116. def JM_ensure_ocproperties(pdf):
  13117. '''
  13118. Ensure OCProperties, return /OCProperties key
  13119. '''
  13120. ocp = mupdf.pdf_dict_get(mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root')), PDF_NAME('OCProperties'))
  13121. if ocp.m_internal:
  13122. return ocp
  13123. root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
  13124. ocp = mupdf.pdf_dict_put_dict(root, PDF_NAME('OCProperties'), 2)
  13125. mupdf.pdf_dict_put_array(ocp, PDF_NAME('OCGs'), 0)
  13126. D = mupdf.pdf_dict_put_dict(ocp, PDF_NAME('D'), 5)
  13127. mupdf.pdf_dict_put_array(D, PDF_NAME('ON'), 0)
  13128. mupdf.pdf_dict_put_array(D, PDF_NAME('OFF'), 0)
  13129. mupdf.pdf_dict_put_array(D, PDF_NAME('Order'), 0)
  13130. mupdf.pdf_dict_put_array(D, PDF_NAME('RBGroups'), 0)
  13131. return ocp
  13132. def JM_expand_fname(name):
  13133. '''
  13134. Make /DA string of annotation
  13135. '''
  13136. if not name: return "Helv"
  13137. if name.startswith("Co"): return "Cour"
  13138. if name.startswith("co"): return "Cour"
  13139. if name.startswith("Ti"): return "TiRo"
  13140. if name.startswith("ti"): return "TiRo"
  13141. if name.startswith("Sy"): return "Symb"
  13142. if name.startswith("sy"): return "Symb"
  13143. if name.startswith("Za"): return "ZaDb"
  13144. if name.startswith("za"): return "ZaDb"
  13145. return "Helv"
  13146. def JM_field_type_text(wtype):
  13147. '''
  13148. String from widget type
  13149. '''
  13150. if wtype == mupdf.PDF_WIDGET_TYPE_BUTTON:
  13151. return "Button"
  13152. if wtype == mupdf.PDF_WIDGET_TYPE_CHECKBOX:
  13153. return "CheckBox"
  13154. if wtype == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON:
  13155. return "RadioButton"
  13156. if wtype == mupdf.PDF_WIDGET_TYPE_TEXT:
  13157. return "Text"
  13158. if wtype == mupdf.PDF_WIDGET_TYPE_LISTBOX:
  13159. return "ListBox"
  13160. if wtype == mupdf.PDF_WIDGET_TYPE_COMBOBOX:
  13161. return "ComboBox"
  13162. if wtype == mupdf.PDF_WIDGET_TYPE_SIGNATURE:
  13163. return "Signature"
  13164. return "unknown"
  13165. def JM_fill_pixmap_rect_with_color(dest, col, b):
  13166. assert isinstance(dest, mupdf.FzPixmap)
  13167. # fill a rect with a color tuple
  13168. b = mupdf.fz_intersect_irect(b, mupdf.fz_pixmap_bbox( dest))
  13169. w = b.x1 - b.x0
  13170. y = b.y1 - b.y0
  13171. if w <= 0 or y <= 0:
  13172. return 0
  13173. destspan = dest.stride()
  13174. destp = destspan * (b.y0 - dest.y()) + dest.n() * (b.x0 - dest.x())
  13175. while 1:
  13176. s = destp
  13177. for x in range(w):
  13178. for i in range( dest.n()):
  13179. mupdf.fz_samples_set(dest, s, col[i])
  13180. s += 1
  13181. destp += destspan
  13182. y -= 1
  13183. if y == 0:
  13184. break
  13185. return 1
  13186. def JM_find_annot_irt(annot):
  13187. '''
  13188. Return the first annotation whose /IRT key ("In Response To") points to
  13189. annot. Used to remove the response chain of a given annotation.
  13190. '''
  13191. assert isinstance(annot, mupdf.PdfAnnot)
  13192. irt_annot = None # returning this
  13193. annot_obj = mupdf.pdf_annot_obj(annot)
  13194. found = 0
  13195. # loop thru MuPDF's internal annots array
  13196. page = _pdf_annot_page(annot)
  13197. irt_annot = mupdf.pdf_first_annot(page)
  13198. while 1:
  13199. assert isinstance(irt_annot, mupdf.PdfAnnot)
  13200. if not irt_annot.m_internal:
  13201. break
  13202. irt_annot_obj = mupdf.pdf_annot_obj(irt_annot)
  13203. o = mupdf.pdf_dict_gets(irt_annot_obj, 'IRT')
  13204. if o.m_internal:
  13205. if not mupdf.pdf_objcmp(o, annot_obj):
  13206. found = 1
  13207. break
  13208. irt_annot = mupdf.pdf_next_annot(irt_annot)
  13209. if found:
  13210. return irt_annot
  13211. def JM_font_ascender(font):
  13212. '''
  13213. need own versions of ascender / descender
  13214. '''
  13215. assert isinstance(font, mupdf.FzFont)
  13216. if _globals.skip_quad_corrections:
  13217. return 0.8
  13218. return mupdf.fz_font_ascender(font)
  13219. def JM_font_descender(font):
  13220. '''
  13221. need own versions of ascender / descender
  13222. '''
  13223. assert isinstance(font, mupdf.FzFont)
  13224. if _globals.skip_quad_corrections:
  13225. return -0.2
  13226. ret = mupdf.fz_font_descender(font)
  13227. return ret
  13228. def JM_is_word_delimiter(ch, delimiters):
  13229. """Check if ch is an extra word delimiting character.
  13230. """
  13231. if (0
  13232. or ch <= 32
  13233. or ch == 160
  13234. or 0x202a <= ch <= 0x202e
  13235. ):
  13236. # covers any whitespace plus unicodes that switch between
  13237. # right-to-left and left-to-right languages
  13238. return True
  13239. if not delimiters: # no extra delimiters provided
  13240. return False
  13241. char = chr(ch)
  13242. for d in delimiters:
  13243. if d == char:
  13244. return True
  13245. return False
  13246. def JM_is_rtl_char(ch):
  13247. if ch < 0x590 or ch > 0x900:
  13248. return False
  13249. return True
  13250. def JM_font_name(font):
  13251. assert isinstance(font, mupdf.FzFont)
  13252. name = mupdf.fz_font_name(font)
  13253. s = name.find('+')
  13254. if _globals.subset_fontnames or s == -1 or s != 6:
  13255. return name
  13256. return name[s + 1:]
  13257. def JM_gather_fonts(pdf, dict_, fontlist, stream_xref):
  13258. rc = 1
  13259. n = mupdf.pdf_dict_len(dict_)
  13260. for i in range(n):
  13261. refname = mupdf.pdf_dict_get_key(dict_, i)
  13262. fontdict = mupdf.pdf_dict_get_val(dict_, i)
  13263. if not mupdf.pdf_is_dict(fontdict):
  13264. mupdf.fz_warn( f"'{mupdf.pdf_to_name(refname)}' is no font dict ({mupdf.pdf_to_num(fontdict)} 0 R)")
  13265. continue
  13266. subtype = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_Subtype)
  13267. basefont = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_BaseFont)
  13268. if not basefont.m_internal or mupdf.pdf_is_null(basefont):
  13269. name = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_Name)
  13270. else:
  13271. name = basefont
  13272. encoding = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_Encoding)
  13273. if mupdf.pdf_is_dict(encoding):
  13274. encoding = mupdf.pdf_dict_get(encoding, mupdf.PDF_ENUM_NAME_BaseEncoding)
  13275. xref = mupdf.pdf_to_num(fontdict)
  13276. ext = "n/a"
  13277. if xref:
  13278. ext = JM_get_fontextension(pdf, xref)
  13279. entry = (
  13280. xref,
  13281. ext,
  13282. mupdf.pdf_to_name(subtype),
  13283. JM_EscapeStrFromStr(mupdf.pdf_to_name(name)),
  13284. mupdf.pdf_to_name(refname),
  13285. mupdf.pdf_to_name(encoding),
  13286. stream_xref,
  13287. )
  13288. fontlist.append(entry)
  13289. return rc
  13290. def JM_gather_forms(doc, dict_: mupdf.PdfObj, imagelist, stream_xref: int):
  13291. '''
  13292. Store info of a /Form xobject in Python list
  13293. '''
  13294. assert isinstance(doc, mupdf.PdfDocument)
  13295. rc = 1
  13296. n = mupdf.pdf_dict_len(dict_)
  13297. for i in range(n):
  13298. refname = mupdf.pdf_dict_get_key( dict_, i)
  13299. imagedict = mupdf.pdf_dict_get_val(dict_, i)
  13300. if not mupdf.pdf_is_dict(imagedict):
  13301. mupdf.fz_warn( f"'{mupdf.pdf_to_name(refname)}' is no form dict ({mupdf.pdf_to_num(imagedict)} 0 R)")
  13302. continue
  13303. type_ = mupdf.pdf_dict_get(imagedict, PDF_NAME('Subtype'))
  13304. if not mupdf.pdf_name_eq(type_, PDF_NAME('Form')):
  13305. continue
  13306. o = mupdf.pdf_dict_get(imagedict, PDF_NAME('BBox'))
  13307. m = mupdf.pdf_dict_get(imagedict, PDF_NAME('Matrix'))
  13308. if m.m_internal:
  13309. mat = mupdf.pdf_to_matrix(m)
  13310. else:
  13311. mat = mupdf.FzMatrix()
  13312. if o.m_internal:
  13313. bbox = mupdf.fz_transform_rect( mupdf.pdf_to_rect(o), mat)
  13314. else:
  13315. bbox = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
  13316. xref = mupdf.pdf_to_num(imagedict)
  13317. entry = (
  13318. xref,
  13319. mupdf.pdf_to_name( refname),
  13320. stream_xref,
  13321. JM_py_from_rect(bbox),
  13322. )
  13323. imagelist.append(entry)
  13324. return rc
  13325. def JM_gather_images(doc: mupdf.PdfDocument, dict_: mupdf.PdfObj, imagelist, stream_xref: int):
  13326. '''
  13327. Store info of an image in Python list
  13328. '''
  13329. rc = 1
  13330. n = mupdf.pdf_dict_len( dict_)
  13331. for i in range(n):
  13332. refname = mupdf.pdf_dict_get_key(dict_, i)
  13333. imagedict = mupdf.pdf_dict_get_val(dict_, i)
  13334. if not mupdf.pdf_is_dict(imagedict):
  13335. mupdf.fz_warn(f"'{mupdf.pdf_to_name(refname)}' is no image dict ({mupdf.pdf_to_num(imagedict)} 0 R)")
  13336. continue
  13337. type_ = mupdf.pdf_dict_get(imagedict, PDF_NAME('Subtype'))
  13338. if not mupdf.pdf_name_eq(type_, PDF_NAME('Image')):
  13339. continue
  13340. xref = mupdf.pdf_to_num(imagedict)
  13341. gen = 0
  13342. smask = mupdf.pdf_dict_geta(imagedict, PDF_NAME('SMask'), PDF_NAME('Mask'))
  13343. if smask.m_internal:
  13344. gen = mupdf.pdf_to_num(smask)
  13345. filter_ = mupdf.pdf_dict_geta(imagedict, PDF_NAME('Filter'), PDF_NAME('F'))
  13346. if mupdf.pdf_is_array(filter_):
  13347. filter_ = mupdf.pdf_array_get(filter_, 0)
  13348. altcs = mupdf.PdfObj(0)
  13349. cs = mupdf.pdf_dict_geta(imagedict, PDF_NAME('ColorSpace'), PDF_NAME('CS'))
  13350. if mupdf.pdf_is_array(cs):
  13351. cses = cs
  13352. cs = mupdf.pdf_array_get(cses, 0)
  13353. if (mupdf.pdf_name_eq(cs, PDF_NAME('DeviceN'))
  13354. or mupdf.pdf_name_eq(cs, PDF_NAME('Separation'))
  13355. ):
  13356. altcs = mupdf.pdf_array_get(cses, 2)
  13357. if mupdf.pdf_is_array(altcs):
  13358. altcs = mupdf.pdf_array_get(altcs, 0)
  13359. width = mupdf.pdf_dict_geta(imagedict, PDF_NAME('Width'), PDF_NAME('W'))
  13360. height = mupdf.pdf_dict_geta(imagedict, PDF_NAME('Height'), PDF_NAME('H'))
  13361. bpc = mupdf.pdf_dict_geta(imagedict, PDF_NAME('BitsPerComponent'), PDF_NAME('BPC'))
  13362. entry = (
  13363. xref,
  13364. gen,
  13365. mupdf.pdf_to_int(width),
  13366. mupdf.pdf_to_int(height),
  13367. mupdf.pdf_to_int(bpc),
  13368. JM_EscapeStrFromStr(mupdf.pdf_to_name(cs)),
  13369. JM_EscapeStrFromStr(mupdf.pdf_to_name(altcs)),
  13370. JM_EscapeStrFromStr(mupdf.pdf_to_name(refname)),
  13371. JM_EscapeStrFromStr(mupdf.pdf_to_name(filter_)),
  13372. stream_xref,
  13373. )
  13374. imagelist.append(entry)
  13375. return rc
  13376. def JM_get_annot_by_xref(page, xref):
  13377. '''
  13378. retrieve annot by its xref
  13379. '''
  13380. assert isinstance(page, mupdf.PdfPage)
  13381. found = 0
  13382. # loop thru MuPDF's internal annots array
  13383. annot = mupdf.pdf_first_annot(page)
  13384. while 1:
  13385. if not annot.m_internal:
  13386. break
  13387. if xref == mupdf.pdf_to_num(mupdf.pdf_annot_obj(annot)):
  13388. found = 1
  13389. break
  13390. annot = mupdf.pdf_next_annot( annot)
  13391. if not found:
  13392. raise Exception("xref %d is not an annot of this page" % xref)
  13393. return annot
  13394. def JM_get_annot_by_name(page, name):
  13395. '''
  13396. retrieve annot by name (/NM key)
  13397. '''
  13398. assert isinstance(page, mupdf.PdfPage)
  13399. if not name:
  13400. return
  13401. found = 0
  13402. # loop thru MuPDF's internal annots and widget arrays
  13403. annot = mupdf.pdf_first_annot(page)
  13404. while 1:
  13405. if not annot.m_internal:
  13406. break
  13407. response, len_ = mupdf.pdf_to_string(mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "NM"))
  13408. if name == response:
  13409. found = 1
  13410. break
  13411. annot = mupdf.pdf_next_annot(annot)
  13412. if not found:
  13413. raise Exception("'%s' is not an annot of this page" % name)
  13414. return annot
  13415. def JM_get_annot_id_list(page):
  13416. names = []
  13417. annots = mupdf.pdf_dict_get( page.obj(), mupdf.PDF_ENUM_NAME_Annots)
  13418. if not annots.m_internal:
  13419. return names
  13420. for i in range( mupdf.pdf_array_len(annots)):
  13421. annot_obj = mupdf.pdf_array_get(annots, i)
  13422. name = mupdf.pdf_dict_gets(annot_obj, "NM")
  13423. if name.m_internal:
  13424. names.append(
  13425. mupdf.pdf_to_text_string(name)
  13426. )
  13427. return names
  13428. def JM_get_annot_xref_list( page_obj):
  13429. '''
  13430. return the xrefs and /NM ids of a page's annots, links and fields
  13431. '''
  13432. if g_use_extra:
  13433. names = extra.JM_get_annot_xref_list( page_obj)
  13434. return names
  13435. names = []
  13436. annots = mupdf.pdf_dict_get( page_obj, PDF_NAME('Annots'))
  13437. n = mupdf.pdf_array_len( annots)
  13438. for i in range( n):
  13439. annot_obj = mupdf.pdf_array_get( annots, i)
  13440. xref = mupdf.pdf_to_num( annot_obj)
  13441. subtype = mupdf.pdf_dict_get( annot_obj, PDF_NAME('Subtype'))
  13442. if not subtype.m_internal:
  13443. continue # subtype is required
  13444. type_ = mupdf.pdf_annot_type_from_string( mupdf.pdf_to_name( subtype))
  13445. if type_ == mupdf.PDF_ANNOT_UNKNOWN:
  13446. continue # only accept valid annot types
  13447. id_ = mupdf.pdf_dict_gets( annot_obj, "NM")
  13448. names.append( (xref, type_, mupdf.pdf_to_text_string( id_)))
  13449. return names
  13450. def JM_get_annot_xref_list2(page):
  13451. page = page._pdf_page(required=False)
  13452. if not page.m_internal:
  13453. return list()
  13454. return JM_get_annot_xref_list( page.obj())
  13455. def JM_get_border_style(style):
  13456. '''
  13457. return pdf_obj "border style" from Python str
  13458. '''
  13459. val = mupdf.PDF_ENUM_NAME_S
  13460. if style is None:
  13461. return val
  13462. s = style
  13463. if s.startswith("b") or s.startswith("B"): val = mupdf.PDF_ENUM_NAME_B
  13464. elif s.startswith("d") or s.startswith("D"): val = mupdf.PDF_ENUM_NAME_D
  13465. elif s.startswith("i") or s.startswith("I"): val = mupdf.PDF_ENUM_NAME_I
  13466. elif s.startswith("u") or s.startswith("U"): val = mupdf.PDF_ENUM_NAME_U
  13467. elif s.startswith("s") or s.startswith("S"): val = mupdf.PDF_ENUM_NAME_S
  13468. return val
  13469. def JM_get_font(
  13470. fontname,
  13471. fontfile,
  13472. fontbuffer,
  13473. script,
  13474. lang,
  13475. ordering,
  13476. is_bold,
  13477. is_italic,
  13478. is_serif,
  13479. embed,
  13480. ):
  13481. '''
  13482. return a fz_font from a number of parameters
  13483. '''
  13484. def fertig(font):
  13485. if not font.m_internal:
  13486. raise RuntimeError(MSG_FONT_FAILED)
  13487. # if font allows this, set embedding
  13488. if not font.m_internal.flags.never_embed:
  13489. mupdf.fz_set_font_embedding(font, embed)
  13490. return font
  13491. index = 0
  13492. font = None
  13493. if fontfile:
  13494. #goto have_file;
  13495. font = mupdf.fz_new_font_from_file( None, fontfile, index, 0)
  13496. return fertig(font)
  13497. if fontbuffer:
  13498. #goto have_buffer;
  13499. res = JM_BufferFromBytes(fontbuffer)
  13500. font = mupdf.fz_new_font_from_buffer( None, res, index, 0)
  13501. return fertig(font)
  13502. if ordering > -1:
  13503. # goto have_cjk;
  13504. font = mupdf.fz_new_cjk_font(ordering)
  13505. return fertig(font)
  13506. if fontname:
  13507. # goto have_base14;
  13508. # Base-14 or a MuPDF builtin font
  13509. font = mupdf.fz_new_base14_font(fontname)
  13510. if font.m_internal:
  13511. return fertig(font)
  13512. font = mupdf.fz_new_builtin_font(fontname, is_bold, is_italic)
  13513. return fertig(font)
  13514. # Check for NOTO font
  13515. #have_noto:;
  13516. data, size, index = mupdf.fz_lookup_noto_font( script, lang)
  13517. font = None
  13518. if data:
  13519. font = mupdf.fz_new_font_from_memory( None, data, size, index, 0)
  13520. if font.m_internal:
  13521. return fertig(font)
  13522. font = mupdf.fz_load_fallback_font( script, lang, is_serif, is_bold, is_italic)
  13523. return fertig(font)
  13524. def JM_get_fontbuffer(doc, xref):
  13525. '''
  13526. Return the contents of a font file, identified by xref
  13527. '''
  13528. if xref < 1:
  13529. return
  13530. o = mupdf.pdf_load_object(doc, xref)
  13531. desft = mupdf.pdf_dict_get(o, PDF_NAME('DescendantFonts'))
  13532. if desft.m_internal:
  13533. obj = mupdf.pdf_resolve_indirect(mupdf.pdf_array_get(desft, 0))
  13534. obj = mupdf.pdf_dict_get(obj, PDF_NAME('FontDescriptor'))
  13535. else:
  13536. obj = mupdf.pdf_dict_get(o, PDF_NAME('FontDescriptor'))
  13537. if not obj.m_internal:
  13538. message(f"invalid font - FontDescriptor missing")
  13539. return
  13540. o = obj
  13541. stream = None
  13542. obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile'))
  13543. if obj.m_internal:
  13544. stream = obj # ext = "pfa"
  13545. obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile2'))
  13546. if obj.m_internal:
  13547. stream = obj # ext = "ttf"
  13548. obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile3'))
  13549. if obj.m_internal:
  13550. stream = obj
  13551. obj = mupdf.pdf_dict_get(obj, PDF_NAME('Subtype'))
  13552. if obj.m_internal and not mupdf.pdf_is_name(obj):
  13553. message("invalid font descriptor subtype")
  13554. return
  13555. if mupdf.pdf_name_eq(obj, PDF_NAME('Type1C')):
  13556. pass # Prev code did: ext = "cff", but this has no effect.
  13557. elif mupdf.pdf_name_eq(obj, PDF_NAME('CIDFontType0C')):
  13558. pass # Prev code did: ext = "cid", but this has no effect.
  13559. elif mupdf.pdf_name_eq(obj, PDF_NAME('OpenType')):
  13560. pass # Prev code did: ext = "otf", but this has no effect. */
  13561. else:
  13562. message('warning: unhandled font type {pdf_to_name(ctx, obj)!r}')
  13563. if not stream:
  13564. message('warning: unhandled font type')
  13565. return
  13566. return mupdf.pdf_load_stream(stream)
  13567. def JM_get_resource_properties(ref):
  13568. '''
  13569. Return the items of Resources/Properties (used for Marked Content)
  13570. Argument may be e.g. a page object or a Form XObject
  13571. '''
  13572. properties = mupdf.pdf_dict_getl(ref, PDF_NAME('Resources'), PDF_NAME('Properties'))
  13573. if not properties.m_internal:
  13574. return ()
  13575. else:
  13576. n = mupdf.pdf_dict_len(properties)
  13577. if n < 1:
  13578. return ()
  13579. rc = []
  13580. for i in range(n):
  13581. key = mupdf.pdf_dict_get_key(properties, i)
  13582. val = mupdf.pdf_dict_get_val(properties, i)
  13583. c = mupdf.pdf_to_name(key)
  13584. xref = mupdf.pdf_to_num(val)
  13585. rc.append((c, xref))
  13586. return rc
  13587. def JM_get_widget_by_xref( page, xref):
  13588. '''
  13589. retrieve widget by its xref
  13590. '''
  13591. found = False
  13592. annot = mupdf.pdf_first_widget( page)
  13593. while annot.m_internal:
  13594. annot_obj = mupdf.pdf_annot_obj( annot)
  13595. if xref == mupdf.pdf_to_num( annot_obj):
  13596. found = True
  13597. break
  13598. annot = mupdf.pdf_next_widget( annot)
  13599. if not found:
  13600. raise Exception( f"xref {xref} is not a widget of this page")
  13601. return Annot( annot)
  13602. def JM_get_widget_properties(annot, Widget):
  13603. '''
  13604. Populate a Python Widget object with the values from a PDF form field.
  13605. Called by "Page.first_widget" and "Widget.next".
  13606. '''
  13607. #log( '{type(annot)=}')
  13608. annot_obj = mupdf.pdf_annot_obj(annot.this)
  13609. #log( 'Have called mupdf.pdf_annot_obj()')
  13610. page = _pdf_annot_page(annot.this)
  13611. pdf = page.doc()
  13612. tw = annot
  13613. def SETATTR(key, value):
  13614. setattr(Widget, key, value)
  13615. def SETATTR_DROP(mod, key, value):
  13616. # Original C code for this function deletes if PyObject* is NULL. We
  13617. # don't have a representation for that in Python - e.g. None is not
  13618. # represented by NULL.
  13619. setattr(mod, key, value)
  13620. #log( '=== + mupdf.pdf_widget_type(tw)')
  13621. field_type = mupdf.pdf_widget_type(tw.this)
  13622. #log( '=== - mupdf.pdf_widget_type(tw)')
  13623. Widget.field_type = field_type
  13624. if field_type == mupdf.PDF_WIDGET_TYPE_SIGNATURE:
  13625. if mupdf.pdf_signature_is_signed(pdf, annot_obj):
  13626. SETATTR("is_signed", True)
  13627. else:
  13628. SETATTR("is_signed",False)
  13629. else:
  13630. SETATTR("is_signed", None)
  13631. SETATTR_DROP(Widget, "border_style", JM_UnicodeFromStr(mupdf.pdf_field_border_style(annot_obj)))
  13632. SETATTR_DROP(Widget, "field_type_string", JM_UnicodeFromStr(JM_field_type_text(field_type)))
  13633. field_name = mupdf.pdf_load_field_name(annot_obj)
  13634. SETATTR_DROP(Widget, "field_name", field_name)
  13635. def pdf_dict_get_inheritable_nonempty_label(node, key):
  13636. '''
  13637. This is a modified version of MuPDF's pdf_dict_get_inheritable(), with
  13638. some changes:
  13639. * Returns string from pdf_to_text_string() or None if not found.
  13640. * Recurses to parent if current node exists but with empty string
  13641. value.
  13642. '''
  13643. slow = node
  13644. halfbeat = 11 # Don't start moving slow pointer for a while.
  13645. while 1:
  13646. if not node.m_internal:
  13647. return
  13648. val = mupdf.pdf_dict_get(node, key)
  13649. if val.m_internal:
  13650. label = mupdf.pdf_to_text_string(val)
  13651. if label:
  13652. return label
  13653. node = mupdf.pdf_dict_get(node, PDF_NAME('Parent'))
  13654. if node.m_internal == slow.m_internal:
  13655. raise Exception("cycle in resources")
  13656. halfbeat -= 1
  13657. if halfbeat == 0:
  13658. slow = mupdf.pdf_dict_get(slow, PDF_NAME('Parent'))
  13659. halfbeat = 2
  13660. # In order to address #3950, we use our modified pdf_dict_get_inheritable()
  13661. # to ignore empty-string child values.
  13662. label = pdf_dict_get_inheritable_nonempty_label(annot_obj, PDF_NAME('TU'))
  13663. if label is not None:
  13664. SETATTR_DROP(Widget, "field_label", label)
  13665. fvalue = None
  13666. if field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON:
  13667. obj = mupdf.pdf_dict_get( annot_obj, PDF_NAME('Parent')) # owning RB group
  13668. if obj.m_internal:
  13669. SETATTR_DROP(Widget, "rb_parent", mupdf.pdf_to_num( obj))
  13670. obj = mupdf.pdf_dict_get(annot_obj, PDF_NAME('AS'))
  13671. if obj.m_internal:
  13672. fvalue = mupdf.pdf_to_name(obj)
  13673. if not fvalue:
  13674. fvalue = mupdf.pdf_field_value(annot_obj)
  13675. SETATTR_DROP(Widget, "field_value", JM_UnicodeFromStr(fvalue))
  13676. SETATTR_DROP(Widget, "field_display", mupdf.pdf_field_display(annot_obj))
  13677. border_width = mupdf.pdf_to_real(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('BS'), PDF_NAME('W')))
  13678. if border_width == 0:
  13679. border_width = 1
  13680. SETATTR_DROP(Widget, "border_width", border_width)
  13681. obj = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('BS'), PDF_NAME('D'))
  13682. if mupdf.pdf_is_array(obj):
  13683. n = mupdf.pdf_array_len(obj)
  13684. d = [0] * n
  13685. for i in range(n):
  13686. d[i] = mupdf.pdf_to_int(mupdf.pdf_array_get(obj, i))
  13687. SETATTR_DROP(Widget, "border_dashes", d)
  13688. SETATTR_DROP(Widget, "text_maxlen", mupdf.pdf_text_widget_max_len(tw.this))
  13689. SETATTR_DROP(Widget, "text_format", mupdf.pdf_text_widget_format(tw.this))
  13690. obj = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('MK'), PDF_NAME('BG'))
  13691. if mupdf.pdf_is_array(obj):
  13692. n = mupdf.pdf_array_len(obj)
  13693. col = [0] * n
  13694. for i in range(n):
  13695. col[i] = mupdf.pdf_to_real(mupdf.pdf_array_get(obj, i))
  13696. SETATTR_DROP(Widget, "fill_color", col)
  13697. obj = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('MK'), PDF_NAME('BC'))
  13698. if mupdf.pdf_is_array(obj):
  13699. n = mupdf.pdf_array_len(obj)
  13700. col = [0] * n
  13701. for i in range(n):
  13702. col[i] = mupdf.pdf_to_real(mupdf.pdf_array_get(obj, i))
  13703. SETATTR_DROP(Widget, "border_color", col)
  13704. SETATTR_DROP(Widget, "choice_values", JM_choice_options(annot))
  13705. da = mupdf.pdf_to_text_string(mupdf.pdf_dict_get_inheritable(annot_obj, PDF_NAME('DA')))
  13706. SETATTR_DROP(Widget, "_text_da", JM_UnicodeFromStr(da))
  13707. obj = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('MK'), PDF_NAME('CA'))
  13708. if obj.m_internal:
  13709. SETATTR_DROP(Widget, "button_caption", JM_UnicodeFromStr(mupdf.pdf_to_text_string(obj)))
  13710. SETATTR_DROP(Widget, "field_flags", mupdf.pdf_field_flags(annot_obj))
  13711. # call Py method to reconstruct text color, font name, size
  13712. Widget._parse_da()
  13713. # extract JavaScript action texts
  13714. s = mupdf.pdf_dict_get(annot_obj, PDF_NAME('A'))
  13715. ss = JM_get_script(s)
  13716. SETATTR_DROP(Widget, "script", ss)
  13717. SETATTR_DROP(Widget, "script_stroke",
  13718. JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), PDF_NAME('K')))
  13719. )
  13720. SETATTR_DROP(Widget, "script_format",
  13721. JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), PDF_NAME('F')))
  13722. )
  13723. SETATTR_DROP(Widget, "script_change",
  13724. JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), PDF_NAME('V')))
  13725. )
  13726. SETATTR_DROP(Widget, "script_calc",
  13727. JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), PDF_NAME('C')))
  13728. )
  13729. SETATTR_DROP(Widget, "script_blur",
  13730. JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Bl')))
  13731. )
  13732. SETATTR_DROP(Widget, "script_focus",
  13733. JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Fo')))
  13734. )
  13735. def JM_get_fontextension(doc, xref):
  13736. '''
  13737. Return the file extension of a font file, identified by xref
  13738. '''
  13739. if xref < 1:
  13740. return "n/a"
  13741. o = mupdf.pdf_load_object(doc, xref)
  13742. desft = mupdf.pdf_dict_get(o, PDF_NAME('DescendantFonts'))
  13743. if desft.m_internal:
  13744. obj = mupdf.pdf_resolve_indirect(mupdf.pdf_array_get(desft, 0))
  13745. obj = mupdf.pdf_dict_get(obj, PDF_NAME('FontDescriptor'))
  13746. else:
  13747. obj = mupdf.pdf_dict_get(o, PDF_NAME('FontDescriptor'))
  13748. if not obj.m_internal:
  13749. return "n/a" # this is a base-14 font
  13750. o = obj # we have the FontDescriptor
  13751. obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile'))
  13752. if obj.m_internal:
  13753. return "pfa"
  13754. obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile2'))
  13755. if obj.m_internal:
  13756. return "ttf"
  13757. obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile3'))
  13758. if obj.m_internal:
  13759. obj = mupdf.pdf_dict_get(obj, PDF_NAME('Subtype'))
  13760. if obj.m_internal and not mupdf.pdf_is_name(obj):
  13761. message("invalid font descriptor subtype")
  13762. return "n/a"
  13763. if mupdf.pdf_name_eq(obj, PDF_NAME('Type1C')):
  13764. return "cff"
  13765. elif mupdf.pdf_name_eq(obj, PDF_NAME('CIDFontType0C')):
  13766. return "cid"
  13767. elif mupdf.pdf_name_eq(obj, PDF_NAME('OpenType')):
  13768. return "otf"
  13769. else:
  13770. message("unhandled font type '%s'", mupdf.pdf_to_name(obj))
  13771. return "n/a"
  13772. def JM_get_ocg_arrays_imp(arr):
  13773. '''
  13774. Get OCG arrays from OC configuration
  13775. Returns dict {"basestate":name, "on":list, "off":list, "rbg":list, "locked":list}
  13776. '''
  13777. list_ = list()
  13778. if mupdf.pdf_is_array( arr):
  13779. n = mupdf.pdf_array_len( arr)
  13780. for i in range(n):
  13781. obj = mupdf.pdf_array_get( arr, i)
  13782. item = mupdf.pdf_to_num( obj)
  13783. if item not in list_:
  13784. list_.append(item)
  13785. return list_
  13786. def JM_get_ocg_arrays(conf):
  13787. rc = dict()
  13788. arr = mupdf.pdf_dict_get( conf, PDF_NAME('ON'))
  13789. list_ = JM_get_ocg_arrays_imp( arr)
  13790. if list_:
  13791. rc["on"] = list_
  13792. arr = mupdf.pdf_dict_get( conf, PDF_NAME('OFF'))
  13793. list_ = JM_get_ocg_arrays_imp( arr)
  13794. if list_:
  13795. rc["off"] = list_
  13796. arr = mupdf.pdf_dict_get( conf, PDF_NAME('Locked'))
  13797. list_ = JM_get_ocg_arrays_imp( arr)
  13798. if list_:
  13799. rc['locked'] = list_
  13800. list_ = list()
  13801. arr = mupdf.pdf_dict_get( conf, PDF_NAME('RBGroups'))
  13802. if mupdf.pdf_is_array( arr):
  13803. n = mupdf.pdf_array_len( arr)
  13804. for i in range(n):
  13805. obj = mupdf.pdf_array_get( arr, i)
  13806. list1 = JM_get_ocg_arrays_imp( obj)
  13807. list_.append(list1)
  13808. if list_:
  13809. rc["rbgroups"] = list_
  13810. obj = mupdf.pdf_dict_get( conf, PDF_NAME('BaseState'))
  13811. if obj.m_internal:
  13812. state = mupdf.pdf_to_name( obj)
  13813. rc["basestate"] = state
  13814. return rc
  13815. def JM_get_page_labels(liste, nums):
  13816. n = mupdf.pdf_array_len(nums)
  13817. for i in range(0, n, 2):
  13818. key = mupdf.pdf_resolve_indirect( mupdf.pdf_array_get(nums, i))
  13819. pno = mupdf.pdf_to_int(key)
  13820. val = mupdf.pdf_resolve_indirect( mupdf.pdf_array_get(nums, i + 1))
  13821. res = JM_object_to_buffer(val, 1, 0)
  13822. c = mupdf.fz_buffer_extract(res)
  13823. assert isinstance(c, bytes)
  13824. c = c.decode('utf-8')
  13825. liste.append( (pno, c))
  13826. def JM_get_script(key):
  13827. '''
  13828. JavaScript extractor
  13829. Returns either the script source or None. Parameter is a PDF action
  13830. dictionary, which must have keys /S and /JS. The value of /S must be
  13831. '/JavaScript'. The value of /JS is returned.
  13832. '''
  13833. if not key.m_internal:
  13834. return
  13835. j = mupdf.pdf_dict_get(key, PDF_NAME('S'))
  13836. jj = mupdf.pdf_to_name(j)
  13837. if jj == "JavaScript":
  13838. js = mupdf.pdf_dict_get(key, PDF_NAME('JS'))
  13839. if not js.m_internal:
  13840. return
  13841. else:
  13842. return
  13843. if mupdf.pdf_is_string(js):
  13844. script = JM_UnicodeFromStr(mupdf.pdf_to_text_string(js))
  13845. elif mupdf.pdf_is_stream(js):
  13846. res = mupdf.pdf_load_stream(js)
  13847. script = JM_EscapeStrFromBuffer(res)
  13848. else:
  13849. return
  13850. if script: # do not return an empty script
  13851. return script
  13852. return
  13853. def JM_have_operation(pdf):
  13854. '''
  13855. Ensure valid journalling state
  13856. '''
  13857. if pdf.m_internal.journal and not mupdf.pdf_undoredo_step(pdf, 0):
  13858. return 0
  13859. return 1
  13860. def JM_image_extension(type_):
  13861. '''
  13862. return extension for MuPDF image type
  13863. '''
  13864. if type_ == mupdf.FZ_IMAGE_FAX: return "fax"
  13865. if type_ == mupdf.FZ_IMAGE_RAW: return "raw"
  13866. if type_ == mupdf.FZ_IMAGE_FLATE: return "flate"
  13867. if type_ == mupdf.FZ_IMAGE_LZW: return "lzw"
  13868. if type_ == mupdf.FZ_IMAGE_RLD: return "rld"
  13869. if type_ == mupdf.FZ_IMAGE_BMP: return "bmp"
  13870. if type_ == mupdf.FZ_IMAGE_GIF: return "gif"
  13871. if type_ == mupdf.FZ_IMAGE_JBIG2: return "jb2"
  13872. if type_ == mupdf.FZ_IMAGE_JPEG: return "jpeg"
  13873. if type_ == mupdf.FZ_IMAGE_JPX: return "jpx"
  13874. if type_ == mupdf.FZ_IMAGE_JXR: return "jxr"
  13875. if type_ == mupdf.FZ_IMAGE_PNG: return "png"
  13876. if type_ == mupdf.FZ_IMAGE_PNM: return "pnm"
  13877. if type_ == mupdf.FZ_IMAGE_TIFF: return "tiff"
  13878. #if type_ == mupdf.FZ_IMAGE_PSD: return "psd"
  13879. return "n/a"
  13880. # fixme: need to avoid using a global for this.
  13881. g_img_info = None
  13882. def JM_image_filter(opaque, ctm, name, image):
  13883. assert isinstance(ctm, mupdf.FzMatrix)
  13884. r = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
  13885. q = mupdf.fz_transform_quad( mupdf.fz_quad_from_rect(r), ctm)
  13886. q = mupdf.fz_transform_quad( q, g_img_info_matrix)
  13887. temp = name, JM_py_from_quad(q)
  13888. g_img_info.append(temp)
  13889. def JM_image_profile( imagedata, keep_image):
  13890. '''
  13891. Return basic properties of an image provided as bytes or bytearray
  13892. The function creates an fz_image and optionally returns it.
  13893. '''
  13894. if not imagedata:
  13895. return None # nothing given
  13896. len_ = len( imagedata)
  13897. if len_ < 8:
  13898. message( "bad image data")
  13899. return None
  13900. c = imagedata
  13901. #log( 'calling mfz_recognize_image_format with {c!r=}')
  13902. type_ = mupdf.fz_recognize_image_format( c)
  13903. if type_ == mupdf.FZ_IMAGE_UNKNOWN:
  13904. return None
  13905. if keep_image:
  13906. res = mupdf.fz_new_buffer_from_copied_data( c, len_)
  13907. else:
  13908. res = mupdf.fz_new_buffer_from_shared_data( c, len_)
  13909. image = mupdf.fz_new_image_from_buffer( res)
  13910. ctm = mupdf.fz_image_orientation_matrix( image)
  13911. xres, yres = mupdf.fz_image_resolution(image)
  13912. orientation = mupdf.fz_image_orientation( image)
  13913. cs_name = mupdf.fz_colorspace_name( image.colorspace())
  13914. result = dict()
  13915. result[ dictkey_width] = image.w()
  13916. result[ dictkey_height] = image.h()
  13917. result[ "orientation"] = orientation
  13918. result[ dictkey_matrix] = JM_py_from_matrix(ctm)
  13919. result[ dictkey_xres] = xres
  13920. result[ dictkey_yres] = yres
  13921. result[ dictkey_colorspace] = image.n()
  13922. result[ dictkey_bpc] = image.bpc()
  13923. result[ dictkey_ext] = JM_image_extension(type_)
  13924. result[ dictkey_cs_name] = cs_name
  13925. if keep_image:
  13926. result[ dictkey_image] = image
  13927. return result
  13928. def JM_image_reporter(page):
  13929. doc = page.doc()
  13930. global g_img_info_matrix
  13931. g_img_info_matrix = mupdf.FzMatrix()
  13932. mediabox = mupdf.FzRect()
  13933. mupdf.pdf_page_transform(page, mediabox, g_img_info_matrix)
  13934. class SanitizeFilterOptions(mupdf.PdfSanitizeFilterOptions2):
  13935. def __init__(self):
  13936. super().__init__()
  13937. self.use_virtual_image_filter()
  13938. def image_filter(self, ctx, ctm, name, image, scissor):
  13939. JM_image_filter(None, mupdf.FzMatrix(ctm), name, image)
  13940. sanitize_filter_options = SanitizeFilterOptions()
  13941. filter_options = _make_PdfFilterOptions(
  13942. instance_forms=1,
  13943. ascii=1,
  13944. no_update=1,
  13945. sanitize=1,
  13946. sopts=sanitize_filter_options,
  13947. )
  13948. global g_img_info
  13949. g_img_info = []
  13950. mupdf.pdf_filter_page_contents( doc, page, filter_options)
  13951. rc = tuple(g_img_info)
  13952. g_img_info = []
  13953. return rc
  13954. def JM_fitz_config():
  13955. have_TOFU = not hasattr(mupdf, 'TOFU')
  13956. have_TOFU_BASE14 = not hasattr(mupdf, 'TOFU_BASE14')
  13957. have_TOFU_CJK = not hasattr(mupdf, 'TOFU_CJK')
  13958. have_TOFU_CJK_EXT = not hasattr(mupdf, 'TOFU_CJK_EXT')
  13959. have_TOFU_CJK_LANG = not hasattr(mupdf, 'TOFU_CJK_LANG')
  13960. have_TOFU_EMOJI = not hasattr(mupdf, 'TOFU_EMOJI')
  13961. have_TOFU_HISTORIC = not hasattr(mupdf, 'TOFU_HISTORIC')
  13962. have_TOFU_SIL = not hasattr(mupdf, 'TOFU_SIL')
  13963. have_TOFU_SYMBOL = not hasattr(mupdf, 'TOFU_SYMBOL')
  13964. ret = dict()
  13965. ret["base14"] = have_TOFU_BASE14
  13966. ret["cbz"] = bool(mupdf.FZ_ENABLE_CBZ)
  13967. ret["epub"] = bool(mupdf.FZ_ENABLE_EPUB)
  13968. ret["html"] = bool(mupdf.FZ_ENABLE_HTML)
  13969. ret["icc"] = bool(mupdf.FZ_ENABLE_ICC)
  13970. ret["img"] = bool(mupdf.FZ_ENABLE_IMG)
  13971. ret["jpx"] = bool(mupdf.FZ_ENABLE_JPX)
  13972. ret["js"] = bool(mupdf.FZ_ENABLE_JS)
  13973. ret["pdf"] = bool(mupdf.FZ_ENABLE_PDF)
  13974. ret["plotter-cmyk"] = bool(mupdf.FZ_PLOTTERS_CMYK)
  13975. ret["plotter-g"] = bool(mupdf.FZ_PLOTTERS_G)
  13976. ret["plotter-n"] = bool(mupdf.FZ_PLOTTERS_N)
  13977. ret["plotter-rgb"] = bool(mupdf.FZ_PLOTTERS_RGB)
  13978. ret["py-memory"] = bool(JM_MEMORY)
  13979. ret["svg"] = bool(mupdf.FZ_ENABLE_SVG)
  13980. ret["tofu"] = have_TOFU
  13981. ret["tofu-cjk"] = have_TOFU_CJK
  13982. ret["tofu-cjk-ext"] = have_TOFU_CJK_EXT
  13983. ret["tofu-cjk-lang"] = have_TOFU_CJK_LANG
  13984. ret["tofu-emoji"] = have_TOFU_EMOJI
  13985. ret["tofu-historic"] = have_TOFU_HISTORIC
  13986. ret["tofu-sil"] = have_TOFU_SIL
  13987. ret["tofu-symbol"] = have_TOFU_SYMBOL
  13988. ret["xps"] = bool(mupdf.FZ_ENABLE_XPS)
  13989. return ret
  13990. def JM_insert_contents(pdf, pageref, newcont, overlay):
  13991. '''
  13992. Insert a buffer as a new separate /Contents object of a page.
  13993. 1. Create a new stream object from buffer 'newcont'
  13994. 2. If /Contents already is an array, then just prepend or append this object
  13995. 3. Else, create new array and put old content obj and this object into it.
  13996. If the page had no /Contents before, just create a 1-item array.
  13997. '''
  13998. contents = mupdf.pdf_dict_get(pageref, PDF_NAME('Contents'))
  13999. newconts = mupdf.pdf_add_stream(pdf, newcont, mupdf.PdfObj(), 0)
  14000. xref = mupdf.pdf_to_num(newconts)
  14001. if mupdf.pdf_is_array(contents):
  14002. if overlay: # append new object
  14003. mupdf.pdf_array_push(contents, newconts)
  14004. else: # prepend new object
  14005. mupdf.pdf_array_insert(contents, newconts, 0)
  14006. else:
  14007. carr = mupdf.pdf_new_array(pdf, 5)
  14008. if overlay:
  14009. if contents.m_internal:
  14010. mupdf.pdf_array_push(carr, contents)
  14011. mupdf.pdf_array_push(carr, newconts)
  14012. else:
  14013. mupdf.pdf_array_push(carr, newconts)
  14014. if contents.m_internal:
  14015. mupdf.pdf_array_push(carr, contents)
  14016. mupdf.pdf_dict_put(pageref, PDF_NAME('Contents'), carr)
  14017. return xref
  14018. def JM_insert_font(pdf, bfname, fontfile, fontbuffer, set_simple, idx, wmode, serif, encoding, ordering):
  14019. '''
  14020. Insert a font in a PDF
  14021. '''
  14022. font = None
  14023. res = None
  14024. data = None
  14025. ixref = 0
  14026. index = 0
  14027. simple = 0
  14028. value=None
  14029. name=None
  14030. subt=None
  14031. exto = None
  14032. ENSURE_OPERATION(pdf)
  14033. # check for CJK font
  14034. if ordering > -1:
  14035. data, size, index = mupdf.fz_lookup_cjk_font(ordering)
  14036. if data:
  14037. font = mupdf.fz_new_font_from_memory(None, data, size, index, 0)
  14038. font_obj = mupdf.pdf_add_cjk_font(pdf, font, ordering, wmode, serif)
  14039. exto = "n/a"
  14040. simple = 0
  14041. #goto weiter;
  14042. else:
  14043. # check for PDF Base-14 font
  14044. if bfname:
  14045. data, size = mupdf.fz_lookup_base14_font(bfname)
  14046. if data:
  14047. font = mupdf.fz_new_font_from_memory(bfname, data, size, 0, 0)
  14048. font_obj = mupdf.pdf_add_simple_font(pdf, font, encoding)
  14049. exto = "n/a"
  14050. simple = 1
  14051. #goto weiter;
  14052. else:
  14053. if fontfile:
  14054. font = mupdf.fz_new_font_from_file(None, fontfile, idx, 0)
  14055. else:
  14056. res = JM_BufferFromBytes(fontbuffer)
  14057. if not res.m_internal:
  14058. RAISEPY(MSG_FILE_OR_BUFFER, PyExc_ValueError)
  14059. font = mupdf.fz_new_font_from_buffer(None, res, idx, 0)
  14060. if not set_simple:
  14061. font_obj = mupdf.pdf_add_cid_font(pdf, font)
  14062. simple = 0
  14063. else:
  14064. font_obj = mupdf.pdf_add_simple_font(pdf, font, encoding)
  14065. simple = 2
  14066. #weiter: ;
  14067. ixref = mupdf.pdf_to_num(font_obj)
  14068. name = JM_EscapeStrFromStr( mupdf.pdf_to_name( mupdf.pdf_dict_get(font_obj, PDF_NAME('BaseFont'))))
  14069. subt = JM_UnicodeFromStr( mupdf.pdf_to_name( mupdf.pdf_dict_get( font_obj, PDF_NAME('Subtype'))))
  14070. if not exto:
  14071. exto = JM_UnicodeFromStr(JM_get_fontextension(pdf, ixref))
  14072. asc = mupdf.fz_font_ascender(font)
  14073. dsc = mupdf.fz_font_descender(font)
  14074. value = [
  14075. ixref,
  14076. {
  14077. "name": name, # base font name
  14078. "type": subt, # subtype
  14079. "ext": exto, # file extension
  14080. "simple": bool(simple), # simple font?
  14081. "ordering": ordering, # CJK font?
  14082. "ascender": asc,
  14083. "descender": dsc,
  14084. },
  14085. ]
  14086. return value
  14087. def JM_irect_from_py(r):
  14088. '''
  14089. PySequence to mupdf.FzIrect. Default: infinite irect
  14090. '''
  14091. if isinstance(r, mupdf.FzIrect):
  14092. return r
  14093. if isinstance(r, IRect):
  14094. r = mupdf.FzIrect( r.x0, r.y0, r.x1, r.y1)
  14095. return r
  14096. if isinstance(r, Rect):
  14097. ret = mupdf.FzRect(r.x0, r.y0, r.x1, r.y1)
  14098. ret = mupdf.FzIrect(ret) # Uses fz_irect_from_rect().
  14099. return ret
  14100. if isinstance(r, mupdf.FzRect):
  14101. ret = mupdf.FzIrect(r) # Uses fz_irect_from_rect().
  14102. return ret
  14103. if not r or not PySequence_Check(r) or PySequence_Size(r) != 4:
  14104. return mupdf.FzIrect(mupdf.fz_infinite_irect)
  14105. f = [0, 0, 0, 0]
  14106. for i in range(4):
  14107. f[i] = r[i]
  14108. if f[i] is None:
  14109. return mupdf.FzIrect(mupdf.fz_infinite_irect)
  14110. if f[i] < FZ_MIN_INF_RECT:
  14111. f[i] = FZ_MIN_INF_RECT
  14112. if f[i] > FZ_MAX_INF_RECT:
  14113. f[i] = FZ_MAX_INF_RECT
  14114. return mupdf.fz_make_irect(f[0], f[1], f[2], f[3])
  14115. def JM_listbox_value( annot):
  14116. '''
  14117. ListBox retrieve value
  14118. '''
  14119. # may be single value or array
  14120. annot_obj = mupdf.pdf_annot_obj( annot)
  14121. optarr = mupdf.pdf_dict_get( annot_obj, PDF_NAME('V'))
  14122. if mupdf.pdf_is_string( optarr): # a single string
  14123. return mupdf.pdf_to_text_string( optarr)
  14124. # value is an array (may have len 0)
  14125. n = mupdf.pdf_array_len( optarr)
  14126. liste = []
  14127. # extract a list of strings
  14128. # each entry may again be an array: take second entry then
  14129. for i in range( n):
  14130. elem = mupdf.pdf_array_get( optarr, i)
  14131. if mupdf.pdf_is_array( elem):
  14132. elem = mupdf.pdf_array_get( elem, 1)
  14133. liste.append( JM_UnicodeFromStr( mupdf.pdf_to_text_string( elem)))
  14134. return liste
  14135. def JM_make_annot_DA(annot, ncol, col, fontname, fontsize):
  14136. # PyMuPDF uses a fz_buffer to build up the string, but it's non-trivial to
  14137. # convert the fz_buffer's `unsigned char*` into a `const char*` suitable
  14138. # for passing to pdf_dict_put_text_string(). So instead we build up the
  14139. # string directly in Python.
  14140. buf = ''
  14141. if ncol < 1:
  14142. buf += f'0 g '
  14143. elif ncol == 1:
  14144. buf += f'{col[0]:g} g '
  14145. elif ncol == 2:
  14146. assert 0
  14147. elif ncol == 3:
  14148. buf += f'{col[0]:g} {col[1]:g} {col[2]:g} rg '
  14149. else:
  14150. buf += f'{col[0]:g} {col[1]:g} {col[2]:g} {col[3]:g} k '
  14151. buf += f'/{JM_expand_fname(fontname)} {fontsize} Tf'
  14152. mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), mupdf.PDF_ENUM_NAME_DA, buf)
  14153. def JM_make_spanlist(line_dict, line, raw, buff, tp_rect):
  14154. if g_use_extra:
  14155. return extra.JM_make_spanlist(line_dict, line, raw, buff, tp_rect)
  14156. char_list = None
  14157. span_list = []
  14158. mupdf.fz_clear_buffer(buff)
  14159. span_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
  14160. line_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
  14161. class char_style:
  14162. def __init__(self, rhs=None):
  14163. if rhs:
  14164. self.size = rhs.size
  14165. self.flags = rhs.flags
  14166. if mupdf_version_tuple >= (1, 25, 2):
  14167. self.char_flags = rhs.char_flags
  14168. self.font = rhs.font
  14169. self.argb = rhs.argb
  14170. self.asc = rhs.asc
  14171. self.desc = rhs.desc
  14172. self.bidi = rhs.bidi
  14173. else:
  14174. self.size = -1
  14175. self.flags = -1
  14176. if mupdf_version_tuple >= (1, 25, 2):
  14177. self.char_flags = -1
  14178. self.font = ''
  14179. self.argb = -1
  14180. self.asc = 0
  14181. self.desc = 0
  14182. self.bidi = 0
  14183. def __str__(self):
  14184. ret = f'{self.size} {self.flags}'
  14185. if mupdf_version_tuple >= (1, 25, 2):
  14186. ret += f' {self.char_flags}'
  14187. ret += f' {self.font} {self.color} {self.asc} {self.desc}'
  14188. return ret
  14189. old_style = char_style()
  14190. style = char_style()
  14191. span = None
  14192. span_origin = None
  14193. for ch in line:
  14194. # start-trace
  14195. r = JM_char_bbox(line, ch)
  14196. if (not JM_rects_overlap(tp_rect, r)
  14197. and not mupdf.fz_is_infinite_rect(tp_rect)
  14198. ):
  14199. continue
  14200. # Info from:
  14201. # detect_super_script()
  14202. # fz_font_is_italic()
  14203. # fz_font_is_serif()
  14204. # fz_font_is_monospaced()
  14205. # fz_font_is_bold()
  14206. flags = JM_char_font_flags(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)), line, ch)
  14207. origin = mupdf.FzPoint(ch.m_internal.origin)
  14208. style.size = ch.m_internal.size
  14209. style.flags = flags
  14210. if mupdf_version_tuple >= (1, 25, 2):
  14211. # FZ_STEXT_SYNTHETIC is per-char, not per-span.
  14212. style.char_flags = ch.m_internal.flags & ~mupdf.FZ_STEXT_SYNTHETIC
  14213. style.font = JM_font_name(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
  14214. style.argb = ch.m_internal.argb
  14215. style.asc = JM_font_ascender(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
  14216. style.desc = JM_font_descender(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
  14217. style.bidi = ch.m_internal.bidi
  14218. if (style.size != old_style.size
  14219. or style.flags != old_style.flags
  14220. or (mupdf_version_tuple >= (1, 25, 2)
  14221. and (style.char_flags != old_style.char_flags)
  14222. )
  14223. or style.argb != old_style.argb
  14224. or style.font != old_style.font
  14225. or style.bidi != old_style.bidi
  14226. ):
  14227. if old_style.size >= 0:
  14228. # not first one, output previous
  14229. if raw:
  14230. # put character list in the span
  14231. span[dictkey_chars] = char_list
  14232. char_list = None
  14233. else:
  14234. # put text string in the span
  14235. span[dictkey_text] = JM_EscapeStrFromBuffer( buff)
  14236. mupdf.fz_clear_buffer(buff)
  14237. span[dictkey_origin] = JM_py_from_point(span_origin)
  14238. span[dictkey_bbox] = JM_py_from_rect(span_rect)
  14239. line_rect = mupdf.fz_union_rect(line_rect, span_rect)
  14240. span_list.append( span)
  14241. span = None
  14242. span = dict()
  14243. asc = style.asc
  14244. desc = style.desc
  14245. if style.asc < 1e-3:
  14246. asc = 0.9
  14247. desc = -0.1
  14248. span[dictkey_size] = style.size
  14249. span[dictkey_flags] = style.flags
  14250. span[dictkey_bidi] = style.bidi
  14251. if mupdf_version_tuple >= (1, 25, 2):
  14252. span[dictkey_char_flags] = style.char_flags
  14253. span[dictkey_font] = JM_EscapeStrFromStr(style.font)
  14254. span[dictkey_color] = style.argb & 0xffffff
  14255. if mupdf_version_tuple >= (1, 25, 0):
  14256. span['alpha'] = style.argb >> 24
  14257. span["ascender"] = asc
  14258. span["descender"] = desc
  14259. # Need to be careful here - doing 'old_style=style' does a shallow
  14260. # copy, but we need to keep old_style as a distinct instance.
  14261. old_style = char_style(style)
  14262. span_rect = r
  14263. span_origin = origin
  14264. span_rect = mupdf.fz_union_rect(span_rect, r)
  14265. if raw: # make and append a char dict
  14266. char_dict = dict()
  14267. char_dict[dictkey_origin] = JM_py_from_point( ch.m_internal.origin)
  14268. char_dict[dictkey_bbox] = JM_py_from_rect(r)
  14269. char_dict[dictkey_c] = chr(ch.m_internal.c)
  14270. char_dict['synthetic'] = bool(ch.m_internal.flags & mupdf.FZ_STEXT_SYNTHETIC)
  14271. if char_list is None:
  14272. char_list = []
  14273. char_list.append(char_dict)
  14274. else: # add character byte to buffer
  14275. JM_append_rune(buff, ch.m_internal.c)
  14276. # all characters processed, now flush remaining span
  14277. if span:
  14278. if raw:
  14279. span[dictkey_chars] = char_list
  14280. char_list = None
  14281. else:
  14282. span[dictkey_text] = JM_EscapeStrFromBuffer(buff)
  14283. mupdf.fz_clear_buffer(buff)
  14284. span[dictkey_origin] = JM_py_from_point(span_origin)
  14285. span[dictkey_bbox] = JM_py_from_rect(span_rect)
  14286. if not mupdf.fz_is_empty_rect(span_rect):
  14287. span_list.append(span)
  14288. line_rect = mupdf.fz_union_rect(line_rect, span_rect)
  14289. span = None
  14290. if not mupdf.fz_is_empty_rect(line_rect):
  14291. line_dict[dictkey_spans] = span_list
  14292. else:
  14293. line_dict[dictkey_spans] = span_list
  14294. return line_rect
  14295. def _make_image_dict(img, img_dict):
  14296. """Populate a dictionary with information extracted from a given image.
  14297. Used by 'Document.extract_image' and by 'JM_make_image_block'.
  14298. Both of these functions will add some more specific information.
  14299. """
  14300. img_type = img.fz_compressed_image_type()
  14301. ext = JM_image_extension(img_type)
  14302. # compressed image buffer if present, else None
  14303. ll_cbuf = mupdf.ll_fz_compressed_image_buffer(img.m_internal)
  14304. if (0
  14305. or not ll_cbuf
  14306. or img_type in (mupdf.FZ_IMAGE_JBIG2, mupdf.FZ_IMAGE_UNKNOWN)
  14307. or img_type < mupdf.FZ_IMAGE_BMP
  14308. ):
  14309. # not an image with a compressed buffer: convert to PNG
  14310. res = mupdf.fz_new_buffer_from_image_as_png(
  14311. img,
  14312. mupdf.FzColorParams(mupdf.fz_default_color_params),
  14313. )
  14314. ext = "png"
  14315. elif ext == "jpeg" and img.n() == 4:
  14316. # JPEG with CMYK: invert colors
  14317. res = mupdf.fz_new_buffer_from_image_as_jpeg(
  14318. img, mupdf.FzColorParams(mupdf.fz_default_color_params), 95, 1)
  14319. else:
  14320. # copy the compressed buffer
  14321. res = mupdf.FzBuffer(mupdf.ll_fz_keep_buffer(ll_cbuf.buffer))
  14322. bytes_ = JM_BinFromBuffer(res)
  14323. img_dict[dictkey_width] = img.w()
  14324. img_dict[dictkey_height] = img.h()
  14325. img_dict[dictkey_ext] = ext
  14326. img_dict[dictkey_colorspace] = img.n()
  14327. img_dict[dictkey_xres] = img.xres()
  14328. img_dict[dictkey_yres] = img.yres()
  14329. img_dict[dictkey_bpc] = img.bpc()
  14330. img_dict[dictkey_size] = len(bytes_)
  14331. img_dict[dictkey_image] = bytes_
  14332. def JM_make_image_block(block, block_dict):
  14333. img = block.i_image()
  14334. _make_image_dict(img, block_dict)
  14335. # if the image has a mask, store it as a PNG buffer
  14336. mask = img.mask()
  14337. if mask.m_internal:
  14338. buff = mask.fz_new_buffer_from_image_as_png(mupdf.FzColorParams(mupdf.fz_default_color_params))
  14339. block_dict["mask"] = buff.fz_buffer_extract()
  14340. else:
  14341. block_dict["mask"] = None
  14342. block_dict[dictkey_matrix] = JM_py_from_matrix(block.i_transform())
  14343. def JM_make_text_block(block, block_dict, raw, buff, tp_rect):
  14344. if g_use_extra:
  14345. return extra.JM_make_text_block(block.m_internal, block_dict, raw, buff.m_internal, tp_rect.m_internal)
  14346. line_list = []
  14347. block_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
  14348. #log(f'{block=}')
  14349. for line in block:
  14350. #log(f'{line=}')
  14351. if (mupdf.fz_is_empty_rect(mupdf.fz_intersect_rect(tp_rect, mupdf.FzRect(line.m_internal.bbox)))
  14352. and not mupdf.fz_is_infinite_rect(tp_rect)
  14353. ):
  14354. continue
  14355. line_dict = dict()
  14356. line_rect = JM_make_spanlist(line_dict, line, raw, buff, tp_rect)
  14357. block_rect = mupdf.fz_union_rect(block_rect, line_rect)
  14358. line_dict[dictkey_wmode] = line.m_internal.wmode
  14359. line_dict[dictkey_dir] = JM_py_from_point(line.m_internal.dir)
  14360. line_dict[dictkey_bbox] = JM_py_from_rect(line_rect)
  14361. line_list.append(line_dict)
  14362. block_dict[dictkey_bbox] = JM_py_from_rect(block_rect)
  14363. block_dict[dictkey_lines] = line_list
  14364. def JM_make_textpage_dict(tp, page_dict, raw):
  14365. if g_use_extra:
  14366. return extra.JM_make_textpage_dict(tp.m_internal, page_dict, raw)
  14367. text_buffer = mupdf.fz_new_buffer(128)
  14368. block_list = []
  14369. tp_rect = mupdf.FzRect(tp.m_internal.mediabox)
  14370. block_n = -1
  14371. #log( 'JM_make_textpage_dict {=tp}')
  14372. for block in tp:
  14373. block_n += 1
  14374. if (not mupdf.fz_contains_rect(tp_rect, mupdf.FzRect(block.m_internal.bbox))
  14375. and not mupdf.fz_is_infinite_rect(tp_rect)
  14376. and block.m_internal.type == mupdf.FZ_STEXT_BLOCK_IMAGE
  14377. ):
  14378. continue
  14379. if (not mupdf.fz_is_infinite_rect(tp_rect)
  14380. and mupdf.fz_is_empty_rect(mupdf.fz_intersect_rect(tp_rect, mupdf.FzRect(block.m_internal.bbox)))
  14381. ):
  14382. continue
  14383. block_dict = dict()
  14384. block_dict[dictkey_number] = block_n
  14385. block_dict[dictkey_type] = block.m_internal.type
  14386. if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_IMAGE:
  14387. block_dict[dictkey_bbox] = JM_py_from_rect(block.m_internal.bbox)
  14388. JM_make_image_block(block, block_dict)
  14389. else:
  14390. JM_make_text_block(block, block_dict, raw, text_buffer, tp_rect)
  14391. block_list.append(block_dict)
  14392. page_dict[dictkey_blocks] = block_list
  14393. def JM_matrix_from_py(m):
  14394. a = [0, 0, 0, 0, 0, 0]
  14395. if isinstance(m, mupdf.FzMatrix):
  14396. return m
  14397. if isinstance(m, Matrix):
  14398. return mupdf.FzMatrix(m.a, m.b, m.c, m.d, m.e, m.f)
  14399. if not m or not PySequence_Check(m) or PySequence_Size(m) != 6:
  14400. return mupdf.FzMatrix()
  14401. for i in range(6):
  14402. a[i] = JM_FLOAT_ITEM(m, i)
  14403. if a[i] is None:
  14404. return mupdf.FzRect()
  14405. return mupdf.FzMatrix(a[0], a[1], a[2], a[3], a[4], a[5])
  14406. def JM_mediabox(page_obj):
  14407. '''
  14408. return a PDF page's MediaBox
  14409. '''
  14410. page_mediabox = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
  14411. mediabox = mupdf.pdf_to_rect(
  14412. mupdf.pdf_dict_get_inheritable(page_obj, PDF_NAME('MediaBox'))
  14413. )
  14414. if mupdf.fz_is_empty_rect(mediabox) or mupdf.fz_is_infinite_rect(mediabox):
  14415. mediabox.x0 = 0
  14416. mediabox.y0 = 0
  14417. mediabox.x1 = 612
  14418. mediabox.y1 = 792
  14419. page_mediabox = mupdf.FzRect(
  14420. mupdf.fz_min(mediabox.x0, mediabox.x1),
  14421. mupdf.fz_min(mediabox.y0, mediabox.y1),
  14422. mupdf.fz_max(mediabox.x0, mediabox.x1),
  14423. mupdf.fz_max(mediabox.y0, mediabox.y1),
  14424. )
  14425. if (page_mediabox.x1 - page_mediabox.x0 < 1
  14426. or page_mediabox.y1 - page_mediabox.y0 < 1
  14427. ):
  14428. page_mediabox = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
  14429. return page_mediabox
  14430. def JM_merge_range(
  14431. doc_des,
  14432. doc_src,
  14433. spage,
  14434. epage,
  14435. apage,
  14436. rotate,
  14437. links,
  14438. annots,
  14439. show_progress,
  14440. graft_map,
  14441. ):
  14442. '''
  14443. Copy a range of pages (spage, epage) from a source PDF to a specified
  14444. location (apage) of the target PDF.
  14445. If spage > epage, the sequence of source pages is reversed.
  14446. '''
  14447. if g_use_extra:
  14448. return extra.JM_merge_range(
  14449. doc_des,
  14450. doc_src,
  14451. spage,
  14452. epage,
  14453. apage,
  14454. rotate,
  14455. links,
  14456. annots,
  14457. show_progress,
  14458. graft_map,
  14459. )
  14460. afterpage = apage
  14461. counter = 0 # copied pages counter
  14462. total = mupdf.fz_absi(epage - spage) + 1 # total pages to copy
  14463. if spage < epage:
  14464. page = spage
  14465. while page <= epage:
  14466. page_merge(doc_des, doc_src, page, afterpage, rotate, links, annots, graft_map)
  14467. counter += 1
  14468. if show_progress > 0 and counter % show_progress == 0:
  14469. message(f"Inserted {counter} of {total} pages.")
  14470. page += 1
  14471. afterpage += 1
  14472. else:
  14473. page = spage
  14474. while page >= epage:
  14475. page_merge(doc_des, doc_src, page, afterpage, rotate, links, annots, graft_map)
  14476. counter += 1
  14477. if show_progress > 0 and counter % show_progress == 0:
  14478. message(f"Inserted {counter} of {total} pages.")
  14479. page -= 1
  14480. afterpage += 1
  14481. def JM_merge_resources( page, temp_res):
  14482. '''
  14483. Merge the /Resources object created by a text pdf device into the page.
  14484. The device may have created multiple /ExtGState/Alp? and /Font/F? objects.
  14485. These need to be renamed (renumbered) to not overwrite existing page
  14486. objects from previous executions.
  14487. Returns the next available numbers n, m for objects /Alp<n>, /F<m>.
  14488. '''
  14489. # page objects /Resources, /Resources/ExtGState, /Resources/Font
  14490. resources = mupdf.pdf_dict_get(page.obj(), PDF_NAME('Resources'))
  14491. if not resources.m_internal:
  14492. resources = mupdf.pdf_dict_put_dict(page.obj(), PDF_NAME('Resources'), 5)
  14493. main_extg = mupdf.pdf_dict_get(resources, PDF_NAME('ExtGState'))
  14494. main_fonts = mupdf.pdf_dict_get(resources, PDF_NAME('Font'))
  14495. # text pdf device objects /ExtGState, /Font
  14496. temp_extg = mupdf.pdf_dict_get(temp_res, PDF_NAME('ExtGState'))
  14497. temp_fonts = mupdf.pdf_dict_get(temp_res, PDF_NAME('Font'))
  14498. max_alp = -1
  14499. max_fonts = -1
  14500. # Handle /Alp objects
  14501. if mupdf.pdf_is_dict(temp_extg): # any created at all?
  14502. n = mupdf.pdf_dict_len(temp_extg)
  14503. if mupdf.pdf_is_dict(main_extg): # does page have /ExtGState yet?
  14504. for i in range(mupdf.pdf_dict_len(main_extg)):
  14505. # get highest number of objects named /Alpxxx
  14506. alp = mupdf.pdf_to_name( mupdf.pdf_dict_get_key(main_extg, i))
  14507. if not alp.startswith('Alp'):
  14508. continue
  14509. j = mupdf.fz_atoi(alp[3:])
  14510. if j > max_alp:
  14511. max_alp = j
  14512. else: # create a /ExtGState for the page
  14513. main_extg = mupdf.pdf_dict_put_dict(resources, PDF_NAME('ExtGState'), n)
  14514. max_alp += 1
  14515. for i in range(n): # copy over renumbered /Alp objects
  14516. alp = mupdf.pdf_to_name( mupdf.pdf_dict_get_key( temp_extg, i))
  14517. j = mupdf.fz_atoi(alp[3:]) + max_alp
  14518. text = f'Alp{j}'
  14519. val = mupdf.pdf_dict_get_val( temp_extg, i)
  14520. mupdf.pdf_dict_puts(main_extg, text, val)
  14521. if mupdf.pdf_is_dict(main_fonts): # has page any fonts yet?
  14522. for i in range(mupdf.pdf_dict_len(main_fonts)): # get max font number
  14523. font = mupdf.pdf_to_name( mupdf.pdf_dict_get_key( main_fonts, i))
  14524. if not font.startswith("F"):
  14525. continue
  14526. j = mupdf.fz_atoi(font[1:])
  14527. if j > max_fonts:
  14528. max_fonts = j
  14529. else: # create a Resources/Font for the page
  14530. main_fonts = mupdf.pdf_dict_put_dict(resources, PDF_NAME('Font'), 2)
  14531. max_fonts += 1
  14532. for i in range(mupdf.pdf_dict_len(temp_fonts)): # copy renumbered fonts
  14533. font = mupdf.pdf_to_name( mupdf.pdf_dict_get_key( temp_fonts, i))
  14534. j = mupdf.fz_atoi(font[1:]) + max_fonts
  14535. text = f'F{j}'
  14536. val = mupdf.pdf_dict_get_val(temp_fonts, i)
  14537. mupdf.pdf_dict_puts(main_fonts, text, val)
  14538. return (max_alp, max_fonts) # next available numbers
  14539. def JM_mupdf_warning( text):
  14540. '''
  14541. redirect MuPDF warnings
  14542. '''
  14543. JM_mupdf_warnings_store.append(text)
  14544. if JM_mupdf_show_warnings:
  14545. message(f'MuPDF warning: {text}')
  14546. def JM_mupdf_error( text):
  14547. JM_mupdf_warnings_store.append(text)
  14548. if JM_mupdf_show_errors:
  14549. message(f'MuPDF error: {text}\n')
  14550. def JM_new_bbox_device(rc, inc_layers):
  14551. assert isinstance(rc, list)
  14552. return JM_new_bbox_device_Device( rc, inc_layers)
  14553. def JM_new_buffer_from_stext_page(page):
  14554. '''
  14555. make a buffer from an stext_page's text
  14556. '''
  14557. assert isinstance(page, mupdf.FzStextPage)
  14558. rect = mupdf.FzRect(page.m_internal.mediabox)
  14559. buf = mupdf.fz_new_buffer(256)
  14560. for block in page:
  14561. if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_TEXT:
  14562. for line in block:
  14563. for ch in line:
  14564. if (not JM_rects_overlap(rect, JM_char_bbox(line, ch))
  14565. and not mupdf.fz_is_infinite_rect(rect)
  14566. ):
  14567. continue
  14568. mupdf.fz_append_rune(buf, ch.m_internal.c)
  14569. mupdf.fz_append_byte(buf, ord('\n'))
  14570. mupdf.fz_append_byte(buf, ord('\n'))
  14571. return buf
  14572. def JM_new_javascript(pdf, value):
  14573. '''
  14574. make new PDF action object from JavaScript source
  14575. Parameters are a PDF document and a Python string.
  14576. Returns a PDF action object.
  14577. '''
  14578. if value is None:
  14579. # no argument given
  14580. return
  14581. data = JM_StrAsChar(value)
  14582. if data is None:
  14583. # not convertible to char*
  14584. return
  14585. res = mupdf.fz_new_buffer_from_copied_data(data.encode('utf8'))
  14586. source = mupdf.pdf_add_stream(pdf, res, mupdf.PdfObj(), 0)
  14587. newaction = mupdf.pdf_add_new_dict(pdf, 4)
  14588. mupdf.pdf_dict_put(newaction, PDF_NAME('S'), mupdf.pdf_new_name('JavaScript'))
  14589. mupdf.pdf_dict_put(newaction, PDF_NAME('JS'), source)
  14590. return newaction
  14591. def JM_new_output_fileptr(bio):
  14592. return JM_new_output_fileptr_Output( bio)
  14593. def JM_norm_rotation(rotate):
  14594. '''
  14595. # return normalized /Rotate value:one of 0, 90, 180, 270
  14596. '''
  14597. while rotate < 0:
  14598. rotate += 360
  14599. while rotate >= 360:
  14600. rotate -= 360
  14601. if rotate % 90 != 0:
  14602. return 0
  14603. return rotate
  14604. def JM_object_to_buffer(what, compress, ascii):
  14605. res = mupdf.fz_new_buffer(512)
  14606. out = mupdf.FzOutput(res)
  14607. mupdf.pdf_print_obj(out, what, compress, ascii)
  14608. out.fz_close_output()
  14609. mupdf.fz_terminate_buffer(res)
  14610. return res
def JM_outline_xrefs(obj, xrefs):
    '''
    Return list of outline xref numbers. Recursive function. Arguments:
    'obj' first OL item
    'xrefs' empty Python list

    The xref numbers are appended to the list passed as 'xrefs'; that same
    list is returned.
    '''
    if not obj.m_internal:
        # null object: nothing to collect
        return xrefs
    thisobj = obj
    while thisobj.m_internal:
        newxref = mupdf.pdf_to_num( thisobj)
        # stop at an xref that was already collected, or at an object
        # which has a /Type entry
        if newxref in xrefs or mupdf.pdf_dict_get( thisobj, PDF_NAME('Type')).m_internal:
            # circular ref or top of chain: terminate
            break
        xrefs.append( newxref)
        first = mupdf.pdf_dict_get( thisobj, PDF_NAME('First'))    # try go down
        if mupdf.pdf_is_dict( first):
            # collect the sub-items before continuing on this level
            xrefs = JM_outline_xrefs( first, xrefs)
        thisobj = mupdf.pdf_dict_get( thisobj, PDF_NAME('Next'))   # try go next
        # NOTE(review): 'thisobj' has just been replaced by the /Next entry,
        # so /Parent is looked up in that entry, not in the item processed
        # above - confirm this is intended.
        parent = mupdf.pdf_dict_get( thisobj, PDF_NAME('Parent'))  # get parent
        if not mupdf.pdf_is_dict( thisobj):
            # no usable /Next: continue with whatever the /Parent lookup gave
            thisobj = parent
    return xrefs
  14634. def JM_page_rotation(page):
  14635. '''
  14636. return a PDF page's /Rotate value: one of (0, 90, 180, 270)
  14637. '''
  14638. rotate = 0
  14639. obj = mupdf.pdf_dict_get_inheritable( page.obj(), mupdf.PDF_ENUM_NAME_Rotate)
  14640. rotate = mupdf.pdf_to_int(obj)
  14641. rotate = JM_norm_rotation(rotate)
  14642. return rotate
  14643. def JM_pdf_obj_from_str(doc, src):
  14644. '''
  14645. create PDF object from given string (new in v1.14.0: MuPDF dropped it)
  14646. '''
  14647. # fixme: seems inefficient to convert to bytes instance then make another
  14648. # copy inside fz_new_buffer_from_copied_data(), but no other way?
  14649. #
  14650. buffer_ = mupdf.fz_new_buffer_from_copied_data(bytes(src, 'utf8'))
  14651. stream = mupdf.fz_open_buffer(buffer_)
  14652. lexbuf = mupdf.PdfLexbuf(mupdf.PDF_LEXBUF_SMALL)
  14653. result = mupdf.pdf_parse_stm_obj(doc, stream, lexbuf)
  14654. return result
  14655. def JM_pixmap_from_display_list(
  14656. list_,
  14657. ctm,
  14658. cs,
  14659. alpha,
  14660. clip,
  14661. seps,
  14662. ):
  14663. '''
  14664. Version of fz_new_pixmap_from_display_list (util.c) to also support
  14665. rendering of only the 'clip' part of the displaylist rectangle
  14666. '''
  14667. assert isinstance(list_, mupdf.FzDisplayList)
  14668. if seps is None:
  14669. seps = mupdf.FzSeparations()
  14670. assert seps is None or isinstance(seps, mupdf.FzSeparations), f'{type(seps)=}: {seps}'
  14671. rect = mupdf.fz_bound_display_list(list_)
  14672. matrix = JM_matrix_from_py(ctm)
  14673. rclip = JM_rect_from_py(clip)
  14674. rect = mupdf.fz_intersect_rect(rect, rclip) # no-op if clip is not given
  14675. rect = mupdf.fz_transform_rect(rect, matrix)
  14676. irect = mupdf.fz_round_rect(rect)
  14677. assert isinstance( cs, mupdf.FzColorspace)
  14678. pix = mupdf.fz_new_pixmap_with_bbox(cs, irect, seps, alpha)
  14679. if alpha:
  14680. mupdf.fz_clear_pixmap(pix)
  14681. else:
  14682. mupdf.fz_clear_pixmap_with_value(pix, 0xFF)
  14683. if not mupdf.fz_is_infinite_rect(rclip):
  14684. dev = mupdf.fz_new_draw_device_with_bbox(matrix, pix, irect)
  14685. mupdf.fz_run_display_list(list_, dev, mupdf.FzMatrix(), rclip, mupdf.FzCookie())
  14686. else:
  14687. dev = mupdf.fz_new_draw_device(matrix, pix)
  14688. mupdf.fz_run_display_list(list_, dev, mupdf.FzMatrix(), mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE), mupdf.FzCookie())
  14689. mupdf.fz_close_device(dev)
  14690. # Use special raw Pixmap constructor so we don't set alpha to true.
  14691. return Pixmap( 'raw', pix)
  14692. def JM_point_from_py(p):
  14693. '''
  14694. PySequence to fz_point. Default: (FZ_MIN_INF_RECT, FZ_MIN_INF_RECT)
  14695. '''
  14696. if isinstance(p, mupdf.FzPoint):
  14697. return p
  14698. if isinstance(p, Point):
  14699. return mupdf.FzPoint(p.x, p.y)
  14700. if g_use_extra:
  14701. return extra.JM_point_from_py( p)
  14702. p0 = mupdf.FzPoint(0, 0)
  14703. x = JM_FLOAT_ITEM(p, 0)
  14704. y = JM_FLOAT_ITEM(p, 1)
  14705. if x is None or y is None:
  14706. return p0
  14707. x = max( x, FZ_MIN_INF_RECT)
  14708. y = max( y, FZ_MIN_INF_RECT)
  14709. x = min( x, FZ_MAX_INF_RECT)
  14710. y = min( y, FZ_MAX_INF_RECT)
  14711. return mupdf.FzPoint(x, y)
  14712. def JM_print_stext_page_as_text(res, page):
  14713. '''
  14714. Plain text output. An identical copy of fz_print_stext_page_as_text,
  14715. but lines within a block are concatenated by space instead a new-line
  14716. character (which else leads to 2 new-lines).
  14717. '''
  14718. if 1 and g_use_extra:
  14719. return extra.JM_print_stext_page_as_text(res, page)
  14720. assert isinstance(res, mupdf.FzBuffer)
  14721. assert isinstance(page, mupdf.FzStextPage)
  14722. rect = mupdf.FzRect(page.m_internal.mediabox)
  14723. last_char = 0
  14724. n_blocks = 0
  14725. n_lines = 0
  14726. n_chars = 0
  14727. for n_blocks2, block in enumerate( page):
  14728. if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_TEXT:
  14729. for n_lines2, line in enumerate( block):
  14730. for n_chars2, ch in enumerate( line):
  14731. pass
  14732. n_chars += n_chars2
  14733. n_lines += n_lines2
  14734. n_blocks += n_blocks2
  14735. for block in page:
  14736. if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_TEXT:
  14737. for line in block:
  14738. last_char = 0
  14739. for ch in line:
  14740. chbbox = JM_char_bbox(line, ch)
  14741. if (mupdf.fz_is_infinite_rect(rect)
  14742. or JM_rects_overlap(rect, chbbox)
  14743. ):
  14744. #raw += chr(ch.m_internal.c)
  14745. last_char = ch.m_internal.c
  14746. #log( '{=last_char!r utf!r}')
  14747. JM_append_rune(res, last_char)
  14748. if last_char != 10 and last_char > 0:
  14749. mupdf.fz_append_string(res, "\n")
  14750. def JM_put_script(annot_obj, key1, key2, value):
  14751. '''
  14752. Create a JavaScript PDF action.
  14753. Usable for all object types which support PDF actions, even if the
  14754. argument name suggests annotations. Up to 2 key values can be specified, so
  14755. JavaScript actions can be stored for '/A' and '/AA/?' keys.
  14756. '''
  14757. key1_obj = mupdf.pdf_dict_get(annot_obj, key1)
  14758. pdf = mupdf.pdf_get_bound_document(annot_obj) # owning PDF
  14759. # if no new script given, just delete corresponding key
  14760. if not value:
  14761. if key2 is None or not key2.m_internal:
  14762. mupdf.pdf_dict_del(annot_obj, key1)
  14763. elif key1_obj.m_internal:
  14764. mupdf.pdf_dict_del(key1_obj, key2)
  14765. return
  14766. # read any existing script as a PyUnicode string
  14767. if not key2.m_internal or not key1_obj.m_internal:
  14768. script = JM_get_script(key1_obj)
  14769. else:
  14770. script = JM_get_script(mupdf.pdf_dict_get(key1_obj, key2))
  14771. # replace old script, if different from new one
  14772. if value != script:
  14773. newaction = JM_new_javascript(pdf, value)
  14774. if not key2.m_internal:
  14775. mupdf.pdf_dict_put(annot_obj, key1, newaction)
  14776. else:
  14777. mupdf.pdf_dict_putl(annot_obj, newaction, key1, key2)
  14778. def JM_py_from_irect(r):
  14779. return r.x0, r.y0, r.x1, r.y1
  14780. def JM_py_from_matrix(m):
  14781. return m.a, m.b, m.c, m.d, m.e, m.f
  14782. def JM_py_from_point(p):
  14783. return p.x, p.y
  14784. def JM_py_from_quad(q):
  14785. '''
  14786. PySequence from fz_quad.
  14787. '''
  14788. return (
  14789. (q.ul.x, q.ul.y),
  14790. (q.ur.x, q.ur.y),
  14791. (q.ll.x, q.ll.y),
  14792. (q.lr.x, q.lr.y),
  14793. )
  14794. def JM_py_from_rect(r):
  14795. return r.x0, r.y0, r.x1, r.y1
  14796. def JM_quad_from_py(r):
  14797. if isinstance(r, mupdf.FzQuad):
  14798. return r
  14799. # cover all cases of 4-float-sequences
  14800. if hasattr(r, "__getitem__") and len(r) == 4 and hasattr(r[0], "__float__"):
  14801. r = mupdf.FzRect(*tuple(r))
  14802. if isinstance( r, mupdf.FzRect):
  14803. return mupdf.fz_quad_from_rect( r)
  14804. if isinstance( r, Quad):
  14805. return mupdf.fz_make_quad(
  14806. r.ul.x, r.ul.y,
  14807. r.ur.x, r.ur.y,
  14808. r.ll.x, r.ll.y,
  14809. r.lr.x, r.lr.y,
  14810. )
  14811. q = mupdf.fz_make_quad(0, 0, 0, 0, 0, 0, 0, 0)
  14812. p = [0,0,0,0]
  14813. if not r or not isinstance(r, (tuple, list)) or len(r) != 4:
  14814. return q
  14815. if JM_FLOAT_ITEM(r, 0) is None:
  14816. return mupdf.fz_quad_from_rect(JM_rect_from_py(r))
  14817. for i in range(4):
  14818. if i >= len(r):
  14819. return q # invalid: cancel the rest
  14820. obj = r[i] # next point item
  14821. if not PySequence_Check(obj) or PySequence_Size(obj) != 2:
  14822. return q # invalid: cancel the rest
  14823. p[i].x = JM_FLOAT_ITEM(obj, 0)
  14824. p[i].y = JM_FLOAT_ITEM(obj, 1)
  14825. if p[i].x is None or p[i].y is None:
  14826. return q
  14827. p[i].x = max( p[i].x, FZ_MIN_INF_RECT)
  14828. p[i].y = max( p[i].y, FZ_MIN_INF_RECT)
  14829. p[i].x = min( p[i].x, FZ_MAX_INF_RECT)
  14830. p[i].y = min( p[i].y, FZ_MAX_INF_RECT)
  14831. q.ul = p[0]
  14832. q.ur = p[1]
  14833. q.ll = p[2]
  14834. q.lr = p[3]
  14835. return q
  14836. def JM_read_contents(pageref):
  14837. '''
  14838. Read and concatenate a PDF page's /Contents object(s) in a buffer
  14839. '''
  14840. assert isinstance(pageref, mupdf.PdfObj), f'{type(pageref)}'
  14841. contents = mupdf.pdf_dict_get(pageref, mupdf.PDF_ENUM_NAME_Contents)
  14842. if mupdf.pdf_is_array(contents):
  14843. res = mupdf.FzBuffer(1024)
  14844. for i in range(mupdf.pdf_array_len(contents)):
  14845. if i > 0:
  14846. mupdf.fz_append_byte(res, 32)
  14847. obj = mupdf.pdf_array_get(contents, i)
  14848. if mupdf.pdf_is_stream(obj):
  14849. nres = mupdf.pdf_load_stream(obj)
  14850. mupdf.fz_append_buffer(res, nres)
  14851. elif contents.m_internal:
  14852. res = mupdf.pdf_load_stream(contents)
  14853. else:
  14854. res = mupdf.FzBuffer(0)
  14855. return res
  14856. def JM_rect_from_py(r):
  14857. if isinstance(r, mupdf.FzRect):
  14858. return r
  14859. if isinstance(r, mupdf.FzIrect):
  14860. return mupdf.FzRect(r)
  14861. if isinstance(r, Rect):
  14862. return mupdf.fz_make_rect(r.x0, r.y0, r.x1, r.y1)
  14863. if isinstance(r, IRect):
  14864. return mupdf.fz_make_rect(r.x0, r.y0, r.x1, r.y1)
  14865. if not r or not PySequence_Check(r) or PySequence_Size(r) != 4:
  14866. return mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
  14867. f = [0, 0, 0, 0]
  14868. for i in range(4):
  14869. f[i] = JM_FLOAT_ITEM(r, i)
  14870. if f[i] is None:
  14871. return mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
  14872. if f[i] < FZ_MIN_INF_RECT:
  14873. f[i] = FZ_MIN_INF_RECT
  14874. if f[i] > FZ_MAX_INF_RECT:
  14875. f[i] = FZ_MAX_INF_RECT
  14876. return mupdf.fz_make_rect(f[0], f[1], f[2], f[3])
  14877. def JM_rects_overlap(a, b):
  14878. if (0
  14879. or a.x0 >= b.x1
  14880. or a.y0 >= b.y1
  14881. or a.x1 <= b.x0
  14882. or a.y1 <= b.y0
  14883. ):
  14884. return 0
  14885. return 1
  14886. def JM_refresh_links( page):
  14887. '''
  14888. refreshes the link and annotation tables of a page
  14889. '''
  14890. if page is None or not page.m_internal:
  14891. return
  14892. obj = mupdf.pdf_dict_get( page.obj(), PDF_NAME('Annots'))
  14893. if obj.m_internal:
  14894. pdf = page.doc()
  14895. number = mupdf.pdf_lookup_page_number( pdf, page.obj())
  14896. page_mediabox = mupdf.FzRect()
  14897. page_ctm = mupdf.FzMatrix()
  14898. mupdf.pdf_page_transform( page, page_mediabox, page_ctm)
  14899. link = mupdf.pdf_load_link_annots( pdf, page, obj, number, page_ctm)
  14900. page.m_internal.links = mupdf.ll_fz_keep_link( link.m_internal)
  14901. def JM_rotate_page_matrix(page):
  14902. '''
  14903. calculate page rotation matrices
  14904. '''
  14905. if not page.m_internal:
  14906. return mupdf.FzMatrix() # no valid pdf page given
  14907. rotation = JM_page_rotation(page)
  14908. #log( '{rotation=}')
  14909. if rotation == 0:
  14910. return mupdf.FzMatrix() # no rotation
  14911. cb_size = JM_cropbox_size(page.obj())
  14912. w = cb_size.x
  14913. h = cb_size.y
  14914. #log( '{=h w}')
  14915. if rotation == 90:
  14916. m = mupdf.fz_make_matrix(0, 1, -1, 0, h, 0)
  14917. elif rotation == 180:
  14918. m = mupdf.fz_make_matrix(-1, 0, 0, -1, w, h)
  14919. else:
  14920. m = mupdf.fz_make_matrix(0, -1, 1, 0, 0, w)
  14921. #log( 'returning {m=}')
  14922. return m
def JM_search_stext_page(page, needle):
    '''
    Search the text of stext page 'page' for 'needle'.

    Returns the list 'quads' which is handed to on_highlight_char() via
    'hits.quads' (presumably filled there - verify against
    on_highlight_char). Returns None if 'needle' is empty.
    '''
    if g_use_extra:
        return extra.JM_search_stext_page(page.m_internal, needle)

    rect = mupdf.FzRect(page.m_internal.mediabox)
    if not needle:
        return
    quads = []
    # plain attribute container passed to on_highlight_char()
    class Hits:
        def __str__(self):
            return f'Hits(len={self.len} quads={self.quads} hfuzz={self.hfuzz} vfuzz={self.vfuzz}'
    hits = Hits()
    hits.len = 0
    hits.quads = quads
    hits.hfuzz = 0.2    # merge kerns but not large gaps
    hits.vfuzz = 0.1

    # The page text as one string; 'haystack' is the current index into it.
    buffer_ = JM_new_buffer_from_stext_page(page)
    haystack_string = mupdf.fz_string_from_buffer(buffer_)
    haystack = 0
    # find_string() is given the remaining text; begin is None if there is
    # no match, otherwise begin / end are relative to the searched slice.
    begin, end = find_string(haystack_string[haystack:], needle)
    if begin is None:
        #goto no_more_matches;
        return quads

    # make the match positions absolute indices into haystack_string
    begin += haystack
    end += haystack
    inside = 0  # 1 while the current position is within a match
    i = 0   # counts visited characters; not used otherwise
    for block in page:
        if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
            continue
        for line in block:
            for ch in line:
                i += 1
                if not mupdf.fz_is_infinite_rect(rect):
                    r = JM_char_bbox(line, ch)
                    if not JM_rects_overlap(rect, r):
                        # character is skipped without advancing 'haystack'
                        #goto next_char;
                        continue
                # emulates the C 'goto try_new_match' loop
                while 1:
                    #try_new_match:
                    if not inside:
                        if haystack >= begin:
                            inside = 1
                    if inside:
                        if haystack < end:
                            # current character belongs to the current match
                            on_highlight_char(hits, line, ch)
                            break
                        else:
                            # current match is finished: look for the next one
                            inside = 0
                            begin, end = find_string(haystack_string[haystack:], needle)
                            if begin is None:
                                #goto no_more_matches;
                                return quads
                            else:
                                #goto try_new_match;
                                begin += haystack
                                end += haystack
                                continue
                    break
                haystack += 1
                #next_char:;
            # the text must have a new-line at the end of every line ...
            assert haystack_string[haystack] == '\n', \
                    f'{haystack=} {haystack_string[haystack]=}'
            haystack += 1
        # ... and one more at the end of every text block
        assert haystack_string[haystack] == '\n', \
                f'{haystack=} {haystack_string[haystack]=}'
        haystack += 1
    #no_more_matches:;
    return quads
  14991. def JM_scan_resources(pdf, rsrc, liste, what, stream_xref, tracer):
  14992. '''
  14993. Step through /Resources, looking up image, xobject or font information
  14994. '''
  14995. if mupdf.pdf_mark_obj(rsrc):
  14996. mupdf.fz_warn('Circular dependencies! Consider page cleaning.')
  14997. return # Circular dependencies!
  14998. try:
  14999. xobj = mupdf.pdf_dict_get(rsrc, mupdf.PDF_ENUM_NAME_XObject)
  15000. if what == 1: # lookup fonts
  15001. font = mupdf.pdf_dict_get(rsrc, mupdf.PDF_ENUM_NAME_Font)
  15002. JM_gather_fonts(pdf, font, liste, stream_xref)
  15003. elif what == 2: # look up images
  15004. JM_gather_images(pdf, xobj, liste, stream_xref)
  15005. elif what == 3: # look up form xobjects
  15006. JM_gather_forms(pdf, xobj, liste, stream_xref)
  15007. else: # should never happen
  15008. return
  15009. # check if we need to recurse into Form XObjects
  15010. n = mupdf.pdf_dict_len(xobj)
  15011. for i in range(n):
  15012. obj = mupdf.pdf_dict_get_val(xobj, i)
  15013. if mupdf.pdf_is_stream(obj):
  15014. sxref = mupdf.pdf_to_num(obj)
  15015. else:
  15016. sxref = 0
  15017. subrsrc = mupdf.pdf_dict_get(obj, mupdf.PDF_ENUM_NAME_Resources)
  15018. if subrsrc.m_internal:
  15019. sxref_t = sxref
  15020. if sxref_t not in tracer:
  15021. tracer.append(sxref_t)
  15022. JM_scan_resources( pdf, subrsrc, liste, what, sxref, tracer)
  15023. else:
  15024. mupdf.fz_warn('Circular dependencies! Consider page cleaning.')
  15025. return
  15026. finally:
  15027. mupdf.pdf_unmark_obj(rsrc)
  15028. def JM_set_choice_options(annot, liste):
  15029. '''
  15030. set ListBox / ComboBox values
  15031. '''
  15032. if not liste:
  15033. return
  15034. assert isinstance( liste, (tuple, list))
  15035. n = len( liste)
  15036. if n == 0:
  15037. return
  15038. annot_obj = mupdf.pdf_annot_obj( annot)
  15039. pdf = mupdf.pdf_get_bound_document( annot_obj)
  15040. optarr = mupdf.pdf_new_array( pdf, n)
  15041. for i in range(n):
  15042. val = liste[i]
  15043. opt = val
  15044. if isinstance(opt, str):
  15045. mupdf.pdf_array_push_text_string( optarr, opt)
  15046. else:
  15047. assert isinstance( val, (tuple, list)) and len( val) == 2, 'bad choice field list'
  15048. opt1, opt2 = val
  15049. assert opt1 and opt2, 'bad choice field list'
  15050. optarrsub = mupdf.pdf_array_push_array( optarr, 2)
  15051. mupdf.pdf_array_push_text_string( optarrsub, opt1)
  15052. mupdf.pdf_array_push_text_string( optarrsub, opt2)
  15053. mupdf.pdf_dict_put( annot_obj, PDF_NAME('Opt'), optarr)
  15054. def JM_set_field_type(doc, obj, type):
  15055. '''
  15056. Set the field type
  15057. '''
  15058. setbits = 0
  15059. clearbits = 0
  15060. typename = None
  15061. if type == mupdf.PDF_WIDGET_TYPE_BUTTON:
  15062. typename = PDF_NAME('Btn')
  15063. setbits = mupdf.PDF_BTN_FIELD_IS_PUSHBUTTON
  15064. elif type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON:
  15065. typename = PDF_NAME('Btn')
  15066. clearbits = mupdf.PDF_BTN_FIELD_IS_PUSHBUTTON
  15067. setbits = mupdf.PDF_BTN_FIELD_IS_RADIO
  15068. elif type == mupdf.PDF_WIDGET_TYPE_CHECKBOX:
  15069. typename = PDF_NAME('Btn')
  15070. clearbits = (mupdf.PDF_BTN_FIELD_IS_PUSHBUTTON | mupdf.PDF_BTN_FIELD_IS_RADIO)
  15071. elif type == mupdf.PDF_WIDGET_TYPE_TEXT:
  15072. typename = PDF_NAME('Tx')
  15073. elif type == mupdf.PDF_WIDGET_TYPE_LISTBOX:
  15074. typename = PDF_NAME('Ch')
  15075. clearbits = mupdf.PDF_CH_FIELD_IS_COMBO
  15076. elif type == mupdf.PDF_WIDGET_TYPE_COMBOBOX:
  15077. typename = PDF_NAME('Ch')
  15078. setbits = mupdf.PDF_CH_FIELD_IS_COMBO
  15079. elif type == mupdf.PDF_WIDGET_TYPE_SIGNATURE:
  15080. typename = PDF_NAME('Sig')
  15081. if typename is not None and typename.m_internal:
  15082. mupdf.pdf_dict_put(obj, PDF_NAME('FT'), typename)
  15083. if setbits != 0 or clearbits != 0:
  15084. bits = mupdf.pdf_dict_get_int(obj, PDF_NAME('Ff'))
  15085. bits &= ~clearbits
  15086. bits |= setbits
  15087. mupdf.pdf_dict_put_int(obj, PDF_NAME('Ff'), bits)
  15088. def JM_set_object_value(obj, key, value):
  15089. '''
  15090. Set a PDF dict key to some value
  15091. '''
  15092. eyecatcher = "fitz: replace me!"
  15093. pdf = mupdf.pdf_get_bound_document(obj)
  15094. # split PDF key at path seps and take last key part
  15095. list_ = key.split('/')
  15096. len_ = len(list_)
  15097. i = len_ - 1
  15098. skey = list_[i]
  15099. del list_[i] # del the last sub-key
  15100. len_ = len(list_) # remaining length
  15101. testkey = mupdf.pdf_dict_getp(obj, key) # check if key already exists
  15102. if not testkey.m_internal:
  15103. #No, it will be created here. But we cannot allow this happening if
  15104. #indirect objects are referenced. So we check all higher level
  15105. #sub-paths for indirect references.
  15106. while len_ > 0:
  15107. t = '/'.join(list_) # next high level
  15108. if mupdf.pdf_is_indirect(mupdf.pdf_dict_getp(obj, JM_StrAsChar(t))):
  15109. raise Exception("path to '%s' has indirects", JM_StrAsChar(skey))
  15110. del list_[len_ - 1] # del last sub-key
  15111. len_ = len(list_) # remaining length
  15112. # Insert our eyecatcher. Will create all sub-paths in the chain, or
  15113. # respectively remove old value of key-path.
  15114. mupdf.pdf_dict_putp(obj, key, mupdf.pdf_new_text_string(eyecatcher))
  15115. testkey = mupdf.pdf_dict_getp(obj, key)
  15116. if not mupdf.pdf_is_string(testkey):
  15117. raise Exception("cannot insert value for '%s'", key)
  15118. temp = mupdf.pdf_to_text_string(testkey)
  15119. if temp != eyecatcher:
  15120. raise Exception("cannot insert value for '%s'", key)
  15121. # read the result as a string
  15122. res = JM_object_to_buffer(obj, 1, 0)
  15123. objstr = JM_EscapeStrFromBuffer(res)
  15124. # replace 'eyecatcher' by desired 'value'
  15125. nullval = "/%s(%s)" % ( skey, eyecatcher)
  15126. newval = "/%s %s" % (skey, value)
  15127. newstr = objstr.replace(nullval, newval, 1)
  15128. # make PDF object from resulting string
  15129. new_obj = JM_pdf_obj_from_str(pdf, newstr)
  15130. return new_obj
  15131. def JM_set_ocg_arrays(conf, basestate, on, off, rbgroups, locked):
  15132. if basestate:
  15133. mupdf.pdf_dict_put_name( conf, PDF_NAME('BaseState'), basestate)
  15134. if on is not None:
  15135. mupdf.pdf_dict_del( conf, PDF_NAME('ON'))
  15136. if on:
  15137. arr = mupdf.pdf_dict_put_array( conf, PDF_NAME('ON'), 1)
  15138. JM_set_ocg_arrays_imp( arr, on)
  15139. if off is not None:
  15140. mupdf.pdf_dict_del( conf, PDF_NAME('OFF'))
  15141. if off:
  15142. arr = mupdf.pdf_dict_put_array( conf, PDF_NAME('OFF'), 1)
  15143. JM_set_ocg_arrays_imp( arr, off)
  15144. if locked is not None:
  15145. mupdf.pdf_dict_del( conf, PDF_NAME('Locked'))
  15146. if locked:
  15147. arr = mupdf.pdf_dict_put_array( conf, PDF_NAME('Locked'), 1)
  15148. JM_set_ocg_arrays_imp( arr, locked)
  15149. if rbgroups is not None:
  15150. mupdf.pdf_dict_del( conf, PDF_NAME('RBGroups'))
  15151. if rbgroups:
  15152. arr = mupdf.pdf_dict_put_array( conf, PDF_NAME('RBGroups'), 1)
  15153. n =len(rbgroups)
  15154. for i in range(n):
  15155. item0 = rbgroups[i]
  15156. obj = mupdf.pdf_array_push_array( arr, 1)
  15157. JM_set_ocg_arrays_imp( obj, item0)
  15158. def JM_set_ocg_arrays_imp(arr, list_):
  15159. '''
  15160. Set OCG arrays from dict of Python lists
  15161. Works with dict like {"basestate":name, "on":list, "off":list, "rbg":list}
  15162. '''
  15163. pdf = mupdf.pdf_get_bound_document(arr)
  15164. for xref in list_:
  15165. obj = mupdf.pdf_new_indirect(pdf, xref, 0)
  15166. mupdf.pdf_array_push(arr, obj)
  15167. def JM_set_resource_property(ref, name, xref):
  15168. '''
  15169. Insert an item into Resources/Properties (used for Marked Content)
  15170. Arguments:
  15171. (1) e.g. page object, Form XObject
  15172. (2) marked content name
  15173. (3) xref of the referenced object (insert as indirect reference)
  15174. '''
  15175. pdf = mupdf.pdf_get_bound_document(ref)
  15176. ind = mupdf.pdf_new_indirect(pdf, xref, 0)
  15177. if not ind.m_internal:
  15178. RAISEPY(MSG_BAD_XREF, PyExc_ValueError)
  15179. resources = mupdf.pdf_dict_get(ref, PDF_NAME('Resources'))
  15180. if not resources.m_internal:
  15181. resources = mupdf.pdf_dict_put_dict(ref, PDF_NAME('Resources'), 1)
  15182. properties = mupdf.pdf_dict_get(resources, PDF_NAME('Properties'))
  15183. if not properties.m_internal:
  15184. properties = mupdf.pdf_dict_put_dict(resources, PDF_NAME('Properties'), 1)
  15185. mupdf.pdf_dict_put(properties, mupdf.pdf_new_name(name), ind)
def JM_set_widget_properties(annot, Widget):
    '''
    Update the PDF form field with the properties from a Python Widget object.
    Called by "Page.add_widget" and "Annot.update_widget".

    "annot" may be an Annot (its ".this" is used) or a mupdf.PdfAnnot.
    Every property is read from "Widget" via getattr(), so an attribute that
    does not exist reads as None. Nothing is returned; the annotation object
    is modified in place and updated at the end.
    '''
    if isinstance( annot, Annot):
        annot = annot.this
    assert isinstance( annot, mupdf.PdfAnnot), f'{type(annot)=} {type=}'
    page = _pdf_annot_page(annot)
    assert page.m_internal, 'Annot is not bound to a page'
    annot_obj = mupdf.pdf_annot_obj(annot)
    pdf = page.doc()
    def GETATTR(name):
        # missing Widget attributes yield None instead of raising
        return getattr(Widget, name, None)

    value = GETATTR("field_type")
    field_type = value

    # rectangle --------------------------------------------------------------
    # the Widget rect is transformed with the page rotation matrix before
    # it is stored
    value = GETATTR("rect")
    rect = JM_rect_from_py(value)
    rot_mat = JM_rotate_page_matrix(page)
    rect = mupdf.fz_transform_rect(rect, rot_mat)
    mupdf.pdf_set_annot_rect(annot, rect)

    # fill color -------------------------------------------------------------
    # only applied for a non-empty list / tuple
    value = GETATTR("fill_color")
    if value and PySequence_Check(value):
        n = len(value)
        fill_col = mupdf.pdf_new_array(pdf, n)
        col = 0
        for i in range(n):
            col = value[i]
            mupdf.pdf_array_push_real(fill_col, col)
        mupdf.pdf_field_set_fill_color(annot_obj, fill_col)

    # dashes -----------------------------------------------------------------
    # stored as integers under /BS/D
    value = GETATTR("border_dashes")
    if value and PySequence_Check(value):
        n = len(value)
        dashes = mupdf.pdf_new_array(pdf, n)
        for i in range(n):
            mupdf.pdf_array_push_int(dashes, value[i])
        mupdf.pdf_dict_putl(annot_obj, dashes, PDF_NAME('BS'), PDF_NAME('D'))

    # border color -----------------------------------------------------------
    # stored under /MK/BC
    value = GETATTR("border_color")
    if value and PySequence_Check(value):
        n = len(value)
        border_col = mupdf.pdf_new_array(pdf, n)
        col = 0
        for i in range(n):
            col = value[i]
            mupdf.pdf_array_push_real(border_col, col)
        mupdf.pdf_dict_putl(annot_obj, border_col, PDF_NAME('MK'), PDF_NAME('BC'))

    # entry ignored - may be used later
    #
    #int text_format = (int) PyInt_AsLong(GETATTR("text_format"));
    #

    # field label -----------------------------------------------------------
    value = GETATTR("field_label")
    if value is not None:
        label = JM_StrAsChar(value)
        mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('TU'), label)

    # field name -------------------------------------------------------------
    # /T is only rewritten when the name differs from the loaded field name
    value = GETATTR("field_name")
    if value is not None:
        name = JM_StrAsChar(value)
        old_name = mupdf.pdf_load_field_name(annot_obj)
        if name != old_name:
            mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('T'), name)

    # max text len -----------------------------------------------------------
    # text fields only; a falsy value (None, 0) writes nothing
    if field_type == mupdf.PDF_WIDGET_TYPE_TEXT:
        value = GETATTR("text_maxlen")
        text_maxlen = value
        if text_maxlen:
            mupdf.pdf_dict_put_int(annot_obj, PDF_NAME('MaxLen'), text_maxlen)
    # display setting: applied for every field type
    value = GETATTR("field_display")
    d = value
    mupdf.pdf_field_set_display(annot_obj, d)

    # choice values ----------------------------------------------------------
    if field_type in (mupdf.PDF_WIDGET_TYPE_LISTBOX, mupdf.PDF_WIDGET_TYPE_COMBOBOX):
        value = GETATTR("choice_values")
        JM_set_choice_options(annot, value)

    # border style -----------------------------------------------------------
    value = GETATTR("border_style")
    val = JM_get_border_style(value)
    mupdf.pdf_dict_putl(annot_obj, val, PDF_NAME('BS'), PDF_NAME('S'))

    # border width -----------------------------------------------------------
    # NOTE(review): written unconditionally - presumably the Widget always
    # provides a numeric border_width; confirm against the callers.
    value = GETATTR("border_width")
    border_width = value
    mupdf.pdf_dict_putl(
            annot_obj,
            mupdf.pdf_new_real(border_width),
            PDF_NAME('BS'),
            PDF_NAME('W'),
            )

    # /DA string -------------------------------------------------------------
    value = GETATTR("_text_da")
    da = JM_StrAsChar(value)
    mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('DA'), da)
    mupdf.pdf_dict_del(annot_obj, PDF_NAME('DS'))  # not supported by MuPDF
    mupdf.pdf_dict_del(annot_obj, PDF_NAME('RC'))  # not supported by MuPDF

    # field flags ------------------------------------------------------------
    # the flag bit matching the field type is OR-ed in before /Ff is written
    field_flags = GETATTR("field_flags")
    if field_flags is not None:
        if field_type == mupdf.PDF_WIDGET_TYPE_COMBOBOX:
            field_flags |= mupdf.PDF_CH_FIELD_IS_COMBO
        elif field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON:
            field_flags |= mupdf.PDF_BTN_FIELD_IS_RADIO
        elif field_type == mupdf.PDF_WIDGET_TYPE_BUTTON:
            field_flags |= mupdf.PDF_BTN_FIELD_IS_PUSHBUTTON
        mupdf.pdf_dict_put_int( annot_obj, PDF_NAME('Ff'), field_flags)

    # button caption ---------------------------------------------------------
    value = GETATTR("button_caption")
    ca = JM_StrAsChar(value)
    if ca:
        mupdf.pdf_field_set_button_caption(annot_obj, ca)

    # script (/A) -------------------------------------------------------
    # an empty PdfObj is passed as the second key for the plain /A entry
    value = GETATTR("script")
    JM_put_script(annot_obj, PDF_NAME('A'), mupdf.PdfObj(), value)

    # script (/AA/K) -------------------------------------------------------
    value = GETATTR("script_stroke")
    JM_put_script(annot_obj, PDF_NAME('AA'), PDF_NAME('K'), value)

    # script (/AA/F) -------------------------------------------------------
    value = GETATTR("script_format")
    JM_put_script(annot_obj, PDF_NAME('AA'), PDF_NAME('F'), value)

    # script (/AA/V) -------------------------------------------------------
    value = GETATTR("script_change")
    JM_put_script(annot_obj, PDF_NAME('AA'), PDF_NAME('V'), value)

    # script (/AA/C) -------------------------------------------------------
    value = GETATTR("script_calc")
    JM_put_script(annot_obj, PDF_NAME('AA'), PDF_NAME('C'), value)

    # script (/AA/Bl) -------------------------------------------------------
    # 'Bl' and 'Fo' are created with pdf_new_name() instead of PDF_NAME()
    value = GETATTR("script_blur")
    JM_put_script(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Bl'), value)

    # script (/AA/Fo) codespell:ignore --------------------------------------
    value = GETATTR("script_focus")
    JM_put_script(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Fo'), value)

    # field value ------------------------------------------------------------
    value = GETATTR("field_value")  # field value
    text = JM_StrAsChar(value)  # convert to text (may fail!)
    if field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON:
        if not value:
            # any falsy value switches the button off
            mupdf.pdf_set_field_value(pdf, annot_obj, "Off", 1)
            mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), "Off")
        else:
            # TODO check if another button in the group is ON and if so set it Off
            onstate = mupdf.pdf_button_field_on_state(annot_obj)
            if onstate.m_internal:
                on = mupdf.pdf_to_name(onstate)
                mupdf.pdf_set_field_value(pdf, annot_obj, on, 1)
                mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), on)
            elif text:
                # no on-state found: use the given text as appearance state
                mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), text)
    elif field_type == mupdf.PDF_WIDGET_TYPE_CHECKBOX:
        onstate = mupdf.pdf_button_field_on_state(annot_obj)
        on = onstate.pdf_to_name()
        # checked if the value is True, equals the on-state name, or is 'Yes'
        if value in (True, on) or text == 'Yes':
            mupdf.pdf_set_field_value(pdf, annot_obj, on, 1)
            mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), on)
            mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('V'), on)
        else:
            mupdf.pdf_dict_put_name( annot_obj, PDF_NAME('AS'), 'Off')
            mupdf.pdf_dict_put_name( annot_obj, PDF_NAME('V'), 'Off')
    else:
        # all other field types: only a non-empty text is stored
        if text:
            mupdf.pdf_set_field_value(pdf, annot_obj, text, 1)
            if field_type in (mupdf.PDF_WIDGET_TYPE_COMBOBOX, mupdf.PDF_WIDGET_TYPE_LISTBOX):
                mupdf.pdf_dict_del(annot_obj, PDF_NAME('I'))

    # finish: mark the annotation dirty, hot and active, then update it
    mupdf.pdf_dirty_annot(annot)
    mupdf.pdf_set_annot_hot(annot, 1)
    mupdf.pdf_set_annot_active(annot, 1)
    mupdf.pdf_update_annot(annot)
  15355. def JM_show_string_cs(
  15356. text,
  15357. user_font,
  15358. trm,
  15359. s,
  15360. wmode,
  15361. bidi_level,
  15362. markup_dir,
  15363. language,
  15364. ):
  15365. i = 0
  15366. while i < len(s):
  15367. l, ucs = mupdf.fz_chartorune(s[i:])
  15368. i += l
  15369. gid = mupdf.fz_encode_character_sc(user_font, ucs)
  15370. if gid == 0:
  15371. gid, font = mupdf.fz_encode_character_with_fallback(user_font, ucs, 0, language)
  15372. else:
  15373. font = user_font
  15374. mupdf.fz_show_glyph(text, font, trm, gid, ucs, wmode, bidi_level, markup_dir, language)
  15375. adv = mupdf.fz_advance_glyph(font, gid, wmode)
  15376. if wmode == 0:
  15377. trm = mupdf.fz_pre_translate(trm, adv, 0)
  15378. else:
  15379. trm = mupdf.fz_pre_translate(trm, 0, -adv)
  15380. return trm
  15381. def JM_UnicodeFromBuffer(buff):
  15382. buff_bytes = mupdf.fz_buffer_extract_copy(buff)
  15383. val = buff_bytes.decode(errors='replace')
  15384. z = val.find(chr(0))
  15385. if z >= 0:
  15386. val = val[:z]
  15387. return val
  15388. def message_warning(text):
  15389. '''
  15390. Generate a warning.
  15391. '''
  15392. message(f'warning: {text}')
  15393. def JM_update_stream(doc, obj, buffer_, compress):
  15394. '''
  15395. update a stream object
  15396. compress stream when beneficial
  15397. '''
  15398. if compress:
  15399. length, _ = mupdf.fz_buffer_storage(buffer_)
  15400. if length > 30: # ignore small stuff
  15401. buffer_compressed = JM_compress_buffer(buffer_)
  15402. assert isinstance(buffer_compressed, mupdf.FzBuffer)
  15403. if buffer_compressed.m_internal:
  15404. length_compressed, _ = mupdf.fz_buffer_storage(buffer_compressed)
  15405. if length_compressed < length: # was it worth the effort?
  15406. mupdf.pdf_dict_put(
  15407. obj,
  15408. mupdf.PDF_ENUM_NAME_Filter,
  15409. mupdf.PDF_ENUM_NAME_FlateDecode,
  15410. )
  15411. mupdf.pdf_update_stream(doc, obj, buffer_compressed, 1)
  15412. return
  15413. mupdf.pdf_update_stream(doc, obj, buffer_, 0)
  15414. def JM_xobject_from_page(pdfout, fsrcpage, xref, gmap):
  15415. '''
  15416. Make an XObject from a PDF page
  15417. For a positive xref assume that its object can be used instead
  15418. '''
  15419. assert isinstance(gmap, mupdf.PdfGraftMap), f'{type(gmap)=}'
  15420. if xref > 0:
  15421. xobj1 = mupdf.pdf_new_indirect(pdfout, xref, 0)
  15422. else:
  15423. srcpage = _as_pdf_page(fsrcpage.this)
  15424. spageref = srcpage.obj()
  15425. mediabox = mupdf.pdf_to_rect(mupdf.pdf_dict_get_inheritable(spageref, PDF_NAME('MediaBox')))
  15426. # Deep-copy resources object of source page
  15427. o = mupdf.pdf_dict_get_inheritable(spageref, PDF_NAME('Resources'))
  15428. if gmap.m_internal:
  15429. # use graftmap when possible
  15430. resources = mupdf.pdf_graft_mapped_object(gmap, o)
  15431. else:
  15432. resources = mupdf.pdf_graft_object(pdfout, o)
  15433. # get spgage contents source
  15434. res = JM_read_contents(spageref)
  15435. #-------------------------------------------------------------
  15436. # create XObject representing the source page
  15437. #-------------------------------------------------------------
  15438. xobj1 = mupdf.pdf_new_xobject(pdfout, mediabox, mupdf.FzMatrix(), mupdf.PdfObj(0), res)
  15439. # store spage contents
  15440. JM_update_stream(pdfout, xobj1, res, 1)
  15441. # store spage resources
  15442. mupdf.pdf_dict_put(xobj1, PDF_NAME('Resources'), resources)
  15443. return xobj1
  15444. def PySequence_Check(s):
  15445. return isinstance(s, (tuple, list))
  15446. def PySequence_Size(s):
  15447. return len(s)
# constants: error messages. These are also in extra.i.
#
# Message texts used when raising exceptions; keep them in step with the
# copies in extra.i.
MSG_BAD_ANNOT_TYPE = "bad annot type"
MSG_BAD_APN = "bad or missing annot AP/N"
MSG_BAD_ARG_INK_ANNOT = "arg must be seq of seq of float pairs"
MSG_BAD_ARG_POINTS = "bad seq of points"
MSG_BAD_BUFFER = "bad type: 'buffer'"
MSG_BAD_COLOR_SEQ = "bad color sequence"
MSG_BAD_DOCUMENT = "cannot open broken document"
MSG_BAD_FILETYPE = "bad filetype"
MSG_BAD_LOCATION = "bad location"
MSG_BAD_OC_CONFIG = "bad config number"
MSG_BAD_OC_LAYER = "bad layer number"
MSG_BAD_OC_REF = "bad 'oc' reference"
MSG_BAD_PAGEID = "bad page id"
MSG_BAD_PAGENO = "bad page number(s)"
MSG_BAD_PDFROOT = "PDF has no root"
MSG_BAD_RECT = "rect is infinite or empty"
MSG_BAD_TEXT = "bad type: 'text'"
MSG_BAD_XREF = "bad xref"
MSG_COLOR_COUNT_FAILED = "color count failed"
MSG_FILE_OR_BUFFER = "need font file or buffer"
MSG_FONT_FAILED = "cannot create font"
MSG_IS_NO_ANNOT = "is no annotation"
MSG_IS_NO_IMAGE = "is no image"
MSG_IS_NO_PDF = "is no PDF"
MSG_IS_NO_DICT = "object is no PDF dict"
MSG_PIX_NOALPHA = "source pixmap has no alpha"
MSG_PIXEL_OUTSIDE = "pixel(s) outside image"
# Exception selectors: plain strings holding an exception's name, not
# exception classes. PyExc_ValueError is passed as the second argument
# of RAISEPY().
JM_Exc_FileDataError = 'FileDataError'
PyExc_ValueError = 'ValueError'
  15479. def RAISEPY( msg, exc):
  15480. #JM_Exc_CurrentException=exc
  15481. #fz_throw(context, FZ_ERROR_GENERIC, msg)
  15482. raise Exception( msg)
  15483. def PyUnicode_DecodeRawUnicodeEscape(s, errors='strict'):
  15484. # FIXED: handle raw unicode escape sequences
  15485. if not s:
  15486. return ""
  15487. if isinstance(s, str):
  15488. rc = s.encode("utf8", errors=errors)
  15489. elif isinstance(s, bytes):
  15490. rc = s[:]
  15491. ret = rc.decode('raw_unicode_escape', errors=errors)
  15492. return ret
  15493. def CheckColor(c: OptSeq):
  15494. if c:
  15495. if (
  15496. type(c) not in (list, tuple)
  15497. or len(c) not in (1, 3, 4)
  15498. or min(c) < 0
  15499. or max(c) > 1
  15500. ):
  15501. raise ValueError("need 1, 3 or 4 color components in range 0 to 1")
  15502. def CheckFont(page: Page, fontname: str) -> tuple:
  15503. """Return an entry in the page's font list if reference name matches.
  15504. """
  15505. for f in page.get_fonts():
  15506. if f[4] == fontname:
  15507. return f
  15508. def CheckFontInfo(doc: Document, xref: int) -> list:
  15509. """Return a font info if present in the document.
  15510. """
  15511. for f in doc.FontInfos:
  15512. if xref == f[0]:
  15513. return f
  15514. def CheckMarkerArg(quads: typing.Any) -> tuple:
  15515. if CheckRect(quads):
  15516. r = Rect(quads)
  15517. return (r.quad,)
  15518. if CheckQuad(quads):
  15519. return (quads,)
  15520. for q in quads:
  15521. if not (CheckRect(q) or CheckQuad(q)):
  15522. raise ValueError("bad quads entry")
  15523. return quads
  15524. def CheckMorph(o: typing.Any) -> bool:
  15525. if not bool(o):
  15526. return False
  15527. if not (type(o) in (list, tuple) and len(o) == 2):
  15528. raise ValueError("morph must be a sequence of length 2")
  15529. if not (len(o[0]) == 2 and len(o[1]) == 6):
  15530. raise ValueError("invalid morph param 0")
  15531. if not o[1][4] == o[1][5] == 0:
  15532. raise ValueError("invalid morph param 1")
  15533. return True
  15534. def CheckParent(o: typing.Any):
  15535. return
  15536. if not hasattr(o, "parent") or o.parent is None:
  15537. raise ValueError(f"orphaned object {type(o)=}: parent is None")
  15538. def CheckQuad(q: typing.Any) -> bool:
  15539. """Check whether an object is convex, not empty quad-like.
  15540. It must be a sequence of 4 number pairs.
  15541. """
  15542. try:
  15543. q0 = Quad(q)
  15544. except Exception:
  15545. if g_exceptions_verbose > 1: exception_info()
  15546. return False
  15547. return q0.is_convex
  15548. def CheckRect(r: typing.Any) -> bool:
  15549. """Check whether an object is non-degenerate rect-like.
  15550. It must be a sequence of 4 numbers.
  15551. """
  15552. try:
  15553. r = Rect(r)
  15554. except Exception:
  15555. if g_exceptions_verbose > 1: exception_info()
  15556. return False
  15557. return not (r.is_empty or r.is_infinite)
  15558. def ColorCode(c: typing.Union[list, tuple, float, None], f: str) -> str:
  15559. if not c:
  15560. return ""
  15561. if hasattr(c, "__float__"):
  15562. c = (c,)
  15563. CheckColor(c)
  15564. if len(c) == 1:
  15565. s = _format_g(c[0]) + " "
  15566. return s + "G " if f == "c" else s + "g "
  15567. if len(c) == 3:
  15568. s = _format_g(tuple(c)) + " "
  15569. return s + "RG " if f == "c" else s + "rg "
  15570. s = _format_g(tuple(c)) + " "
  15571. return s + "K " if f == "c" else s + "k "
  15572. def Page__add_text_marker(self, quads, annot_type):
  15573. pdfpage = self._pdf_page()
  15574. rotation = JM_page_rotation(pdfpage)
  15575. def final():
  15576. if rotation != 0:
  15577. mupdf.pdf_dict_put_int(pdfpage.obj(), PDF_NAME('Rotate'), rotation)
  15578. try:
  15579. if rotation != 0:
  15580. mupdf.pdf_dict_put_int(pdfpage.obj(), PDF_NAME('Rotate'), 0)
  15581. annot = mupdf.pdf_create_annot(pdfpage, annot_type)
  15582. for item in quads:
  15583. q = JM_quad_from_py(item)
  15584. mupdf.pdf_add_annot_quad_point(annot, q)
  15585. mupdf.pdf_update_annot(annot)
  15586. JM_add_annot_id(annot, "A")
  15587. final()
  15588. except Exception:
  15589. if g_exceptions_verbose: exception_info()
  15590. final()
  15591. return
  15592. return Annot(annot)
  15593. def PDF_NAME(x):
  15594. assert isinstance(x, str)
  15595. ret = getattr(mupdf, f'PDF_ENUM_NAME_{x}')
  15596. # Note that we return a (swig proxy for) pdf_obj*, not a mupdf.PdfObj. In
  15597. # the C++ API, the constructor PdfObj::PdfObj(pdf_obj*) is marked as
  15598. # explicit, but this seems to be ignored by SWIG. If SWIG started to
  15599. # generate code that respected `explicit`, we would need to do `return
  15600. # mupdf.PdfObj(ret)`.
  15601. #
  15602. # [Compare with extra.i, where we define our own PDF_NAME2() macro that
  15603. # returns a mupdf::PdfObj.]
  15604. return ret
  15605. def UpdateFontInfo(doc: Document, info: typing.Sequence):
  15606. xref = info[0]
  15607. found = False
  15608. for i, fi in enumerate(doc.FontInfos):
  15609. if fi[0] == xref:
  15610. found = True
  15611. break
  15612. if found:
  15613. doc.FontInfos[i] = info
  15614. else:
  15615. doc.FontInfos.append(info)
  15616. def args_match(args, *types):
  15617. '''
  15618. Returns true if <args> matches <types>.
  15619. Each item in <types> is a type or tuple of types. Any of these types will
  15620. match an item in <args>. `None` will match anything in <args>. `type(None)`
  15621. will match an arg whose value is `None`.
  15622. '''
  15623. j = 0
  15624. for i in range(len(types)):
  15625. type_ = types[i]
  15626. if j >= len(args):
  15627. if isinstance(type_, tuple) and None in type_:
  15628. # arg is missing but has default value.
  15629. continue
  15630. else:
  15631. return False
  15632. if type_ is not None and not isinstance(args[j], type_):
  15633. return False
  15634. j += 1
  15635. if j != len(args):
  15636. return False
  15637. return True
  15638. def calc_image_matrix(width, height, tr, rotate, keep):
  15639. '''
  15640. # compute image insertion matrix
  15641. '''
  15642. trect = JM_rect_from_py(tr)
  15643. rot = mupdf.fz_rotate(rotate)
  15644. trw = trect.x1 - trect.x0
  15645. trh = trect.y1 - trect.y0
  15646. w = trw
  15647. h = trh
  15648. if keep:
  15649. large = max(width, height)
  15650. fw = width / large
  15651. fh = height / large
  15652. else:
  15653. fw = fh = 1
  15654. small = min(fw, fh)
  15655. if rotate != 0 and rotate != 180:
  15656. f = fw
  15657. fw = fh
  15658. fh = f
  15659. if fw < 1:
  15660. if trw / fw > trh / fh:
  15661. w = trh * small
  15662. h = trh
  15663. else:
  15664. w = trw
  15665. h = trw / small
  15666. elif fw != fh:
  15667. if trw / fw > trh / fh:
  15668. w = trh / small
  15669. h = trh
  15670. else:
  15671. w = trw
  15672. h = trw * small
  15673. else:
  15674. w = trw
  15675. h = trh
  15676. tmp = mupdf.fz_make_point(
  15677. (trect.x0 + trect.x1) / 2,
  15678. (trect.y0 + trect.y1) / 2,
  15679. )
  15680. mat = mupdf.fz_make_matrix(1, 0, 0, 1, -0.5, -0.5)
  15681. mat = mupdf.fz_concat(mat, rot)
  15682. mat = mupdf.fz_concat(mat, mupdf.fz_scale(w, h))
  15683. mat = mupdf.fz_concat(mat, mupdf.fz_translate(tmp.x, tmp.y))
  15684. return mat
  15685. def detect_super_script(line, ch):
  15686. if line.m_internal.wmode == 0 and line.m_internal.dir.x == 1 and line.m_internal.dir.y == 0:
  15687. return ch.m_internal.origin.y < line.m_internal.first_char.origin.y - ch.m_internal.size * 0.1
  15688. return 0
  15689. def dir_str(x):
  15690. ret = f'{x} {type(x)} ({len(dir(x))}):\n'
  15691. for i in dir(x):
  15692. ret += f' {i}\n'
  15693. return ret
  15694. def getTJstr(text: str, glyphs: typing.Union[list, tuple, None], simple: bool, ordering: int) -> str:
  15695. """ Return a PDF string enclosed in [] brackets, suitable for the PDF TJ
  15696. operator.
  15697. Notes:
  15698. The input string is converted to either 2 or 4 hex digits per character.
  15699. Args:
  15700. simple: no glyphs: 2-chars, use char codes as the glyph
  15701. glyphs: 2-chars, use glyphs instead of char codes (Symbol,
  15702. ZapfDingbats)
  15703. not simple: ordering < 0: 4-chars, use glyphs not char codes
  15704. ordering >=0: a CJK font! 4 chars, use char codes as glyphs
  15705. """
  15706. if text.startswith("[<") and text.endswith(">]"): # already done
  15707. return text
  15708. if not bool(text):
  15709. return "[<>]"
  15710. if simple: # each char or its glyph is coded as a 2-byte hex
  15711. if glyphs is None: # not Symbol, not ZapfDingbats: use char code
  15712. otxt = "".join(["%02x" % ord(c) if ord(c) < 256 else "b7" for c in text])
  15713. else: # Symbol or ZapfDingbats: use glyphs
  15714. otxt = "".join(
  15715. ["%02x" % glyphs[ord(c)][0] if ord(c) < 256 else "b7" for c in text]
  15716. )
  15717. return "[<" + otxt + ">]"
  15718. # non-simple fonts: each char or its glyph is coded as 4-byte hex
  15719. if ordering < 0: # not a CJK font: use the glyphs
  15720. otxt = "".join(["%04x" % glyphs[ord(c)][0] for c in text])
  15721. else: # CJK: use the char codes
  15722. otxt = "".join(["%04x" % ord(c) for c in text])
  15723. return "[<" + otxt + ">]"
  15724. def get_pdf_str(s: str) -> str:
  15725. """ Return a PDF string depending on its coding.
  15726. Notes:
  15727. Returns a string bracketed with either "()" or "<>" for hex values.
  15728. If only ascii then "(original)" is returned, else if only 8 bit chars
  15729. then "(original)" with interspersed octal strings \nnn is returned,
  15730. else a string "<FEFF[hexstring]>" is returned, where [hexstring] is the
  15731. UTF-16BE encoding of the original.
  15732. """
  15733. if not bool(s):
  15734. return "()"
  15735. def make_utf16be(s):
  15736. r = bytearray([254, 255]) + bytearray(s, "UTF-16BE")
  15737. return "<" + r.hex() + ">" # brackets indicate hex
  15738. # The following either returns the original string with mixed-in
  15739. # octal numbers \nnn for chars outside the ASCII range, or returns
  15740. # the UTF-16BE BOM version of the string.
  15741. r = ""
  15742. for c in s:
  15743. oc = ord(c)
  15744. if oc > 255: # shortcut if beyond 8-bit code range
  15745. return make_utf16be(s)
  15746. if oc > 31 and oc < 127: # in ASCII range
  15747. if c in ("(", ")", "\\"): # these need to be escaped
  15748. r += "\\"
  15749. r += c
  15750. continue
  15751. if oc > 127: # beyond ASCII
  15752. r += "\\%03o" % oc
  15753. continue
  15754. # now the white spaces
  15755. if oc == 8: # backspace
  15756. r += "\\b"
  15757. elif oc == 9: # tab
  15758. r += "\\t"
  15759. elif oc == 10: # line feed
  15760. r += "\\n"
  15761. elif oc == 12: # form feed
  15762. r += "\\f"
  15763. elif oc == 13: # carriage return
  15764. r += "\\r"
  15765. else:
  15766. r += "\\267" # unsupported: replace by 0xB7
  15767. return "(" + r + ")"
  15768. def get_tessdata(tessdata=None):
  15769. """Detect Tesseract language support folder.
  15770. This function is used to enable OCR via Tesseract even if the language
  15771. support folder is not specified directly or in environment variable
  15772. TESSDATA_PREFIX.
  15773. * If <tessdata> is set we return it directly.
  15774. * Otherwise we return `os.environ['TESSDATA_PREFIX']` if set.
  15775. * Otherwise we search for a Tesseract installation and return its language
  15776. support folder.
  15777. * Otherwise we raise an exception.
  15778. """
  15779. if tessdata:
  15780. return tessdata
  15781. tessdata = os.getenv("TESSDATA_PREFIX")
  15782. if tessdata: # use environment variable if set
  15783. return tessdata
  15784. # Try to locate the tesseract-ocr installation.
  15785. import subprocess
  15786. cp = subprocess.run('tesseract --list-langs', shell=1, capture_output=1, check=0, text=True)
  15787. if cp.returncode == 0:
  15788. m = re.search('List of available languages in "(.+)"', cp.stdout)
  15789. if m:
  15790. tessdata = m.group(1)
  15791. return tessdata
  15792. # Windows systems:
  15793. if sys.platform == "win32":
  15794. cp = subprocess.run("where tesseract", shell=1, capture_output=1, check=0, text=True)
  15795. response = cp.stdout.strip()
  15796. if cp.returncode or not response:
  15797. raise RuntimeError("No tessdata specified and Tesseract is not installed")
  15798. dirname = os.path.dirname(response) # path of tesseract.exe
  15799. tessdata = os.path.join(dirname, "tessdata") # language support
  15800. if os.path.exists(tessdata): # all ok?
  15801. return tessdata
  15802. else: # should not happen!
  15803. raise RuntimeError("No tessdata specified and Tesseract installation has no {tessdata} folder")
  15804. # Unix-like systems:
  15805. attempts = list()
  15806. for path in 'tesseract-ocr', 'tesseract':
  15807. cp = subprocess.run(f'whereis {path}', shell=1, capture_output=1, check=0, text=True)
  15808. if cp.returncode == 0:
  15809. response = cp.stdout.strip().split()
  15810. if len(response) == 2:
  15811. # search tessdata in folder structure
  15812. dirname = response[1] # contains tesseract-ocr installation folder
  15813. pattern = f"{dirname}/*/tessdata"
  15814. attempts.append(pattern)
  15815. tessdatas = glob.glob(pattern)
  15816. tessdatas.sort()
  15817. if tessdatas:
  15818. return tessdatas[-1]
  15819. if attempts:
  15820. text = 'No tessdata specified and no match for:\n'
  15821. for attempt in attempts:
  15822. text += f' {attempt}'
  15823. raise RuntimeError(text)
  15824. else:
  15825. raise RuntimeError('No tessdata specified and Tesseract is not installed')
  15826. def css_for_pymupdf_font(
  15827. fontcode: str, *, CSS: OptStr = None, archive: AnyType = None, name: OptStr = None
  15828. ) -> str:
  15829. """Create @font-face items for the given fontcode of pymupdf-fonts.
  15830. Adds @font-face support for fonts contained in package pymupdf-fonts.
  15831. Creates a CSS font-family for all fonts starting with string 'fontcode'.
  15832. Note:
  15833. The font naming convention in package pymupdf-fonts is "fontcode<sf>",
  15834. where the suffix "sf" is either empty or one of "it", "bo" or "bi".
  15835. These suffixes thus represent the regular, italic, bold or bold-italic
  15836. variants of a font. For example, font code "notos" refers to fonts
  15837. "notos" - "Noto Sans Regular"
  15838. "notosit" - "Noto Sans Italic"
  15839. "notosbo" - "Noto Sans Bold"
  15840. "notosbi" - "Noto Sans Bold Italic"
  15841. This function creates four CSS @font-face definitions and collectively
  15842. assigns the font-family name "notos" to them (or the "name" value).
  15843. All fitting font buffers of the pymupdf-fonts package are placed / added
  15844. to the archive provided as parameter.
  15845. To use the font in pymupdf.Story, execute 'set_font(fontcode)'. The correct
  15846. font weight (bold) or style (italic) will automatically be selected.
  15847. Expects and returns the CSS source, with the new CSS definitions appended.
  15848. Args:
  15849. fontcode: (str) font code for naming the font variants to include.
  15850. E.g. "fig" adds notos, notosi, notosb, notosbi fonts.
  15851. A maximum of 4 font variants is accepted.
  15852. CSS: (str) CSS string to add @font-face definitions to.
  15853. archive: (Archive, mandatory) where to place the font buffers.
  15854. name: (str) use this as family-name instead of 'fontcode'.
  15855. Returns:
  15856. Modified CSS, with appended @font-face statements for each font variant
  15857. of fontcode.
  15858. Fontbuffers associated with "fontcode" will be added to 'archive'.
  15859. """
  15860. # @font-face template string
  15861. CSSFONT = "\n@font-face {font-family: %s; src: url(%s);%s%s}\n"
  15862. if not type(archive) is Archive:
  15863. raise ValueError("'archive' must be an Archive")
  15864. if CSS is None:
  15865. CSS = ""
  15866. # select font codes starting with the pass-in string
  15867. font_keys = [k for k in fitz_fontdescriptors.keys() if k.startswith(fontcode)]
  15868. if font_keys == []:
  15869. raise ValueError(f"No font code '{fontcode}' found in pymupdf-fonts.")
  15870. if len(font_keys) > 4:
  15871. raise ValueError("fontcode too short")
  15872. if name is None: # use this name for font-family
  15873. name = fontcode
  15874. for fkey in font_keys:
  15875. font = fitz_fontdescriptors[fkey]
  15876. bold = font["bold"] # determine font property
  15877. italic = font["italic"] # determine font property
  15878. fbuff = font["loader"]() # load the fontbuffer
  15879. archive.add(fbuff, fkey) # update the archive
  15880. bold_text = "font-weight: bold;" if bold else ""
  15881. italic_text = "font-style: italic;" if italic else ""
  15882. CSS += CSSFONT % (name, fkey, bold_text, italic_text)
  15883. return CSS
  15884. def get_text_length(text: str, fontname: str ="helv", fontsize: float =11, encoding: int =0) -> float:
  15885. """Calculate length of a string for a built-in font.
  15886. Args:
  15887. fontname: name of the font.
  15888. fontsize: font size points.
  15889. encoding: encoding to use, 0=Latin (default), 1=Greek, 2=Cyrillic.
  15890. Returns:
  15891. (float) length of text.
  15892. """
  15893. fontname = fontname.lower()
  15894. basename = Base14_fontdict.get(fontname, None)
  15895. glyphs = None
  15896. if basename == "Symbol":
  15897. glyphs = symbol_glyphs
  15898. if basename == "ZapfDingbats":
  15899. glyphs = zapf_glyphs
  15900. if glyphs is not None:
  15901. w = sum([glyphs[ord(c)][1] if ord(c) < 256 else glyphs[183][1] for c in text])
  15902. return w * fontsize
  15903. if fontname in Base14_fontdict.keys():
  15904. return util_measure_string(
  15905. text, Base14_fontdict[fontname], fontsize, encoding
  15906. )
  15907. if fontname in (
  15908. "china-t",
  15909. "china-s",
  15910. "china-ts",
  15911. "china-ss",
  15912. "japan",
  15913. "japan-s",
  15914. "korea",
  15915. "korea-s",
  15916. ):
  15917. return len(text) * fontsize
  15918. raise ValueError("Font '%s' is unsupported" % fontname)
  15919. def image_profile(img: ByteString) -> dict:
  15920. """ Return basic properties of an image.
  15921. Args:
  15922. img: bytes, bytearray, io.BytesIO object or an opened image file.
  15923. Returns:
  15924. A dictionary with keys width, height, colorspace.n, bpc, type, ext and size,
  15925. where 'type' is the MuPDF image type (0 to 14) and 'ext' the suitable
  15926. file extension.
  15927. """
  15928. if type(img) is io.BytesIO:
  15929. stream = img.getvalue()
  15930. elif hasattr(img, "read"):
  15931. stream = img.read()
  15932. elif type(img) in (bytes, bytearray):
  15933. stream = img
  15934. else:
  15935. raise ValueError("bad argument 'img'")
  15936. return TOOLS.image_profile(stream)
  15937. def jm_append_merge(dev):
  15938. '''
  15939. Append current path to list or merge into last path of the list.
  15940. (1) Append if first path, different item lists or not a 'stroke' version
  15941. of previous path
  15942. (2) If new path has the same items, merge its content into previous path
  15943. and change path["type"] to "fs".
  15944. (3) If "out" is callable, skip the previous and pass dictionary to it.
  15945. '''
  15946. #log(f'{getattr(dev, "pathdict", None)=}')
  15947. assert isinstance(dev.out, list)
  15948. #log( f'{dev.out=}')
  15949. if callable(dev.method) or dev.method: # function or method
  15950. # callback.
  15951. if dev.method is None:
  15952. # fixme, this surely cannot happen?
  15953. assert 0
  15954. #resp = PyObject_CallFunctionObjArgs(out, dev.pathdict, NULL)
  15955. else:
  15956. #log(f'calling {dev.out=} {dev.method=} {dev.pathdict=}')
  15957. resp = getattr(dev.out, dev.method)(dev.pathdict)
  15958. if not resp:
  15959. message("calling cdrawings callback function/method failed!")
  15960. dev.pathdict = None
  15961. return
  15962. def append():
  15963. #log(f'jm_append_merge(): clearing dev.pathdict')
  15964. dev.out.append(dev.pathdict.copy())
  15965. dev.pathdict.clear()
  15966. assert isinstance(dev.out, list)
  15967. len_ = len(dev.out) # len of output list so far
  15968. #log('{len_=}')
  15969. if len_ == 0: # always append first path
  15970. return append()
  15971. #log(f'{getattr(dev, "pathdict", None)=}')
  15972. thistype = dev.pathdict[ dictkey_type]
  15973. #log(f'{thistype=}')
  15974. if thistype != 's': # if not stroke, then append
  15975. return append()
  15976. prev = dev.out[ len_-1] # get prev path
  15977. #log( f'{prev=}')
  15978. prevtype = prev[ dictkey_type]
  15979. #log( f'{prevtype=}')
  15980. if prevtype != 'f': # if previous not fill, append
  15981. return append()
  15982. # last check: there must be the same list of items for "f" and "s".
  15983. previtems = prev[ dictkey_items]
  15984. thisitems = dev.pathdict[ dictkey_items]
  15985. if previtems != thisitems:
  15986. return append()
  15987. #rc = PyDict_Merge(prev, dev.pathdict, 0); // merge with no override
  15988. try:
  15989. for k, v in dev.pathdict.items():
  15990. if k not in prev:
  15991. prev[k] = v
  15992. rc = 0
  15993. except Exception:
  15994. if g_exceptions_verbose: exception_info()
  15995. #raise
  15996. rc = -1
  15997. if rc == 0:
  15998. prev[ dictkey_type] = 'fs'
  15999. dev.pathdict.clear()
  16000. else:
  16001. message("could not merge stroke and fill path")
  16002. append()
  16003. def jm_bbox_add_rect( dev, ctx, rect, code):
  16004. if not dev.layers:
  16005. dev.result.append( (code, JM_py_from_rect(rect)))
  16006. else:
  16007. dev.result.append( (code, JM_py_from_rect(rect), dev.layer_name))
  16008. def jm_bbox_fill_image( dev, ctx, image, ctm, alpha, color_params):
  16009. r = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
  16010. r = mupdf.ll_fz_transform_rect( r.internal(), ctm)
  16011. jm_bbox_add_rect( dev, ctx, r, "fill-image")
  16012. def jm_bbox_fill_image_mask( dev, ctx, image, ctm, colorspace, color, alpha, color_params):
  16013. try:
  16014. jm_bbox_add_rect( dev, ctx, mupdf.ll_fz_transform_rect(mupdf.fz_unit_rect, ctm), "fill-imgmask")
  16015. except Exception:
  16016. if g_exceptions_verbose: exception_info()
  16017. raise
  16018. def jm_bbox_fill_path( dev, ctx, path, even_odd, ctm, colorspace, color, alpha, color_params):
  16019. even_odd = True if even_odd else False
  16020. try:
  16021. jm_bbox_add_rect( dev, ctx, mupdf.ll_fz_bound_path(path, None, ctm), "fill-path")
  16022. except Exception:
  16023. if g_exceptions_verbose: exception_info()
  16024. raise
  16025. def jm_bbox_fill_shade( dev, ctx, shade, ctm, alpha, color_params):
  16026. try:
  16027. jm_bbox_add_rect( dev, ctx, mupdf.ll_fz_bound_shade( shade, ctm), "fill-shade")
  16028. except Exception:
  16029. if g_exceptions_verbose: exception_info()
  16030. raise
  16031. def jm_bbox_stroke_text( dev, ctx, text, stroke, ctm, *args):
  16032. try:
  16033. jm_bbox_add_rect( dev, ctx, mupdf.ll_fz_bound_text( text, stroke, ctm), "stroke-text")
  16034. except Exception:
  16035. if g_exceptions_verbose: exception_info()
  16036. raise
  16037. def jm_bbox_fill_text( dev, ctx, text, ctm, *args):
  16038. try:
  16039. jm_bbox_add_rect( dev, ctx, mupdf.ll_fz_bound_text( text, None, ctm), "fill-text")
  16040. except Exception:
  16041. if g_exceptions_verbose: exception_info()
  16042. raise
  16043. def jm_bbox_ignore_text( dev, ctx, text, ctm):
  16044. jm_bbox_add_rect( dev, ctx, mupdf.ll_fz_bound_text(text, None, ctm), "ignore-text")
  16045. def jm_bbox_stroke_path( dev, ctx, path, stroke, ctm, colorspace, color, alpha, color_params):
  16046. try:
  16047. jm_bbox_add_rect( dev, ctx, mupdf.ll_fz_bound_path( path, stroke, ctm), "stroke-path")
  16048. except Exception:
  16049. if g_exceptions_verbose: exception_info()
  16050. raise
  16051. def jm_checkquad(dev):
  16052. '''
  16053. Check whether the last 4 lines represent a quad.
  16054. Because of how we count, the lines are a polyline already, i.e. last point
  16055. of a line equals 1st point of next line.
  16056. So we check for a polygon (last line's end point equals start point).
  16057. If not true we return 0.
  16058. '''
  16059. #log(f'{getattr(dev, "pathdict", None)=}')
  16060. items = dev.pathdict[ dictkey_items]
  16061. len_ = len(items)
  16062. f = [0] * 8 # coordinates of the 4 corners
  16063. # fill the 8 floats in f, start from items[-4:]
  16064. for i in range( 4): # store line start points
  16065. line = items[ len_ - 4 + i]
  16066. temp = JM_point_from_py( line[1])
  16067. f[i * 2] = temp.x
  16068. f[i * 2 + 1] = temp.y
  16069. lp = JM_point_from_py( line[ 2])
  16070. if lp.x != f[0] or lp.y != f[1]:
  16071. # not a polygon!
  16072. #dev.linecount -= 1
  16073. return 0
  16074. # we have detected a quad
  16075. dev.linecount = 0 # reset this
  16076. # a quad item is ("qu", (ul, ur, ll, lr)), where the tuple items
  16077. # are pairs of floats representing a quad corner each.
  16078. # relationship of float array to quad points:
  16079. # (0, 1) = ul, (2, 3) = ll, (6, 7) = ur, (4, 5) = lr
  16080. q = mupdf.fz_make_quad(f[0], f[1], f[6], f[7], f[2], f[3], f[4], f[5])
  16081. rect = ('qu', JM_py_from_quad(q))
  16082. items[ len_ - 4] = rect # replace item -4 by rect
  16083. del items[ len_ - 3 : len_] # delete remaining 3 items
  16084. return 1
  16085. def jm_checkrect(dev):
  16086. '''
  16087. Check whether the last 3 path items represent a rectangle.
  16088. Returns 1 if we have modified the path, otherwise 0.
  16089. '''
  16090. #log(f'{getattr(dev, "pathdict", None)=}')
  16091. dev.linecount = 0 # reset line count
  16092. orientation = 0 # area orientation of rectangle
  16093. items = dev.pathdict[ dictkey_items]
  16094. len_ = len(items)
  16095. line0 = items[ len_ - 3]
  16096. ll = JM_point_from_py( line0[ 1])
  16097. lr = JM_point_from_py( line0[ 2])
  16098. # no need to extract "line1"!
  16099. line2 = items[ len_ - 1]
  16100. ur = JM_point_from_py( line2[ 1])
  16101. ul = JM_point_from_py( line2[ 2])
  16102. # Assumption:
  16103. # When decomposing rects, MuPDF always starts with a horizontal line,
  16104. # followed by a vertical line, followed by a horizontal line.
  16105. # First line: (ll, lr), third line: (ul, ur).
  16106. # If 1st line is below 3rd line, we record anti-clockwise (+1), else
  16107. # clockwise (-1) orientation.
  16108. if (0
  16109. or ll.y != lr.y
  16110. or ll.x != ul.x
  16111. or ur.y != ul.y
  16112. or ur.x != lr.x
  16113. ):
  16114. return 0 # not a rectangle
  16115. # we have a rect, replace last 3 "l" items by one "re" item.
  16116. if ul.y < lr.y:
  16117. r = mupdf.fz_make_rect(ul.x, ul.y, lr.x, lr.y)
  16118. orientation = 1
  16119. else:
  16120. r = mupdf.fz_make_rect(ll.x, ll.y, ur.x, ur.y)
  16121. orientation = -1
  16122. rect = ( 're', JM_py_from_rect(r), orientation)
  16123. items[ len_ - 3] = rect # replace item -3 by rect
  16124. del items[ len_ - 2 : len_] # delete remaining 2 items
  16125. return 1
  16126. def jm_trace_text( dev, text, type_, ctm, colorspace, color, alpha, seqno):
  16127. span = text.head
  16128. while 1:
  16129. if not span:
  16130. break
  16131. jm_trace_text_span( dev, span, type_, ctm, colorspace, color, alpha, seqno)
  16132. span = span.next
def jm_trace_text_span(dev, span, type_, ctm, colorspace, color, alpha, seqno):
    '''
    jm_trace_text_span(fz_context *ctx, PyObject *out, fz_text_span *span, int type, fz_matrix ctm, fz_colorspace *colorspace, const float *color, float alpha, size_t seqno)

    Build a dictionary describing one text span and append it to dev.out.

    Args:
        dev: device object; dev.linewidth and dev.layer_name are read and the
            result is appended to dev.out.
        span: the text span, must be a mupdf.fz_text_span.
        type_: stored unchanged under key 'type'.
        ctm: transformation matrix, must be a mupdf.fz_matrix.
        colorspace, color: text color; converted to RGB if 'colorspace' is
            true, otherwise (0, 0, 0) is stored.
        alpha: stored unchanged under key 'opacity'.
        seqno: stored unchanged under key 'seqno'.

    The dictionary has the keys 'dir', 'font', 'wmode', 'flags', 'bidi_lvl',
    'bidi_dir', 'ascender', 'descender', 'colorspace', 'color', 'size',
    'opacity', 'linewidth', 'spacewidth', 'type', 'bbox', 'layer', 'seqno'
    and 'chars'. 'chars' is a tuple with one (ucs, gid, origin, bbox) item
    per character.
    '''
    out_font = None     # only rebound by the fallback lookup further down
    assert isinstance( span, mupdf.fz_text_span)
    span = mupdf.FzTextSpan( span)
    assert isinstance( ctm, mupdf.fz_matrix)
    ctm = mupdf.FzMatrix( ctm)
    fontname = JM_font_name( span.font())
    #float rgb[3];
    #PyObject *chars = PyTuple_New(span->len);

    mat = mupdf.fz_concat(span.trm(), ctm)  # text transformation matrix
    dir = mupdf.fz_transform_vector(mupdf.fz_make_point(1, 0), mat) # writing direction
    fsize = math.sqrt(dir.x * dir.x + dir.y * dir.y) # font size

    dir = mupdf.fz_normalize_vector(dir)

    space_adv = 0   # advance of a space character; stays 0 if the span has none
    asc = JM_font_ascender( span.font())
    dsc = JM_font_descender( span.font())
    if asc < 1e-3:  # probably Tesseract font
        dsc = -0.1
        asc = 0.9

    # compute effective ascender / descender
    # (scaled such that ascsize - dscsize equals fsize)
    ascsize = asc * fsize / (asc - dsc)
    dscsize = dsc * fsize / (asc - dsc)
    fflags = 0 # font flags
    # each property contributes its TEXT_FONT_* value if it is true
    mono = mupdf.fz_font_is_monospaced( span.font())
    fflags += mono * TEXT_FONT_MONOSPACED
    fflags += mupdf.fz_font_is_italic( span.font()) * TEXT_FONT_ITALIC
    fflags += mupdf.fz_font_is_serif( span.font()) * TEXT_FONT_SERIFED
    fflags += mupdf.fz_font_is_bold( span.font()) * TEXT_FONT_BOLD

    last_adv = 0    # advance of the most recently processed character

    # walk through characters of span
    span_bbox = mupdf.FzRect()
    # matrix built from the normalized writing direction
    rot = mupdf.fz_make_matrix(dir.x, dir.y, -dir.y, dir.x, 0, 0)
    if dir.x == -1:  # left-right flip
        rot.d = 1

    chars = []
    for i in range( span.m_internal.len):
        adv = 0
        if span.items(i).gid >= 0:
            adv = mupdf.fz_advance_glyph( span.font(), span.items(i).gid, span.m_internal.wmode)
        adv *= fsize
        last_adv = adv
        if span.items(i).ucs == 32:
            space_adv = adv
        char_orig = mupdf.fz_make_point(span.items(i).x, span.items(i).y)
        char_orig = mupdf.fz_transform_point(char_orig, ctm)
        # m1: move the character origin to (0, 0), apply 'rot', move back
        m1 = mupdf.fz_make_matrix(1, 0, 0, 1, -char_orig.x, -char_orig.y)
        m1 = mupdf.fz_concat(m1, rot)
        m1 = mupdf.fz_concat(m1, mupdf.FzMatrix(1, 0, 0, 1, char_orig.x, char_orig.y))
        # unrotated character box: from the origin to origin + advance
        x0 = char_orig.x
        x1 = x0 + adv
        if (
                (mat.d > 0 and (dir.x == 1 or dir.x == -1))
                or
                (mat.b != 0 and mat.b == -mat.c)
                ):  # up-down flip
            y0 = char_orig.y + dscsize
            y1 = char_orig.y + ascsize
        else:
            y0 = char_orig.y - ascsize
            y1 = char_orig.y - dscsize
        char_bbox = mupdf.fz_make_rect(x0, y0, x1, y1)
        char_bbox = mupdf.fz_transform_rect(char_bbox, m1)
        chars.append(
                (
                    span.items(i).ucs,
                    span.items(i).gid,
                    (
                        char_orig.x,
                        char_orig.y,
                    ),
                    (
                        char_bbox.x0,
                        char_bbox.y0,
                        char_bbox.x1,
                        char_bbox.y1,
                    ),
                )
                )
        # the span bbox is the union of all character boxes
        if i > 0:
            span_bbox = mupdf.fz_union_rect(span_bbox, char_bbox)
        else:
            span_bbox = char_bbox
    chars = tuple(chars)

    # no space character in the span: derive the space width differently
    if not space_adv:
        if not (fflags & TEXT_FONT_MONOSPACED):
            # look up the glyph of character 32 (space) and use its advance
            c, out_font = mupdf.fz_encode_character_with_fallback( span.font(), 32, 0, 0)
            space_adv = mupdf.fz_advance_glyph(
                    span.font(),
                    c,
                    span.m_internal.wmode,
                    )
            space_adv *= fsize
            if not space_adv:
                space_adv = last_adv
        else:
            space_adv = last_adv    # for mono, any char width suffices

    # make the span dictionary
    span_dict = dict()
    span_dict[ 'dir'] = JM_py_from_point(dir)
    span_dict[ 'font'] = JM_EscapeStrFromStr(fontname)
    span_dict[ 'wmode'] = span.m_internal.wmode
    span_dict[ 'flags'] =fflags
    span_dict[ "bidi_lvl"] =span.m_internal.bidi_level
    span_dict[ "bidi_dir"] = span.m_internal.markup_dir
    span_dict[ 'ascender'] = asc
    span_dict[ 'descender'] = dsc
    span_dict[ 'colorspace'] = 3    # always 3: the color below has 3 components

    if colorspace:
        rgb = mupdf.fz_convert_color(
                mupdf.FzColorspace( mupdf.ll_fz_keep_colorspace( colorspace)),
                color,
                mupdf.fz_device_rgb(),
                mupdf.FzColorspace(),
                mupdf.FzColorParams(),
                )
        rgb = rgb[:3]   # mupdf.fz_convert_color() always returns 4 items.
    else:
        rgb = (0, 0, 0)

    if dev.linewidth > 0:   # width of character border
        linewidth = dev.linewidth
    else:
        linewidth = fsize * 0.05    # default: 5% of font size
    #log(f'{dev.linewidth=:.4f} {fsize=:.4f} {linewidth=:.4f}')

    span_dict[ 'color'] = rgb
    span_dict[ 'size'] = fsize
    span_dict[ "opacity"] = alpha
    span_dict[ "linewidth"] = linewidth
    span_dict[ "spacewidth"] = space_adv
    span_dict[ 'type'] = type_
    span_dict[ 'bbox'] = JM_py_from_rect(span_bbox)
    span_dict[ 'layer'] = dev.layer_name
    span_dict[ "seqno"] = seqno
    span_dict[ 'chars'] = chars
    #log(f'{span_dict=}')
    dev.out.append( span_dict)
  16271. def jm_lineart_color(colorspace, color):
  16272. #log(f' ')
  16273. if colorspace:
  16274. try:
  16275. # Need to be careful to use a named Python object to ensure
  16276. # that the `params` we pass to mupdf.ll_fz_convert_color() is
  16277. # valid. E.g. doing:
  16278. #
  16279. # rgb = mupdf.ll_fz_convert_color(..., mupdf.FzColorParams().internal())
  16280. #
  16281. # - seems to end up with a corrupted `params`.
  16282. #
  16283. cs = mupdf.FzColorspace( mupdf.FzColorspace.Fixed_RGB)
  16284. cp = mupdf.FzColorParams()
  16285. rgb = mupdf.ll_fz_convert_color(
  16286. colorspace,
  16287. color,
  16288. cs.m_internal,
  16289. None,
  16290. cp.internal(),
  16291. )
  16292. except Exception:
  16293. if g_exceptions_verbose: exception_info()
  16294. raise
  16295. return rgb[:3]
  16296. return ()
  16297. def jm_lineart_drop_device(dev, ctx):
  16298. if isinstance(dev.out, list):
  16299. dev.out = []
  16300. dev.scissors = []
  16301. def jm_lineart_fill_path( dev, ctx, path, even_odd, ctm, colorspace, color, alpha, color_params):
  16302. #log(f'{getattr(dev, "pathdict", None)=}')
  16303. #log(f'jm_lineart_fill_path(): {dev.seqno=}')
  16304. even_odd = True if even_odd else False
  16305. try:
  16306. assert isinstance( ctm, mupdf.fz_matrix)
  16307. dev.ctm = mupdf.FzMatrix( ctm) # fz_concat(ctm, dev_ptm);
  16308. dev.path_type = trace_device_FILL_PATH
  16309. jm_lineart_path( dev, ctx, path)
  16310. if dev.pathdict is None:
  16311. return
  16312. #item_count = len(dev.pathdict[ dictkey_items])
  16313. #if item_count == 0:
  16314. # return
  16315. dev.pathdict[ dictkey_type] ="f"
  16316. dev.pathdict[ "even_odd"] = even_odd
  16317. dev.pathdict[ "fill_opacity"] = alpha
  16318. #log(f'setting dev.pathdict[ "closePath"] to false')
  16319. #dev.pathdict[ "closePath"] = False
  16320. dev.pathdict[ "fill"] = jm_lineart_color( colorspace, color)
  16321. dev.pathdict[ dictkey_rect] = JM_py_from_rect(dev.pathrect)
  16322. dev.pathdict[ "seqno"] = dev.seqno
  16323. #jm_append_merge(dev)
  16324. dev.pathdict[ 'layer'] = dev.layer_name
  16325. if dev.clips:
  16326. dev.pathdict[ 'level'] = dev.depth
  16327. jm_append_merge(dev)
  16328. dev.seqno += 1
  16329. #log(f'jm_lineart_fill_path() end: {getattr(dev, "pathdict", None)=}')
  16330. except Exception:
  16331. if g_exceptions_verbose: exception_info()
  16332. raise
  16333. # There are 3 text trace types:
  16334. # 0 - fill text (PDF Tr 0)
  16335. # 1 - stroke text (PDF Tr 1)
  16336. # 3 - ignore text (PDF Tr 3)
  16337. def jm_lineart_fill_text( dev, ctx, text, ctm, colorspace, color, alpha, color_params):
  16338. if 0:
  16339. log(f'{type(ctx)=} {ctx=}')
  16340. log(f'{type(dev)=} {dev=}')
  16341. log(f'{type(text)=} {text=}')
  16342. log(f'{type(ctm)=} {ctm=}')
  16343. log(f'{type(colorspace)=} {colorspace=}')
  16344. log(f'{type(color)=} {color=}')
  16345. log(f'{type(alpha)=} {alpha=}')
  16346. log(f'{type(color_params)=} {color_params=}')
  16347. jm_trace_text(dev, text, 0, ctm, colorspace, color, alpha, dev.seqno)
  16348. dev.seqno += 1
  16349. def jm_lineart_ignore_text(dev, text, ctm):
  16350. #log(f'{getattr(dev, "pathdict", None)=}')
  16351. jm_trace_text(dev, text, 3, ctm, None, None, 1, dev.seqno)
  16352. dev.seqno += 1
  16353. class Walker(mupdf.FzPathWalker2):
  16354. def __init__(self, dev):
  16355. super().__init__()
  16356. self.use_virtual_moveto()
  16357. self.use_virtual_lineto()
  16358. self.use_virtual_curveto()
  16359. self.use_virtual_closepath()
  16360. self.dev = dev
  16361. def closepath(self, ctx): # trace_close().
  16362. #log(f'Walker(): {self.dev.pathdict=}')
  16363. try:
  16364. if self.dev.linecount == 3:
  16365. if jm_checkrect(self.dev):
  16366. #log(f'end1: {self.dev.pathdict=}')
  16367. return
  16368. self.dev.linecount = 0 # reset # of consec. lines
  16369. if self.dev.havemove:
  16370. if self.dev.lastpoint != self.dev.firstpoint:
  16371. item = ("l", JM_py_from_point(self.dev.lastpoint),
  16372. JM_py_from_point(self.dev.firstpoint))
  16373. self.dev.pathdict[dictkey_items].append(item)
  16374. self.dev.lastpoint = self.dev.firstpoint
  16375. self.dev.pathdict["closePath"] = False
  16376. else:
  16377. #log('setting self.dev.pathdict[ "closePath"] to true')
  16378. self.dev.pathdict[ "closePath"] = True
  16379. #log(f'end2: {self.dev.pathdict=}')
  16380. self.dev.havemove = 0
  16381. except Exception:
  16382. if g_exceptions_verbose: exception_info()
  16383. raise
  16384. def curveto(self, ctx, x1, y1, x2, y2, x3, y3): # trace_curveto().
  16385. #log(f'Walker(): {self.dev.pathdict=}')
  16386. try:
  16387. self.dev.linecount = 0 # reset # of consec. lines
  16388. p1 = mupdf.fz_make_point(x1, y1)
  16389. p2 = mupdf.fz_make_point(x2, y2)
  16390. p3 = mupdf.fz_make_point(x3, y3)
  16391. p1 = mupdf.fz_transform_point(p1, self.dev.ctm)
  16392. p2 = mupdf.fz_transform_point(p2, self.dev.ctm)
  16393. p3 = mupdf.fz_transform_point(p3, self.dev.ctm)
  16394. self.dev.pathrect = mupdf.fz_include_point_in_rect(self.dev.pathrect, p1)
  16395. self.dev.pathrect = mupdf.fz_include_point_in_rect(self.dev.pathrect, p2)
  16396. self.dev.pathrect = mupdf.fz_include_point_in_rect(self.dev.pathrect, p3)
  16397. list_ = (
  16398. "c",
  16399. JM_py_from_point(self.dev.lastpoint),
  16400. JM_py_from_point(p1),
  16401. JM_py_from_point(p2),
  16402. JM_py_from_point(p3),
  16403. )
  16404. self.dev.lastpoint = p3
  16405. self.dev.pathdict[ dictkey_items].append( list_)
  16406. except Exception:
  16407. if g_exceptions_verbose: exception_info()
  16408. raise
  16409. def lineto(self, ctx, x, y): # trace_lineto().
  16410. #log(f'Walker(): {self.dev.pathdict=}')
  16411. try:
  16412. p1 = mupdf.fz_transform_point( mupdf.fz_make_point(x, y), self.dev.ctm)
  16413. self.dev.pathrect = mupdf.fz_include_point_in_rect( self.dev.pathrect, p1)
  16414. list_ = (
  16415. 'l',
  16416. JM_py_from_point( self.dev.lastpoint),
  16417. JM_py_from_point(p1),
  16418. )
  16419. self.dev.lastpoint = p1
  16420. items = self.dev.pathdict[ dictkey_items]
  16421. items.append( list_)
  16422. self.dev.linecount += 1 # counts consecutive lines
  16423. if self.dev.linecount == 4 and self.dev.path_type != trace_device_FILL_PATH:
  16424. # shrink to "re" or "qu" item
  16425. jm_checkquad(self.dev)
  16426. except Exception:
  16427. if g_exceptions_verbose: exception_info()
  16428. raise
  16429. def moveto(self, ctx, x, y): # trace_moveto().
  16430. if 0 and isinstance(self.dev.pathdict, dict):
  16431. log(f'self.dev.pathdict:')
  16432. for n, v in self.dev.pathdict.items():
  16433. log( ' {type(n)=} {len(n)=} {n!r} {n}: {v!r}: {v}')
  16434. #log(f'Walker(): {type(self.dev.pathdict)=} {self.dev.pathdict=}')
  16435. try:
  16436. #log( '{=dev.ctm type(dev.ctm)}')
  16437. self.dev.lastpoint = mupdf.fz_transform_point(
  16438. mupdf.fz_make_point(x, y),
  16439. self.dev.ctm,
  16440. )
  16441. if mupdf.fz_is_infinite_rect( self.dev.pathrect):
  16442. self.dev.pathrect = mupdf.fz_make_rect(
  16443. self.dev.lastpoint.x,
  16444. self.dev.lastpoint.y,
  16445. self.dev.lastpoint.x,
  16446. self.dev.lastpoint.y,
  16447. )
  16448. self.dev.firstpoint = self.dev.lastpoint
  16449. self.dev.havemove = 1
  16450. self.dev.linecount = 0 # reset # of consec. lines
  16451. except Exception:
  16452. if g_exceptions_verbose: exception_info()
  16453. raise
  16454. def jm_lineart_path(dev, ctx, path):
  16455. '''
  16456. Create the "items" list of the path dictionary
  16457. * either create or empty the path dictionary
  16458. * reset the end point of the path
  16459. * reset count of consecutive lines
  16460. * invoke fz_walk_path(), which create the single items
  16461. * if no items detected, empty path dict again
  16462. '''
  16463. #log(f'{getattr(dev, "pathdict", None)=}')
  16464. try:
  16465. dev.pathrect = mupdf.FzRect( mupdf.FzRect.Fixed_INFINITE)
  16466. dev.linecount = 0
  16467. dev.lastpoint = mupdf.FzPoint( 0, 0)
  16468. dev.pathdict = dict()
  16469. dev.pathdict[ dictkey_items] = []
  16470. # First time we create a Walker instance is slow, e.g. 0.3s, then later
  16471. # times run in around 0.01ms. If Walker is defined locally instead of
  16472. # globally, each time takes 0.3s.
  16473. #
  16474. walker = Walker(dev)
  16475. # Unlike fz_run_page(), fz_path_walker callbacks are not passed
  16476. # a pointer to the struct, instead they get an arbitrary
  16477. # void*. The underlying C++ Director callbacks use this void* to
  16478. # identify the fz_path_walker instance so in turn we need to pass
  16479. # arg=walker.m_internal.
  16480. mupdf.fz_walk_path( mupdf.FzPath(mupdf.ll_fz_keep_path(path)), walker, walker.m_internal)
  16481. # Check if any items were added ...
  16482. if not dev.pathdict[ dictkey_items]:
  16483. dev.pathdict = None
  16484. except Exception:
  16485. if g_exceptions_verbose: exception_info()
  16486. raise
  16487. def jm_lineart_stroke_path( dev, ctx, path, stroke, ctm, colorspace, color, alpha, color_params):
  16488. #log(f'{dev.pathdict=} {dev.clips=}')
  16489. try:
  16490. assert isinstance( ctm, mupdf.fz_matrix)
  16491. dev.pathfactor = 1
  16492. if ctm.a != 0 and abs(ctm.a) == abs(ctm.d):
  16493. dev.pathfactor = abs(ctm.a)
  16494. elif ctm.b != 0 and abs(ctm.b) == abs(ctm.c):
  16495. dev.pathfactor = abs(ctm.b)
  16496. dev.ctm = mupdf.FzMatrix( ctm) # fz_concat(ctm, dev_ptm);
  16497. dev.path_type = trace_device_STROKE_PATH
  16498. jm_lineart_path( dev, ctx, path)
  16499. if dev.pathdict is None:
  16500. return
  16501. dev.pathdict[ dictkey_type] = 's'
  16502. dev.pathdict[ 'stroke_opacity'] = alpha
  16503. dev.pathdict[ 'color'] = jm_lineart_color( colorspace, color)
  16504. dev.pathdict[ dictkey_width] = dev.pathfactor * stroke.linewidth
  16505. dev.pathdict[ 'lineCap'] = (
  16506. stroke.start_cap,
  16507. stroke.dash_cap,
  16508. stroke.end_cap,
  16509. )
  16510. dev.pathdict[ 'lineJoin'] = dev.pathfactor * stroke.linejoin
  16511. if 'closePath' not in dev.pathdict:
  16512. #log('setting dev.pathdict["closePath"] to false')
  16513. dev.pathdict['closePath'] = False
  16514. # output the "dashes" string
  16515. if stroke.dash_len:
  16516. buff = mupdf.fz_new_buffer( 256)
  16517. mupdf.fz_append_string( buff, "[ ") # left bracket
  16518. for i in range( stroke.dash_len):
  16519. # We use mupdf python's SWIG-generated floats_getitem() fn to
  16520. # access float *stroke.dash_list[].
  16521. value = mupdf.floats_getitem( stroke.dash_list, i) # stroke.dash_list[i].
  16522. mupdf.fz_append_string( buff, f'{_format_g(dev.pathfactor * value)} ')
  16523. mupdf.fz_append_string( buff, f'] {_format_g(dev.pathfactor * stroke.dash_phase)}')
  16524. dev.pathdict[ 'dashes'] = buff
  16525. else:
  16526. dev.pathdict[ 'dashes'] = '[] 0'
  16527. dev.pathdict[ dictkey_rect] = JM_py_from_rect(dev.pathrect)
  16528. dev.pathdict['layer'] = dev.layer_name
  16529. dev.pathdict[ 'seqno'] = dev.seqno
  16530. if dev.clips:
  16531. dev.pathdict[ 'level'] = dev.depth
  16532. jm_append_merge(dev)
  16533. dev.seqno += 1
  16534. except Exception:
  16535. if g_exceptions_verbose: exception_info()
  16536. raise
  16537. def jm_lineart_clip_path(dev, ctx, path, even_odd, ctm, scissor):
  16538. if not dev.clips:
  16539. return
  16540. dev.ctm = mupdf.FzMatrix(ctm) # fz_concat(ctm, trace_device_ptm);
  16541. dev.path_type = trace_device_CLIP_PATH
  16542. jm_lineart_path(dev, ctx, path)
  16543. if dev.pathdict is None:
  16544. return
  16545. dev.pathdict[ dictkey_type] = 'clip'
  16546. dev.pathdict[ 'even_odd'] = bool(even_odd)
  16547. if 'closePath' not in dev.pathdict:
  16548. #log(f'setting dev.pathdict["closePath"] to False')
  16549. dev.pathdict['closePath'] = False
  16550. dev.pathdict['scissor'] = JM_py_from_rect(compute_scissor(dev))
  16551. dev.pathdict['level'] = dev.depth
  16552. dev.pathdict['layer'] = dev.layer_name
  16553. jm_append_merge(dev)
  16554. dev.depth += 1
  16555. def jm_lineart_clip_stroke_path(dev, ctx, path, stroke, ctm, scissor):
  16556. if not dev.clips:
  16557. return
  16558. dev.ctm = mupdf.FzMatrix(ctm) # fz_concat(ctm, trace_device_ptm);
  16559. dev.path_type = trace_device_CLIP_STROKE_PATH
  16560. jm_lineart_path(dev, ctx, path)
  16561. if dev.pathdict is None:
  16562. return
  16563. dev.pathdict['dictkey_type'] = 'clip'
  16564. dev.pathdict['even_odd'] = None
  16565. if 'closePath' not in dev.pathdict:
  16566. #log(f'setting dev.pathdict["closePath"] to False')
  16567. dev.pathdict['closePath'] = False
  16568. dev.pathdict['scissor'] = JM_py_from_rect(compute_scissor(dev))
  16569. dev.pathdict['level'] = dev.depth
  16570. dev.pathdict['layer'] = dev.layer_name
  16571. jm_append_merge(dev)
  16572. dev.depth += 1
  16573. def jm_lineart_clip_stroke_text(dev, ctx, text, stroke, ctm, scissor):
  16574. if not dev.clips:
  16575. return
  16576. compute_scissor(dev)
  16577. dev.depth += 1
  16578. def jm_lineart_clip_text(dev, ctx, text, ctm, scissor):
  16579. if not dev.clips:
  16580. return
  16581. compute_scissor(dev)
  16582. dev.depth += 1
  16583. def jm_lineart_clip_image_mask( dev, ctx, image, ctm, scissor):
  16584. if not dev.clips:
  16585. return
  16586. compute_scissor(dev)
  16587. dev.depth += 1
  16588. def jm_lineart_pop_clip(dev, ctx):
  16589. if not dev.clips or not dev.scissors:
  16590. return
  16591. len_ = len(dev.scissors)
  16592. if len_ < 1:
  16593. return
  16594. del dev.scissors[-1]
  16595. dev.depth -= 1
  16596. def jm_lineart_begin_layer(dev, ctx, name):
  16597. if name:
  16598. dev.layer_name = name
  16599. else:
  16600. dev.layer_name = ""
  16601. def jm_lineart_end_layer(dev, ctx):
  16602. dev.layer_name = ""
  16603. def jm_lineart_begin_group(dev, ctx, bbox, cs, isolated, knockout, blendmode, alpha):
  16604. #log(f'{dev.pathdict=} {dev.clips=}')
  16605. if not dev.clips:
  16606. return
  16607. dev.pathdict = { # Py_BuildValue("{s:s,s:N,s:N,s:N,s:s,s:f,s:i,s:N}",
  16608. "type": "group",
  16609. "rect": JM_py_from_rect(bbox),
  16610. "isolated": bool(isolated),
  16611. "knockout": bool(knockout),
  16612. "blendmode": mupdf.fz_blendmode_name(blendmode),
  16613. "opacity": alpha,
  16614. "level": dev.depth,
  16615. "layer": dev.layer_name
  16616. }
  16617. jm_append_merge(dev)
  16618. dev.depth += 1
  16619. def jm_lineart_end_group(dev, ctx):
  16620. #log(f'{dev.pathdict=} {dev.clips=}')
  16621. if not dev.clips:
  16622. return
  16623. dev.depth -= 1
  16624. def jm_lineart_stroke_text(dev, ctx, text, stroke, ctm, colorspace, color, alpha, color_params):
  16625. jm_trace_text(dev, text, 1, ctm, colorspace, color, alpha, dev.seqno)
  16626. dev.seqno += 1
  16627. def jm_dev_linewidth( dev, ctx, path, stroke, matrix, colorspace, color, alpha, color_params):
  16628. dev.linewidth = stroke.linewidth
  16629. jm_increase_seqno( dev, ctx)
  16630. def jm_increase_seqno( dev, ctx, *vargs):
  16631. try:
  16632. dev.seqno += 1
  16633. except Exception:
  16634. if g_exceptions_verbose: exception_info()
  16635. raise
  16636. def planish_line(p1: point_like, p2: point_like) -> Matrix:
  16637. """Compute matrix which maps line from p1 to p2 to the x-axis, such that it
  16638. maintains its length and p1 * matrix = Point(0, 0).
  16639. Args:
  16640. p1, p2: point_like
  16641. Returns:
  16642. Matrix which maps p1 to Point(0, 0) and p2 to a point on the x axis at
  16643. the same distance to Point(0,0). Will always combine a rotation and a
  16644. transformation.
  16645. """
  16646. p1 = Point(p1)
  16647. p2 = Point(p2)
  16648. return Matrix(util_hor_matrix(p1, p2))
  16649. class JM_image_reporter_Filter(mupdf.PdfFilterOptions2):
  16650. def __init__(self):
  16651. super().__init__()
  16652. self.use_virtual_image_filter()
  16653. def image_filter( self, ctx, ctm, name, image):
  16654. assert isinstance(ctm, mupdf.fz_matrix)
  16655. JM_image_filter(self, mupdf.FzMatrix(ctm), name, image)
  16656. if mupdf_cppyy:
  16657. # cppyy doesn't appear to treat returned None as nullptr,
  16658. # resulting in obscure 'python exception' exception.
  16659. return 0
  16660. class JM_new_bbox_device_Device(mupdf.FzDevice2):
  16661. def __init__(self, result, layers):
  16662. super().__init__()
  16663. self.result = result
  16664. self.layers = layers
  16665. self.layer_name = ""
  16666. self.use_virtual_fill_path()
  16667. self.use_virtual_stroke_path()
  16668. self.use_virtual_fill_text()
  16669. self.use_virtual_stroke_text()
  16670. self.use_virtual_ignore_text()
  16671. self.use_virtual_fill_shade()
  16672. self.use_virtual_fill_image()
  16673. self.use_virtual_fill_image_mask()
  16674. self.use_virtual_begin_layer()
  16675. self.use_virtual_end_layer()
  16676. begin_layer = jm_lineart_begin_layer
  16677. end_layer = jm_lineart_end_layer
  16678. fill_path = jm_bbox_fill_path
  16679. stroke_path = jm_bbox_stroke_path
  16680. fill_text = jm_bbox_fill_text
  16681. stroke_text = jm_bbox_stroke_text
  16682. ignore_text = jm_bbox_ignore_text
  16683. fill_shade = jm_bbox_fill_shade
  16684. fill_image = jm_bbox_fill_image
  16685. fill_image_mask = jm_bbox_fill_image_mask
  16686. class JM_new_output_fileptr_Output(mupdf.FzOutput2):
  16687. def __init__(self, bio):
  16688. super().__init__()
  16689. self.bio = bio
  16690. self.use_virtual_write()
  16691. self.use_virtual_seek()
  16692. self.use_virtual_tell()
  16693. self.use_virtual_truncate()
  16694. def seek( self, ctx, offset, whence):
  16695. return self.bio.seek( offset, whence)
  16696. def tell( self, ctx):
  16697. ret = self.bio.tell()
  16698. return ret
  16699. def truncate( self, ctx):
  16700. return self.bio.truncate()
  16701. def write(self, ctx, data_raw, data_length):
  16702. data = mupdf.raw_to_python_bytes(data_raw, data_length)
  16703. return self.bio.write(data)
  16704. def compute_scissor(dev):
  16705. '''
  16706. Every scissor of a clip is a sub rectangle of the preceding clip scissor
  16707. if the clip level is larger.
  16708. '''
  16709. if dev.scissors is None:
  16710. dev.scissors = list()
  16711. num_scissors = len(dev.scissors)
  16712. if num_scissors > 0:
  16713. last_scissor = dev.scissors[num_scissors-1]
  16714. scissor = JM_rect_from_py(last_scissor)
  16715. scissor = mupdf.fz_intersect_rect(scissor, dev.pathrect)
  16716. else:
  16717. scissor = dev.pathrect
  16718. dev.scissors.append(JM_py_from_rect(scissor))
  16719. return scissor
  16720. class JM_new_lineart_device_Device(mupdf.FzDevice2):
  16721. '''
  16722. LINEART device for Python method Page.get_cdrawings()
  16723. '''
  16724. #log(f'JM_new_lineart_device_Device()')
  16725. def __init__(self, out, clips, method):
  16726. #log(f'JM_new_lineart_device_Device.__init__()')
  16727. super().__init__()
  16728. # fixme: this results in "Unexpected call of unimplemented virtual_fnptrs fn FzDevice2::drop_device().".
  16729. #self.use_virtual_drop_device()
  16730. self.use_virtual_fill_path()
  16731. self.use_virtual_stroke_path()
  16732. self.use_virtual_clip_path()
  16733. self.use_virtual_clip_image_mask()
  16734. self.use_virtual_clip_stroke_path()
  16735. self.use_virtual_clip_stroke_text()
  16736. self.use_virtual_clip_text()
  16737. self.use_virtual_fill_text
  16738. self.use_virtual_stroke_text
  16739. self.use_virtual_ignore_text
  16740. self.use_virtual_fill_shade()
  16741. self.use_virtual_fill_image()
  16742. self.use_virtual_fill_image_mask()
  16743. self.use_virtual_pop_clip()
  16744. self.use_virtual_begin_group()
  16745. self.use_virtual_end_group()
  16746. self.use_virtual_begin_layer()
  16747. self.use_virtual_end_layer()
  16748. self.out = out
  16749. self.seqno = 0
  16750. self.depth = 0
  16751. self.clips = clips
  16752. self.method = method
  16753. self.scissors = None
  16754. self.layer_name = "" # optional content name
  16755. self.pathrect = None
  16756. self.linewidth = 0
  16757. self.ptm = mupdf.FzMatrix()
  16758. self.ctm = mupdf.FzMatrix()
  16759. self.rot = mupdf.FzMatrix()
  16760. self.lastpoint = mupdf.FzPoint()
  16761. self.firstpoint = mupdf.FzPoint()
  16762. self.havemove = 0
  16763. self.pathrect = mupdf.FzRect()
  16764. self.pathfactor = 0
  16765. self.linecount = 0
  16766. self.path_type = 0
  16767. #drop_device = jm_lineart_drop_device
  16768. fill_path = jm_lineart_fill_path
  16769. stroke_path = jm_lineart_stroke_path
  16770. clip_image_mask = jm_lineart_clip_image_mask
  16771. clip_path = jm_lineart_clip_path
  16772. clip_stroke_path = jm_lineart_clip_stroke_path
  16773. clip_text = jm_lineart_clip_text
  16774. clip_stroke_text = jm_lineart_clip_stroke_text
  16775. fill_text = jm_increase_seqno
  16776. stroke_text = jm_increase_seqno
  16777. ignore_text = jm_increase_seqno
  16778. fill_shade = jm_increase_seqno
  16779. fill_image = jm_increase_seqno
  16780. fill_image_mask = jm_increase_seqno
  16781. pop_clip = jm_lineart_pop_clip
  16782. begin_group = jm_lineart_begin_group
  16783. end_group = jm_lineart_end_group
  16784. begin_layer = jm_lineart_begin_layer
  16785. end_layer = jm_lineart_end_layer
  16786. class JM_new_texttrace_device(mupdf.FzDevice2):
  16787. '''
  16788. Trace TEXT device for Python method Page.get_texttrace()
  16789. '''
  16790. def __init__(self, out):
  16791. super().__init__()
  16792. self.use_virtual_fill_path()
  16793. self.use_virtual_stroke_path()
  16794. self.use_virtual_fill_text()
  16795. self.use_virtual_stroke_text()
  16796. self.use_virtual_ignore_text()
  16797. self.use_virtual_fill_shade()
  16798. self.use_virtual_fill_image()
  16799. self.use_virtual_fill_image_mask()
  16800. self.use_virtual_begin_layer()
  16801. self.use_virtual_end_layer()
  16802. self.out = out
  16803. self.seqno = 0
  16804. self.depth = 0
  16805. self.clips = 0
  16806. self.method = None
  16807. self.seqno = 0
  16808. self.pathdict = dict()
  16809. self.scissors = list()
  16810. self.linewidth = 0
  16811. self.ptm = mupdf.FzMatrix()
  16812. self.ctm = mupdf.FzMatrix()
  16813. self.rot = mupdf.FzMatrix()
  16814. self.lastpoint = mupdf.FzPoint()
  16815. self.pathrect = mupdf.FzRect()
  16816. self.pathfactor = 0
  16817. self.linecount = 0
  16818. self.path_type = 0
  16819. self.layer_name = ""
  16820. fill_path = jm_increase_seqno
  16821. stroke_path = jm_dev_linewidth
  16822. fill_text = jm_lineart_fill_text
  16823. stroke_text = jm_lineart_stroke_text
  16824. ignore_text = jm_lineart_ignore_text
  16825. fill_shade = jm_increase_seqno
  16826. fill_image = jm_increase_seqno
  16827. fill_image_mask = jm_increase_seqno
  16828. begin_layer = jm_lineart_begin_layer
  16829. end_layer = jm_lineart_end_layer
  16830. def ConversionHeader(i: str, filename: OptStr ="unknown"):
  16831. t = i.lower()
  16832. import textwrap
  16833. html = textwrap.dedent("""
  16834. <!DOCTYPE html>
  16835. <html>
  16836. <head>
  16837. <style>
  16838. body{background-color:gray}
  16839. div{position:relative;background-color:white;margin:1em auto}
  16840. p{position:absolute;margin:0}
  16841. img{position:absolute}
  16842. </style>
  16843. </head>
  16844. <body>
  16845. """)
  16846. xml = textwrap.dedent("""
  16847. <?xml version="1.0"?>
  16848. <document name="%s">
  16849. """
  16850. % filename
  16851. )
  16852. xhtml = textwrap.dedent("""
  16853. <?xml version="1.0"?>
  16854. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  16855. <html xmlns="http://www.w3.org/1999/xhtml">
  16856. <head>
  16857. <style>
  16858. body{background-color:gray}
  16859. div{background-color:white;margin:1em;padding:1em}
  16860. p{white-space:pre-wrap}
  16861. </style>
  16862. </head>
  16863. <body>
  16864. """)
  16865. text = ""
  16866. json = '{"document": "%s", "pages": [\n' % filename
  16867. if t == "html":
  16868. r = html
  16869. elif t == "json":
  16870. r = json
  16871. elif t == "xml":
  16872. r = xml
  16873. elif t == "xhtml":
  16874. r = xhtml
  16875. else:
  16876. r = text
  16877. return r
  16878. def ConversionTrailer(i: str):
  16879. t = i.lower()
  16880. text = ""
  16881. json = "]\n}"
  16882. html = "</body>\n</html>\n"
  16883. xml = "</document>\n"
  16884. xhtml = html
  16885. if t == "html":
  16886. r = html
  16887. elif t == "json":
  16888. r = json
  16889. elif t == "xml":
  16890. r = xml
  16891. elif t == "xhtml":
  16892. r = xhtml
  16893. else:
  16894. r = text
  16895. return r
  16896. def annot_preprocess(page: "Page") -> int:
  16897. """Prepare for annotation insertion on the page.
  16898. Returns:
  16899. Old page rotation value. Temporarily sets rotation to 0 when required.
  16900. """
  16901. CheckParent(page)
  16902. if not page.parent.is_pdf:
  16903. raise ValueError("is no PDF")
  16904. old_rotation = page.rotation
  16905. if old_rotation != 0:
  16906. page.set_rotation(0)
  16907. return old_rotation
  16908. def annot_postprocess(page: "Page", annot: "Annot") -> None:
  16909. """Clean up after annotation insertion.
  16910. Set ownership flag and store annotation in page annotation dictionary.
  16911. """
  16912. #annot.parent = weakref.proxy(page)
  16913. assert isinstance( page, Page)
  16914. assert isinstance( annot, Annot)
  16915. annot.parent = page
  16916. page._annot_refs[id(annot)] = annot
  16917. annot.thisown = True
  16918. def canon(c):
  16919. assert isinstance(c, int)
  16920. # TODO: proper unicode case folding
  16921. # TODO: character equivalence (a matches ä, etc)
  16922. if c == 0xA0 or c == 0x2028 or c == 0x2029:
  16923. return ord(' ')
  16924. if c == ord('\r') or c == ord('\n') or c == ord('\t'):
  16925. return ord(' ')
  16926. if c >= ord('A') and c <= ord('Z'):
  16927. return c - ord('A') + ord('a')
  16928. return c
  16929. def chartocanon(s):
  16930. assert isinstance(s, str)
  16931. n, c = mupdf.fz_chartorune(s)
  16932. c = canon(c)
  16933. return n, c
  16934. def dest_is_valid(o, page_count, page_object_nums, names_list):
  16935. p = mupdf.pdf_dict_get( o, PDF_NAME('A'))
  16936. if (
  16937. mupdf.pdf_name_eq(
  16938. mupdf.pdf_dict_get( p, PDF_NAME('S')),
  16939. PDF_NAME('GoTo')
  16940. )
  16941. and not string_in_names_list(
  16942. mupdf.pdf_dict_get( p, PDF_NAME('D')),
  16943. names_list
  16944. )
  16945. ):
  16946. return 0
  16947. p = mupdf.pdf_dict_get( o, PDF_NAME('Dest'))
  16948. if not p.m_internal:
  16949. pass
  16950. elif mupdf.pdf_is_string( p):
  16951. return string_in_names_list( p, names_list)
  16952. elif not dest_is_valid_page(
  16953. mupdf.pdf_array_get( p, 0),
  16954. page_object_nums,
  16955. page_count,
  16956. ):
  16957. return 0
  16958. return 1
  16959. def dest_is_valid_page(obj, page_object_nums, pagecount):
  16960. num = mupdf.pdf_to_num(obj)
  16961. if num == 0:
  16962. return 0
  16963. for i in range(pagecount):
  16964. if page_object_nums[i] == num:
  16965. return 1
  16966. return 0
  16967. def find_string(s, needle):
  16968. assert isinstance(s, str)
  16969. for i in range(len(s)):
  16970. end = match_string(s[i:], needle)
  16971. if end is not None:
  16972. end += i
  16973. return i, end
  16974. return None, None
  16975. def get_pdf_now() -> str:
  16976. '''
  16977. "Now" timestamp in PDF Format
  16978. '''
  16979. import time
  16980. tz = "%s'%s'" % (
  16981. str(abs(time.altzone // 3600)).rjust(2, "0"),
  16982. str((abs(time.altzone // 60) % 60)).rjust(2, "0"),
  16983. )
  16984. tstamp = time.strftime("D:%Y%m%d%H%M%S", time.localtime())
  16985. if time.altzone > 0:
  16986. tstamp += "-" + tz
  16987. elif time.altzone < 0:
  16988. tstamp += "+" + tz
  16989. else:
  16990. pass
  16991. return tstamp
  16992. class ElementPosition(object):
  16993. """Convert a dictionary with element position information to an object."""
  16994. def __init__(self):
  16995. pass
  16996. def make_story_elpos():
  16997. return ElementPosition()
  16998. def get_highlight_selection(page, start: point_like =None, stop: point_like =None, clip: rect_like =None) -> list:
  16999. """Return rectangles of text lines between two points.
  17000. Notes:
  17001. The default of 'start' is top-left of 'clip'. The default of 'stop'
  17002. is bottom-reight of 'clip'.
  17003. Args:
  17004. start: start point_like
  17005. stop: end point_like, must be 'below' start
  17006. clip: consider this rect_like only, default is page rectangle
  17007. Returns:
  17008. List of line bbox intersections with the area established by the
  17009. parameters.
  17010. """
  17011. # validate and normalize arguments
  17012. if clip is None:
  17013. clip = page.rect
  17014. clip = Rect(clip)
  17015. if start is None:
  17016. start = clip.tl
  17017. if stop is None:
  17018. stop = clip.br
  17019. clip.y0 = start.y
  17020. clip.y1 = stop.y
  17021. if clip.is_empty or clip.is_infinite:
  17022. return []
  17023. # extract text of page, clip only, no images, expand ligatures
  17024. blocks = page.get_text(
  17025. "dict", flags=0, clip=clip,
  17026. )["blocks"]
  17027. lines = [] # will return this list of rectangles
  17028. for b in blocks:
  17029. bbox = Rect(b["bbox"])
  17030. if bbox.is_infinite or bbox.is_empty:
  17031. continue
  17032. for line in b["lines"]:
  17033. bbox = Rect(line["bbox"])
  17034. if bbox.is_infinite or bbox.is_empty:
  17035. continue
  17036. lines.append(bbox)
  17037. if lines == []: # did not select anything
  17038. return lines
  17039. lines.sort(key=lambda bbox: bbox.y1) # sort by vertical positions
  17040. # cut off prefix from first line if start point is close to its top
  17041. bboxf = lines.pop(0)
  17042. if bboxf.y0 - start.y <= 0.1 * bboxf.height: # close enough?
  17043. r = Rect(start.x, bboxf.y0, bboxf.br) # intersection rectangle
  17044. if not (r.is_empty or r.is_infinite):
  17045. lines.insert(0, r) # insert again if not empty
  17046. else:
  17047. lines.insert(0, bboxf) # insert again
  17048. if lines == []: # the list might have been emptied
  17049. return lines
  17050. # cut off suffix from last line if stop point is close to its bottom
  17051. bboxl = lines.pop()
  17052. if stop.y - bboxl.y1 <= 0.1 * bboxl.height: # close enough?
  17053. r = Rect(bboxl.tl, stop.x, bboxl.y1) # intersection rectangle
  17054. if not (r.is_empty or r.is_infinite):
  17055. lines.append(r) # append if not empty
  17056. else:
  17057. lines.append(bboxl) # append again
  17058. return lines
  17059. def glyph_name_to_unicode(name: str) -> int:
  17060. """Convenience function accessing unicodedata."""
  17061. import unicodedata
  17062. try:
  17063. unc = ord(unicodedata.lookup(name))
  17064. except Exception:
  17065. unc = 65533
  17066. return unc
  17067. def hdist(dir, a, b):
  17068. dx = b.x - a.x
  17069. dy = b.y - a.y
  17070. return mupdf.fz_abs(dx * dir.x + dy * dir.y)
  17071. def make_table(rect: rect_like =(0, 0, 1, 1), cols: int =1, rows: int =1) -> list:
  17072. """Return a list of (rows x cols) equal sized rectangles.
  17073. Notes:
  17074. A utility to fill a given area with table cells of equal size.
  17075. Args:
  17076. rect: rect_like to use as the table area
  17077. rows: number of rows
  17078. cols: number of columns
  17079. Returns:
  17080. A list with <rows> items, where each item is a list of <cols>
  17081. PyMuPDF Rect objects of equal sizes.
  17082. """
  17083. rect = Rect(rect) # ensure this is a Rect
  17084. if rect.is_empty or rect.is_infinite:
  17085. raise ValueError("rect must be finite and not empty")
  17086. tl = rect.tl
  17087. height = rect.height / rows # height of one table cell
  17088. width = rect.width / cols # width of one table cell
  17089. delta_h = (width, 0, width, 0) # diff to next right rect
  17090. delta_v = (0, height, 0, height) # diff to next lower rect
  17091. r = Rect(tl, tl.x + width, tl.y + height) # first rectangle
  17092. # make the first row
  17093. row = [r]
  17094. for i in range(1, cols):
  17095. r += delta_h # build next rect to the right
  17096. row.append(r)
  17097. # make result, starts with first row
  17098. rects = [row]
  17099. for i in range(1, rows):
  17100. row = rects[i - 1] # take previously appended row
  17101. nrow = [] # the new row to append
  17102. for r in row: # for each previous cell add its downward copy
  17103. nrow.append(r + delta_v)
  17104. rects.append(nrow) # append new row to result
  17105. return rects
  17106. def util_ensure_widget_calc(annot):
  17107. '''
  17108. Ensure that widgets with /AA/C JavaScript are in array AcroForm/CO
  17109. '''
  17110. annot_obj = mupdf.pdf_annot_obj(annot.this)
  17111. pdf = mupdf.pdf_get_bound_document(annot_obj)
  17112. PDFNAME_CO = mupdf.pdf_new_name("CO") # = PDF_NAME(CO)
  17113. acro = mupdf.pdf_dict_getl( # get AcroForm dict
  17114. mupdf.pdf_trailer(pdf),
  17115. PDF_NAME('Root'),
  17116. PDF_NAME('AcroForm'),
  17117. )
  17118. CO = mupdf.pdf_dict_get(acro, PDFNAME_CO) # = AcroForm/CO
  17119. if not mupdf.pdf_is_array(CO):
  17120. CO = mupdf.pdf_dict_put_array(acro, PDFNAME_CO, 2)
  17121. n = mupdf.pdf_array_len(CO)
  17122. found = 0
  17123. xref = mupdf.pdf_to_num(annot_obj)
  17124. for i in range(n):
  17125. nxref = mupdf.pdf_to_num(mupdf.pdf_array_get(CO, i))
  17126. if xref == nxref:
  17127. found = 1
  17128. break
  17129. if not found:
  17130. mupdf.pdf_array_push(CO, mupdf.pdf_new_indirect(pdf, xref, 0))
  17131. def util_make_rect( *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
  17132. '''
  17133. Helper for initialising rectangle classes.
  17134. 2022-09-02: This is quite different from PyMuPDF's util_make_rect(), which
  17135. uses `goto` in ways that don't easily translate to Python.
  17136. Returns (x0, y0, x1, y1) derived from <args>, then override with p0, p1,
  17137. x0, y0, x1, y1 if they are not None.
  17138. Accepts following forms for <args>:
  17139. () returns all zeros.
  17140. (top-left, bottom-right)
  17141. (top-left, x1, y1)
  17142. (x0, y0, bottom-right)
  17143. (x0, y0, x1, y1)
  17144. (rect)
  17145. Where top-left and bottom-right are (x, y) or something with .x, .y
  17146. members; rect is something with .x0, .y0, .x1, and .y1 members.
  17147. 2023-11-18: we now override with p0, p1, x0, y0, x1, y1 if not None.
  17148. '''
  17149. def get_xy( arg):
  17150. if isinstance( arg, (list, tuple)) and len( arg) == 2:
  17151. return arg[0], arg[1]
  17152. if isinstance( arg, (Point, mupdf.FzPoint, mupdf.fz_point)):
  17153. return arg.x, arg.y
  17154. return None, None
  17155. def make_tuple( a):
  17156. if isinstance( a, tuple):
  17157. return a
  17158. if isinstance( a, Point):
  17159. return a.x, a.y
  17160. elif isinstance( a, (Rect, IRect, mupdf.FzRect, mupdf.fz_rect)):
  17161. return a.x0, a.y0, a.x1, a.y1
  17162. if not isinstance( a, (list, tuple)):
  17163. a = a,
  17164. return a
  17165. def handle_args():
  17166. if len(args) == 0:
  17167. return 0, 0, 0, 0
  17168. elif len(args) == 1:
  17169. arg = args[0]
  17170. if isinstance( arg, (list, tuple)) and len( arg) == 2:
  17171. p1, p2 = arg
  17172. ret = *p1, *p2
  17173. assert len(ret) == 4
  17174. return ret
  17175. if isinstance( arg, (list, tuple)) and len( arg) == 3:
  17176. a, b, c = arg
  17177. a = make_tuple(a)
  17178. b = make_tuple(b)
  17179. c = make_tuple(c)
  17180. ret = *a, *b, *c
  17181. assert len(ret) == 4
  17182. return ret
  17183. ret = make_tuple( arg)
  17184. assert len(ret) == 4, f'{arg=} {ret=}'
  17185. return ret
  17186. elif len(args) == 2:
  17187. ret = get_xy( args[0]) + get_xy( args[1])
  17188. assert len(ret) == 4
  17189. return ret
  17190. elif len(args) == 3:
  17191. x0, y0 = get_xy( args[0])
  17192. if (x0, y0) != (None, None):
  17193. return x0, y0, args[1], args[2]
  17194. x1, y1 = get_xy( args[2])
  17195. if (x1, y1) != (None, None):
  17196. return args[0], args[1], x1, y1
  17197. elif len(args) == 4:
  17198. return args[0], args[1], args[2], args[3]
  17199. raise Exception( f'Unrecognised args: {args}')
  17200. ret_x0, ret_y0, ret_x1, ret_y1 = handle_args()
  17201. if p0 is not None: ret_x0, ret_y0 = get_xy(p0)
  17202. if p1 is not None: ret_x1, ret_y1 = get_xy(p1)
  17203. if x0 is not None: ret_x0 = x0
  17204. if y0 is not None: ret_y0 = y0
  17205. if x1 is not None: ret_x1 = x1
  17206. if y1 is not None: ret_y1 = y1
  17207. return ret_x0, ret_y0, ret_x1, ret_y1
  17208. def util_make_irect( *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
  17209. a, b, c, d = util_make_rect( *args, p0=p0, p1=p1, x0=x0, y0=y0, x1=x1, y1=y1)
  17210. def convert(x, ceil):
  17211. if ceil:
  17212. return int(math.ceil(x))
  17213. else:
  17214. return int(math.floor(x))
  17215. a = convert(a, False)
  17216. b = convert(b, False)
  17217. c = convert(c, True)
  17218. d = convert(d, True)
  17219. return a, b, c, d
  17220. def util_round_rect( rect):
  17221. return JM_py_from_irect(mupdf.fz_round_rect(JM_rect_from_py(rect)))
  17222. def util_transform_rect( rect, matrix):
  17223. if g_use_extra:
  17224. return extra.util_transform_rect( rect, matrix)
  17225. return JM_py_from_rect(mupdf.fz_transform_rect(JM_rect_from_py(rect), JM_matrix_from_py(matrix)))
  17226. def util_intersect_rect( r1, r2):
  17227. return JM_py_from_rect(
  17228. mupdf.fz_intersect_rect(
  17229. JM_rect_from_py(r1),
  17230. JM_rect_from_py(r2),
  17231. )
  17232. )
  17233. def util_is_point_in_rect( p, r):
  17234. return mupdf.fz_is_point_inside_rect(
  17235. JM_point_from_py(p),
  17236. JM_rect_from_py(r),
  17237. )
  17238. def util_include_point_in_rect( r, p):
  17239. return JM_py_from_rect(
  17240. mupdf.fz_include_point_in_rect(
  17241. JM_rect_from_py(r),
  17242. JM_point_from_py(p),
  17243. )
  17244. )
  17245. def util_point_in_quad( P, Q):
  17246. p = JM_point_from_py(P)
  17247. q = JM_quad_from_py(Q)
  17248. return mupdf.fz_is_point_inside_quad(p, q)
  17249. def util_transform_point( point, matrix):
  17250. return JM_py_from_point(
  17251. mupdf.fz_transform_point(
  17252. JM_point_from_py(point),
  17253. JM_matrix_from_py(matrix),
  17254. )
  17255. )
  17256. def util_union_rect( r1, r2):
  17257. return JM_py_from_rect(
  17258. mupdf.fz_union_rect(
  17259. JM_rect_from_py(r1),
  17260. JM_rect_from_py(r2),
  17261. )
  17262. )
  17263. def util_concat_matrix( m1, m2):
  17264. return JM_py_from_matrix(
  17265. mupdf.fz_concat(
  17266. JM_matrix_from_py(m1),
  17267. JM_matrix_from_py(m2),
  17268. )
  17269. )
  17270. def util_invert_matrix(matrix):
  17271. if 0:
  17272. # Use MuPDF's fz_invert_matrix().
  17273. if isinstance( matrix, (tuple, list)):
  17274. matrix = mupdf.FzMatrix( *matrix)
  17275. elif isinstance( matrix, mupdf.fz_matrix):
  17276. matrix = mupdf.FzMatrix( matrix)
  17277. elif isinstance( matrix, Matrix):
  17278. matrix = mupdf.FzMatrix( matrix.a, matrix.b, matrix.c, matrix.d, matrix.e, matrix.f)
  17279. assert isinstance( matrix, mupdf.FzMatrix), f'{type(matrix)=}: {matrix}'
  17280. ret = mupdf.fz_invert_matrix( matrix)
  17281. if ret == matrix and (0
  17282. or abs( matrix.a - 1) >= sys.float_info.epsilon
  17283. or abs( matrix.b - 0) >= sys.float_info.epsilon
  17284. or abs( matrix.c - 0) >= sys.float_info.epsilon
  17285. or abs( matrix.d - 1) >= sys.float_info.epsilon
  17286. ):
  17287. # Inversion not possible.
  17288. return 1, ()
  17289. return 0, (ret.a, ret.b, ret.c, ret.d, ret.e, ret.f)
  17290. # Do inversion in python.
  17291. src = JM_matrix_from_py(matrix)
  17292. a = src.a
  17293. det = a * src.d - src.b * src.c
  17294. if det < -sys.float_info.epsilon or det > sys.float_info.epsilon:
  17295. dst = mupdf.FzMatrix()
  17296. rdet = 1 / det
  17297. dst.a = src.d * rdet
  17298. dst.b = -src.b * rdet
  17299. dst.c = -src.c * rdet
  17300. dst.d = a * rdet
  17301. a = -src.e * dst.a - src.f * dst.c
  17302. dst.f = -src.e * dst.b - src.f * dst.d
  17303. dst.e = a
  17304. return 0, (dst.a, dst.b, dst.c, dst.d, dst.e, dst.f)
  17305. return 1, ()
  17306. def util_measure_string( text, fontname, fontsize, encoding):
  17307. font = mupdf.fz_new_base14_font(fontname)
  17308. w = 0
  17309. pos = 0
  17310. while pos < len(text):
  17311. t, c = mupdf.fz_chartorune(text[pos:])
  17312. pos += t
  17313. if encoding == mupdf.PDF_SIMPLE_ENCODING_GREEK:
  17314. c = mupdf.fz_iso8859_7_from_unicode(c)
  17315. elif encoding == mupdf.PDF_SIMPLE_ENCODING_CYRILLIC:
  17316. c = mupdf.fz_windows_1251_from_unicode(c)
  17317. else:
  17318. c = mupdf.fz_windows_1252_from_unicode(c)
  17319. if c < 0:
  17320. c = 0xB7
  17321. g = mupdf.fz_encode_character(font, c)
  17322. dw = mupdf.fz_advance_glyph(font, g, 0)
  17323. w += dw
  17324. ret = w * fontsize
  17325. return ret
  17326. def util_sine_between(C, P, Q):
  17327. # for points C, P, Q compute the sine between lines CP and QP
  17328. c = JM_point_from_py(C)
  17329. p = JM_point_from_py(P)
  17330. q = JM_point_from_py(Q)
  17331. s = mupdf.fz_normalize_vector(mupdf.fz_make_point(q.x - p.x, q.y - p.y))
  17332. m1 = mupdf.fz_make_matrix(1, 0, 0, 1, -p.x, -p.y)
  17333. m2 = mupdf.fz_make_matrix(s.x, -s.y, s.y, s.x, 0, 0)
  17334. m1 = mupdf.fz_concat(m1, m2)
  17335. c = mupdf.fz_transform_point(c, m1)
  17336. c = mupdf.fz_normalize_vector(c)
  17337. return c.y
  17338. def util_hor_matrix(C, P):
  17339. '''
  17340. Return the matrix that maps two points C, P to the x-axis such that
  17341. C -> (0,0) and the image of P have the same distance.
  17342. '''
  17343. c = JM_point_from_py(C)
  17344. p = JM_point_from_py(P)
  17345. # compute (cosine, sine) of vector P-C with double precision:
  17346. s = mupdf.fz_normalize_vector(mupdf.fz_make_point(p.x - c.x, p.y - c.y))
  17347. m1 = mupdf.fz_make_matrix(1, 0, 0, 1, -c.x, -c.y)
  17348. m2 = mupdf.fz_make_matrix(s.x, -s.y, s.y, s.x, 0, 0)
  17349. return JM_py_from_matrix(mupdf.fz_concat(m1, m2))
  17350. def match_string(h0, n0):
  17351. h = 0
  17352. n = 0
  17353. e = h
  17354. delta_h, hc = chartocanon(h0[h:])
  17355. h += delta_h
  17356. delta_n, nc = chartocanon(n0[n:])
  17357. n += delta_n
  17358. while hc == nc:
  17359. e = h
  17360. if hc == ord(' '):
  17361. while 1:
  17362. delta_h, hc = chartocanon(h0[h:])
  17363. h += delta_h
  17364. if hc != ord(' '):
  17365. break
  17366. else:
  17367. delta_h, hc = chartocanon(h0[h:])
  17368. h += delta_h
  17369. if nc == ord(' '):
  17370. while 1:
  17371. delta_n, nc = chartocanon(n0[n:])
  17372. n += delta_n
  17373. if nc != ord(' '):
  17374. break
  17375. else:
  17376. delta_n, nc = chartocanon(n0[n:])
  17377. n += delta_n
  17378. return None if nc != 0 else e
def on_highlight_char(hits, line, ch):
    """Record the quad of character *ch* (on text line *line*) in *hits*.

    If the character's quad lies within the fuzz distances of the right
    edge of the most recent quad in hits.quads, that quad is extended;
    otherwise the character's quad is appended and hits.len is incremented.
    """
    assert hits
    assert isinstance(line, mupdf.FzStextLine)
    assert isinstance(ch, mupdf.FzStextChar)
    # tolerances scale with the character size
    vfuzz = ch.m_internal.size * hits.vfuzz
    hfuzz = ch.m_internal.size * hits.hfuzz
    ch_quad = JM_char_quad(line, ch)
    if hits.len > 0:
        # fixme: end = hits.quads[-1]
        quad = hits.quads[hits.len - 1]
        end = JM_quad_from_py(quad)
        # Compare the right corners of the last quad with the left corners
        # of the new one, measured with hdist() / vdist() relative to the
        # line direction.
        if ( 1
                and hdist(line.m_internal.dir, end.lr, ch_quad.ll) < hfuzz
                and vdist(line.m_internal.dir, end.lr, ch_quad.ll) < vfuzz
                and hdist(line.m_internal.dir, end.ur, ch_quad.ul) < hfuzz
                and vdist(line.m_internal.dir, end.ur, ch_quad.ul) < vfuzz
                ):
            # extend the last quad up to the right edge of this character
            end.ur = ch_quad.ur
            end.lr = ch_quad.lr
            # NOTE(review): this only updates hits.quads[-1] if
            # JM_quad_from_py() returned that very object rather than a
            # copy - the assert below checks exactly that; confirm.
            assert hits.quads[-1] == end
            return
    hits.quads.append(ch_quad)
    hits.len += 1
  17402. def page_merge(doc_des, doc_src, page_from, page_to, rotate, links, copy_annots, graft_map):
  17403. '''
  17404. Deep-copies a source page to the target.
  17405. Modified version of function of pdfmerge.c: we also copy annotations, but
  17406. we skip some subtypes. In addition we rotate output.
  17407. '''
  17408. if g_use_extra:
  17409. #log( 'Calling C++ extra.page_merge()')
  17410. return extra.page_merge( doc_des, doc_src, page_from, page_to, rotate, links, copy_annots, graft_map)
  17411. # list of object types (per page) we want to copy
  17412. known_page_objs = [
  17413. PDF_NAME('Contents'),
  17414. PDF_NAME('Resources'),
  17415. PDF_NAME('MediaBox'),
  17416. PDF_NAME('CropBox'),
  17417. PDF_NAME('BleedBox'),
  17418. PDF_NAME('TrimBox'),
  17419. PDF_NAME('ArtBox'),
  17420. PDF_NAME('Rotate'),
  17421. PDF_NAME('UserUnit'),
  17422. ]
  17423. page_ref = mupdf.pdf_lookup_page_obj(doc_src, page_from)
  17424. # make new page dict in dest doc
  17425. page_dict = mupdf.pdf_new_dict(doc_des, 4)
  17426. mupdf.pdf_dict_put(page_dict, PDF_NAME('Type'), PDF_NAME('Page'))
  17427. # copy objects of source page into it
  17428. for i in range( len(known_page_objs)):
  17429. obj = mupdf.pdf_dict_get_inheritable( page_ref, known_page_objs[i])
  17430. if obj.m_internal:
  17431. #log( '{=type(graft_map) type(graft_map.this)}')
  17432. mupdf.pdf_dict_put( page_dict, known_page_objs[i], mupdf.pdf_graft_mapped_object(graft_map.this, obj))
  17433. # Copy annotations, but skip Link, Popup, IRT, Widget types
  17434. # If selected, remove dict keys P (parent) and Popup
  17435. if copy_annots:
  17436. old_annots = mupdf.pdf_dict_get( page_ref, PDF_NAME('Annots'))
  17437. n = mupdf.pdf_array_len( old_annots)
  17438. if n > 0:
  17439. new_annots = mupdf.pdf_dict_put_array( page_dict, PDF_NAME('Annots'), n)
  17440. for i in range(n):
  17441. o = mupdf.pdf_array_get( old_annots, i)
  17442. if not o.m_internal or not mupdf.pdf_is_dict(o):
  17443. continue # skip non-dict items
  17444. if mupdf.pdf_dict_gets( o, "IRT").m_internal:
  17445. continue
  17446. subtype = mupdf.pdf_dict_get( o, PDF_NAME('Subtype'))
  17447. if mupdf.pdf_name_eq( subtype, PDF_NAME('Link')):
  17448. continue
  17449. if mupdf.pdf_name_eq( subtype, PDF_NAME('Popup')):
  17450. continue
  17451. if mupdf.pdf_name_eq(subtype, PDF_NAME('Widget')):
  17452. continue
  17453. mupdf.pdf_dict_del( o, PDF_NAME('Popup'))
  17454. mupdf.pdf_dict_del( o, PDF_NAME('P'))
  17455. copy_o = mupdf.pdf_graft_mapped_object( graft_map.this, o)
  17456. annot = mupdf.pdf_new_indirect( doc_des, mupdf.pdf_to_num( copy_o), 0)
  17457. mupdf.pdf_array_push( new_annots, annot)
  17458. # rotate the page
  17459. if rotate != -1:
  17460. mupdf.pdf_dict_put_int( page_dict, PDF_NAME('Rotate'), rotate)
  17461. # Now add the page dictionary to dest PDF
  17462. ref = mupdf.pdf_add_object( doc_des, page_dict)
  17463. # Insert new page at specified location
  17464. mupdf.pdf_insert_page( doc_des, page_to, ref)
  17465. def paper_rect(s: str) -> Rect:
  17466. """Return a Rect for the paper size indicated in string 's'. Must conform to the argument of method 'PaperSize', which will be invoked.
  17467. """
  17468. width, height = paper_size(s)
  17469. return Rect(0.0, 0.0, width, height)
  17470. def paper_size(s: str) -> tuple:
  17471. """Return a tuple (width, height) for a given paper format string.
  17472. Notes:
  17473. 'A4-L' will return (842, 595), the values for A4 landscape.
  17474. Suffix '-P' and no suffix return the portrait tuple.
  17475. """
  17476. size = s.lower()
  17477. f = "p"
  17478. if size.endswith("-l"):
  17479. f = "l"
  17480. size = size[:-2]
  17481. if size.endswith("-p"):
  17482. size = size[:-2]
  17483. rc = paper_sizes().get(size, (-1, -1))
  17484. if f == "p":
  17485. return rc
  17486. return (rc[1], rc[0])
  17487. def paper_sizes():
  17488. """Known paper formats @ 72 dpi as a dictionary. Key is the format string
  17489. like "a4" for ISO-A4. Value is the tuple (width, height).
  17490. Information taken from the following web sites:
  17491. www.din-formate.de
  17492. www.din-formate.info/amerikanische-formate.html
  17493. www.directtools.de/wissen/normen/iso.htm
  17494. """
  17495. return {
  17496. "a0": (2384, 3370),
  17497. "a1": (1684, 2384),
  17498. "a10": (74, 105),
  17499. "a2": (1191, 1684),
  17500. "a3": (842, 1191),
  17501. "a4": (595, 842),
  17502. "a5": (420, 595),
  17503. "a6": (298, 420),
  17504. "a7": (210, 298),
  17505. "a8": (147, 210),
  17506. "a9": (105, 147),
  17507. "b0": (2835, 4008),
  17508. "b1": (2004, 2835),
  17509. "b10": (88, 125),
  17510. "b2": (1417, 2004),
  17511. "b3": (1001, 1417),
  17512. "b4": (709, 1001),
  17513. "b5": (499, 709),
  17514. "b6": (354, 499),
  17515. "b7": (249, 354),
  17516. "b8": (176, 249),
  17517. "b9": (125, 176),
  17518. "c0": (2599, 3677),
  17519. "c1": (1837, 2599),
  17520. "c10": (79, 113),
  17521. "c2": (1298, 1837),
  17522. "c3": (918, 1298),
  17523. "c4": (649, 918),
  17524. "c5": (459, 649),
  17525. "c6": (323, 459),
  17526. "c7": (230, 323),
  17527. "c8": (162, 230),
  17528. "c9": (113, 162),
  17529. "card-4x6": (288, 432),
  17530. "card-5x7": (360, 504),
  17531. "commercial": (297, 684),
  17532. "executive": (522, 756),
  17533. "invoice": (396, 612),
  17534. "ledger": (792, 1224),
  17535. "legal": (612, 1008),
  17536. "legal-13": (612, 936),
  17537. "letter": (612, 792),
  17538. "monarch": (279, 540),
  17539. "tabloid-extra": (864, 1296),
  17540. }
  17541. def pdf_lookup_page_loc(doc, needle):
  17542. return mupdf.pdf_lookup_page_loc(doc, needle)
def pdfobj_string(o, prefix=''):
    '''
    Returns description of mupdf.PdfObj (wrapper for pdf_obj) <o>.

    NOTE: the function is currently disabled - the first statement is an
    unconditional failing assertion that points to mupdf.pdf_debug_obj().
    Everything after it only runs if assertions are turned off.

    Args:
        o: the object to describe.
        prefix: string prepended to dictionary entries; it is extended by
            a space for every nesting level.
    '''
    # Deliberate hard stop, see docstring.
    assert 0, 'use mupdf.pdf_debug_obj() ?'
    ret = ''
    if mupdf.pdf_is_array(o):
        # Arrays: header line, then every item described recursively.
        l = mupdf.pdf_array_len(o)
        ret += f'array {l}\n'
        for i in range(l):
            oo = mupdf.pdf_array_get(o, i)
            ret += pdfobj_string(oo, prefix + ' ')
            ret += '\n'
    elif mupdf.pdf_is_bool(o):
        ret += f'bool: {o.array_get_bool()}\n'
    elif mupdf.pdf_is_dict(o):
        # Dictionaries: header line, then "key: <description>" per entry.
        l = mupdf.pdf_dict_len(o)
        ret += f'dict {l}\n'
        for i in range(l):
            key = mupdf.pdf_dict_get_key(o, i)
            value = mupdf.pdf_dict_get( o, key)
            ret += f'{prefix} {key}: '
            ret += pdfobj_string( value, prefix + ' ')
            ret += '\n'
    elif mupdf.pdf_is_embedded_file(o):
        ret += f'embedded_file: {o.embedded_file_name()}\n'
    elif mupdf.pdf_is_indirect(o):
        ret += f'indirect: ...\n'
    elif mupdf.pdf_is_int(o):
        ret += f'int: {mupdf.pdf_to_int(o)}\n'
    elif mupdf.pdf_is_jpx_image(o):
        ret += f'jpx_image:\n'
    elif mupdf.pdf_is_name(o):
        ret += f'name: {mupdf.pdf_to_name(o)}\n'
    # NOTE(review): unlike the other type tests, `o.pdf_is_null` and
    # `o.pdf_is_real` below are referenced without being called - confirm
    # whether a call was intended before re-enabling this function.
    elif o.pdf_is_null:
        ret += f'null\n'
    #elif o.pdf_is_number:
    # ret += f'number\n'
    elif o.pdf_is_real:
        ret += f'real: {o.pdf_to_real()}\n'
    elif mupdf.pdf_is_stream(o):
        ret += f'stream\n'
    elif mupdf.pdf_is_string(o):
        ret += f'string: {mupdf.pdf_to_string(o)}\n'
    else:
        # None of the type tests matched.
        ret += '<>\n'
    return ret
  17590. def repair_mono_font(page: "Page", font: "Font") -> None:
  17591. """Repair character spacing for mono fonts.
  17592. Notes:
  17593. Some mono-spaced fonts are displayed with a too large character
  17594. distance, e.g. "a b c" instead of "abc". This utility adds an entry
  17595. "/W[0 65535 w]" to the descendent font(s) of font. The float w is
  17596. taken to be the width of 0x20 (space).
  17597. This should enforce viewers to use 'w' as the character width.
  17598. Args:
  17599. page: pymupdf.Page object.
  17600. font: pymupdf.Font object.
  17601. """
  17602. if not font.flags["mono"]: # font not flagged as monospaced
  17603. return None
  17604. doc = page.parent # the document
  17605. fontlist = page.get_fonts() # list of fonts on page
  17606. xrefs = [ # list of objects referring to font
  17607. f[0]
  17608. for f in fontlist
  17609. if (f[3] == font.name and f[4].startswith("F") and f[5].startswith("Identity"))
  17610. ]
  17611. if xrefs == []: # our font does not occur
  17612. return
  17613. xrefs = set(xrefs) # drop any double counts
  17614. width = int(round((font.glyph_advance(32) * 1000)))
  17615. for xref in xrefs:
  17616. if not TOOLS.set_font_width(doc, xref, width):
  17617. log("Cannot set width for '%s' in xref %i" % (font.name, xref))
  17618. def sRGB_to_pdf(srgb: int) -> tuple:
  17619. """Convert sRGB color code to a PDF color triple.
  17620. There is **no error checking** for performance reasons!
  17621. Args:
  17622. srgb: (int) RRGGBB (red, green, blue), each color in range(255).
  17623. Returns:
  17624. Tuple (red, green, blue) each item in interval 0 <= item <= 1.
  17625. """
  17626. t = sRGB_to_rgb(srgb)
  17627. return t[0] / 255.0, t[1] / 255.0, t[2] / 255.0
  17628. def sRGB_to_rgb(srgb: int) -> tuple:
  17629. """Convert sRGB color code to an RGB color triple.
  17630. There is **no error checking** for performance reasons!
  17631. Args:
  17632. srgb: (int) SSRRGGBB (red, green, blue), each color in range(255).
  17633. With MuPDF < 1.26, `s` is always 0.
  17634. Returns:
  17635. Tuple (red, green, blue) each item in interval 0 <= item <= 255.
  17636. """
  17637. srgb &= 0xffffff
  17638. r = srgb >> 16
  17639. g = (srgb - (r << 16)) >> 8
  17640. b = srgb - (r << 16) - (g << 8)
  17641. return (r, g, b)
  17642. def string_in_names_list(p, names_list):
  17643. n = mupdf.pdf_array_len( names_list) if names_list else 0
  17644. str_ = mupdf.pdf_to_text_string( p)
  17645. for i in range(0, n, 2):
  17646. if mupdf.pdf_to_text_string( mupdf.pdf_array_get( names_list, i)) == str_:
  17647. return 1
  17648. return 0
  17649. def strip_outline(doc, outlines, page_count, page_object_nums, names_list):
  17650. '''
  17651. Returns (count, first, prev).
  17652. '''
  17653. first = None
  17654. count = 0
  17655. current = outlines
  17656. prev = None
  17657. while current.m_internal:
  17658. # Strip any children to start with. This takes care of
  17659. # First / Last / Count for us.
  17660. nc = strip_outlines(doc, current, page_count, page_object_nums, names_list)
  17661. if not dest_is_valid(current, page_count, page_object_nums, names_list):
  17662. if nc == 0:
  17663. # Outline with invalid dest and no children. Drop it by
  17664. # pulling the next one in here.
  17665. next = mupdf.pdf_dict_get(current, PDF_NAME('Next'))
  17666. if not next.m_internal:
  17667. # There is no next one to pull in
  17668. if prev.m_internal:
  17669. mupdf.pdf_dict_del(prev, PDF_NAME('Next'))
  17670. elif prev.m_internal:
  17671. mupdf.pdf_dict_put(prev, PDF_NAME('Next'), next)
  17672. mupdf.pdf_dict_put(next, PDF_NAME('Prev'), prev)
  17673. else:
  17674. mupdf.pdf_dict_del(next, PDF_NAME('Prev'))
  17675. current = next
  17676. else:
  17677. # Outline with invalid dest, but children. Just drop the dest.
  17678. mupdf.pdf_dict_del(current, PDF_NAME('Dest'))
  17679. mupdf.pdf_dict_del(current, PDF_NAME('A'))
  17680. current = mupdf.pdf_dict_get(current, PDF_NAME('Next'))
  17681. else:
  17682. # Keep this one
  17683. if not first or not first.m_internal:
  17684. first = current
  17685. prev = current
  17686. current = mupdf.pdf_dict_get(current, PDF_NAME('Next'))
  17687. count += 1
  17688. return count, first, prev
  17689. def strip_outlines(doc, outlines, page_count, page_object_nums, names_list):
  17690. if not outlines.m_internal:
  17691. return 0
  17692. first = mupdf.pdf_dict_get(outlines, PDF_NAME('First'))
  17693. if not first.m_internal:
  17694. nc = 0
  17695. else:
  17696. nc, first, last = strip_outline(doc, first, page_count, page_object_nums, names_list)
  17697. if nc == 0:
  17698. mupdf.pdf_dict_del(outlines, PDF_NAME('First'))
  17699. mupdf.pdf_dict_del(outlines, PDF_NAME('Last'))
  17700. mupdf.pdf_dict_del(outlines, PDF_NAME('Count'))
  17701. else:
  17702. old_count = mupdf.pdf_to_int(mupdf.pdf_dict_get(outlines, PDF_NAME('Count')))
  17703. mupdf.pdf_dict_put(outlines, PDF_NAME('First'), first)
  17704. mupdf.pdf_dict_put(outlines, PDF_NAME('Last'), last)
  17705. mupdf.pdf_dict_put(outlines, PDF_NAME('Count'), mupdf.pdf_new_int(nc if old_count > 0 else -nc))
  17706. return nc
# Integer codes for four kinds of path operation (fill, stroke, clip,
# clip-stroke). Presumably used by the trace device to tag the paths it
# records - confirm against the code that reads these constants.
trace_device_FILL_PATH = 1
trace_device_STROKE_PATH = 2
trace_device_CLIP_PATH = 3
trace_device_CLIP_STROKE_PATH = 4
  17711. def unicode_to_glyph_name(ch: int) -> str:
  17712. """
  17713. Convenience function accessing unicodedata.
  17714. """
  17715. import unicodedata
  17716. try:
  17717. name = unicodedata.name(chr(ch))
  17718. except ValueError:
  17719. name = ".notdef"
  17720. return name
  17721. def vdist(dir, a, b):
  17722. dx = b.x - a.x
  17723. dy = b.y - a.y
  17724. return mupdf.fz_abs(dx * dir.y + dy * dir.x)
  17725. def apply_pages(
  17726. path,
  17727. pagefn,
  17728. *,
  17729. pagefn_args=(),
  17730. pagefn_kwargs=dict(),
  17731. initfn=None,
  17732. initfn_args=(),
  17733. initfn_kwargs=dict(),
  17734. pages=None,
  17735. method='single',
  17736. concurrency=None,
  17737. _stats=False,
  17738. ):
  17739. '''
  17740. Returns list of results from `pagefn()`, optionally using concurrency for
  17741. speed.
  17742. Args:
  17743. path:
  17744. Path of document.
  17745. pagefn:
  17746. Function to call for each page; is passed (page, *pagefn_args,
  17747. **pagefn_kwargs). Return value is added to list that we return. If
  17748. `method` is not 'single', must be a top-level function - nested
  17749. functions don't work with concurrency.
  17750. pagefn_args
  17751. pagefn_kwargs:
  17752. Additional args to pass to `pagefn`. Must be picklable.
  17753. initfn:
  17754. If true, called once in each worker process; is passed
  17755. (*initfn_args, **initfn_kwargs).
  17756. initfn_args
  17757. initfn_kwargs:
  17758. Args to pass to initfn. Must be picklable.
  17759. pages:
  17760. List of page numbers to process, or None to include all pages.
  17761. method:
  17762. 'single'
  17763. Do not use concurrency.
  17764. 'mp'
  17765. Operate concurrently using Python's `multiprocessing` module.
  17766. 'fork'
  17767. Operate concurrently using custom implementation with
  17768. `os.fork()`. Does not work on Windows.
  17769. concurrency:
  17770. Number of worker processes to use when operating concurrently. If
  17771. None, we use the number of available CPUs.
  17772. _stats:
  17773. Internal, may change or be removed. If true, we output simple
  17774. timing diagnostics.
  17775. Note: We require a file path rather than a Document, because Document
  17776. instances do not work properly after a fork - internal file descriptor
  17777. offsets are shared between the parent and child processes.
  17778. '''
  17779. if _stats:
  17780. t0 = time.time()
  17781. if method == 'single':
  17782. if initfn:
  17783. initfn(*initfn_args, **initfn_kwargs)
  17784. ret = list()
  17785. document = Document(path)
  17786. if pages is None:
  17787. pages = range(len(document))
  17788. for pno in pages:
  17789. page = document[pno]
  17790. r = pagefn(page, *pagefn_args, **initfn_kwargs)
  17791. ret.append(r)
  17792. else:
  17793. # Use concurrency.
  17794. #
  17795. from . import _apply_pages
  17796. if pages is None:
  17797. if _stats:
  17798. t = time.time()
  17799. with Document(path) as document:
  17800. num_pages = len(document)
  17801. pages = list(range(num_pages))
  17802. if _stats:
  17803. t = time.time() - t
  17804. log(f'{t:.2f}s: count pages.')
  17805. if _stats:
  17806. t = time.time()
  17807. if method == 'mp':
  17808. ret = _apply_pages._multiprocessing(
  17809. path,
  17810. pages,
  17811. pagefn,
  17812. pagefn_args,
  17813. pagefn_kwargs,
  17814. initfn,
  17815. initfn_args,
  17816. initfn_kwargs,
  17817. concurrency,
  17818. _stats,
  17819. )
  17820. elif method == 'fork':
  17821. ret = _apply_pages._fork(
  17822. path,
  17823. pages,
  17824. pagefn,
  17825. pagefn_args,
  17826. pagefn_kwargs,
  17827. initfn,
  17828. initfn_args,
  17829. initfn_kwargs,
  17830. concurrency,
  17831. _stats,
  17832. )
  17833. else:
  17834. assert 0, f'Unrecognised {method=}.'
  17835. if _stats:
  17836. t = time.time() - t
  17837. log(f'{t:.2f}s: work.')
  17838. if _stats:
  17839. t = time.time() - t0
  17840. log(f'{t:.2f}s: total.')
  17841. return ret
  17842. def get_text(
  17843. path,
  17844. *,
  17845. pages=None,
  17846. method='single',
  17847. concurrency=None,
  17848. option='text',
  17849. clip=None,
  17850. flags=None,
  17851. textpage=None,
  17852. sort=False,
  17853. delimiters=None,
  17854. _stats=False,
  17855. ):
  17856. '''
  17857. Returns list of results from `Page.get_text()`, optionally using
  17858. concurrency for speed.
  17859. Args:
  17860. path:
  17861. Path of document.
  17862. pages:
  17863. List of page numbers to process, or None to include all pages.
  17864. method:
  17865. 'single'
  17866. Do not use concurrency.
  17867. 'mp'
  17868. Operate concurrently using Python's `multiprocessing` module.
  17869. 'fork'
  17870. Operate concurrently using custom implementation with
  17871. `os.fork`. Does not work on Windows.
  17872. concurrency:
  17873. Number of worker processes to use when operating concurrently. If
  17874. None, we use the number of available CPUs.
  17875. option
  17876. clip
  17877. flags
  17878. textpage
  17879. sort
  17880. delimiters:
  17881. Passed to internal calls to `Page.get_text()`.
  17882. '''
  17883. args_dict = dict(
  17884. option=option,
  17885. clip=clip,
  17886. flags=flags,
  17887. textpage=textpage,
  17888. sort=sort,
  17889. delimiters=delimiters,
  17890. )
  17891. return apply_pages(
  17892. path,
  17893. Page.get_text,
  17894. pagefn_kwargs=args_dict,
  17895. pages=pages,
  17896. method=method,
  17897. concurrency=concurrency,
  17898. _stats=_stats,
  17899. )
  17900. class TOOLS:
  17901. '''
  17902. We use @staticmethod to avoid the need to create an instance of this class.
  17903. '''
  17904. def _derotate_matrix(page):
  17905. if isinstance(page, mupdf.PdfPage):
  17906. return JM_py_from_matrix(JM_derotate_page_matrix(page))
  17907. else:
  17908. return JM_py_from_matrix(mupdf.FzMatrix())
  17909. @staticmethod
  17910. def _fill_widget(annot, widget):
  17911. val = JM_get_widget_properties(annot, widget)
  17912. widget.rect = Rect(annot.rect)
  17913. widget.xref = annot.xref
  17914. widget.parent = annot.parent
  17915. widget._annot = annot # backpointer to annot object
  17916. if not widget.script:
  17917. widget.script = None
  17918. if not widget.script_stroke:
  17919. widget.script_stroke = None
  17920. if not widget.script_format:
  17921. widget.script_format = None
  17922. if not widget.script_change:
  17923. widget.script_change = None
  17924. if not widget.script_calc:
  17925. widget.script_calc = None
  17926. if not widget.script_blur:
  17927. widget.script_blur = None
  17928. if not widget.script_focus:
  17929. widget.script_focus = None
  17930. return val
  17931. @staticmethod
  17932. def _get_all_contents(page):
  17933. page = _as_pdf_page(page.this)
  17934. res = JM_read_contents(page.obj())
  17935. result = JM_BinFromBuffer( res)
  17936. return result
  17937. @staticmethod
  17938. def _insert_contents(page, newcont, overlay=1):
  17939. """Add bytes as a new /Contents object for a page, and return its xref."""
  17940. pdfpage = _as_pdf_page(page, required=1)
  17941. contbuf = JM_BufferFromBytes(newcont)
  17942. xref = JM_insert_contents(pdfpage.doc(), pdfpage.obj(), contbuf, overlay)
  17943. #fixme: pdfpage->doc->dirty = 1;
  17944. return xref
  17945. @staticmethod
  17946. def _le_annot_parms(annot, p1, p2, fill_color):
  17947. """Get common parameters for making annot line end symbols.
  17948. Returns:
  17949. m: matrix that maps p1, p2 to points L, P on the x-axis
  17950. im: its inverse
  17951. L, P: transformed p1, p2
  17952. w: line width
  17953. scol: stroke color string
  17954. fcol: fill color store_shrink
  17955. opacity: opacity string (gs command)
  17956. """
  17957. w = annot.border["width"] # line width
  17958. sc = annot.colors["stroke"] # stroke color
  17959. if not sc: # black if missing
  17960. sc = (0,0,0)
  17961. scol = " ".join(map(str, sc)) + " RG\n"
  17962. if fill_color:
  17963. fc = fill_color
  17964. else:
  17965. fc = annot.colors["fill"] # fill color
  17966. if not fc:
  17967. fc = (1,1,1) # white if missing
  17968. fcol = " ".join(map(str, fc)) + " rg\n"
  17969. # nr = annot.rect
  17970. np1 = p1 # point coord relative to annot rect
  17971. np2 = p2 # point coord relative to annot rect
  17972. m = Matrix(util_hor_matrix(np1, np2)) # matrix makes the line horizontal
  17973. im = ~m # inverted matrix
  17974. L = np1 * m # converted start (left) point
  17975. R = np2 * m # converted end (right) point
  17976. if 0 <= annot.opacity < 1:
  17977. opacity = "/H gs\n"
  17978. else:
  17979. opacity = ""
  17980. return m, im, L, R, w, scol, fcol, opacity
  17981. @staticmethod
  17982. def _le_butt(annot, p1, p2, lr, fill_color):
  17983. """Make stream commands for butt line end symbol. "lr" denotes left (False) or right point.
  17984. """
  17985. m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
  17986. shift = 3
  17987. d = shift * max(1, w)
  17988. M = R if lr else L
  17989. top = (M + (0, -d/2.)) * im
  17990. bot = (M + (0, d/2.)) * im
  17991. ap = "\nq\n%s%f %f m\n" % (opacity, top.x, top.y)
  17992. ap += "%f %f l\n" % (bot.x, bot.y)
  17993. ap += _format_g(w) + " w\n"
  17994. ap += scol + "s\nQ\n"
  17995. return ap
  17996. @staticmethod
  17997. def _le_circle(annot, p1, p2, lr, fill_color):
  17998. """Make stream commands for circle line end symbol. "lr" denotes left (False) or right point.
  17999. """
  18000. m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
  18001. shift = 2.5 # 2*shift*width = length of square edge
  18002. d = shift * max(1, w)
  18003. M = R - (d/2., 0) if lr else L + (d/2., 0)
  18004. r = Rect(M, M) + (-d, -d, d, d) # the square
  18005. ap = "q\n" + opacity + TOOLS._oval_string(r.tl * im, r.tr * im, r.br * im, r.bl * im)
  18006. ap += _format_g(w) + " w\n"
  18007. ap += scol + fcol + "b\nQ\n"
  18008. return ap
  18009. @staticmethod
  18010. def _le_closedarrow(annot, p1, p2, lr, fill_color):
  18011. """Make stream commands for closed arrow line end symbol. "lr" denotes left (False) or right point.
  18012. """
  18013. m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
  18014. shift = 2.5
  18015. d = shift * max(1, w)
  18016. p2 = R + (d/2., 0) if lr else L - (d/2., 0)
  18017. p1 = p2 + (-2*d, -d) if lr else p2 + (2*d, -d)
  18018. p3 = p2 + (-2*d, d) if lr else p2 + (2*d, d)
  18019. p1 *= im
  18020. p2 *= im
  18021. p3 *= im
  18022. ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y)
  18023. ap += "%f %f l\n" % (p2.x, p2.y)
  18024. ap += "%f %f l\n" % (p3.x, p3.y)
  18025. ap += _format_g(w) + " w\n"
  18026. ap += scol + fcol + "b\nQ\n"
  18027. return ap
  18028. @staticmethod
  18029. def _le_diamond(annot, p1, p2, lr, fill_color):
  18030. """Make stream commands for diamond line end symbol. "lr" denotes left (False) or right point.
  18031. """
  18032. m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
  18033. shift = 2.5 # 2*shift*width = length of square edge
  18034. d = shift * max(1, w)
  18035. M = R - (d/2., 0) if lr else L + (d/2., 0)
  18036. r = Rect(M, M) + (-d, -d, d, d) # the square
  18037. # the square makes line longer by (2*shift - 1)*width
  18038. p = (r.tl + (r.bl - r.tl) * 0.5) * im
  18039. ap = "q\n%s%f %f m\n" % (opacity, p.x, p.y)
  18040. p = (r.tl + (r.tr - r.tl) * 0.5) * im
  18041. ap += "%f %f l\n" % (p.x, p.y)
  18042. p = (r.tr + (r.br - r.tr) * 0.5) * im
  18043. ap += "%f %f l\n" % (p.x, p.y)
  18044. p = (r.br + (r.bl - r.br) * 0.5) * im
  18045. ap += "%f %f l\n" % (p.x, p.y)
  18046. ap += _format_g(w) + " w\n"
  18047. ap += scol + fcol + "b\nQ\n"
  18048. return ap
  18049. @staticmethod
  18050. def _le_openarrow(annot, p1, p2, lr, fill_color):
  18051. """Make stream commands for open arrow line end symbol. "lr" denotes left (False) or right point.
  18052. """
  18053. m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
  18054. shift = 2.5
  18055. d = shift * max(1, w)
  18056. p2 = R + (d/2., 0) if lr else L - (d/2., 0)
  18057. p1 = p2 + (-2*d, -d) if lr else p2 + (2*d, -d)
  18058. p3 = p2 + (-2*d, d) if lr else p2 + (2*d, d)
  18059. p1 *= im
  18060. p2 *= im
  18061. p3 *= im
  18062. ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y)
  18063. ap += "%f %f l\n" % (p2.x, p2.y)
  18064. ap += "%f %f l\n" % (p3.x, p3.y)
  18065. ap += _format_g(w) + " w\n"
  18066. ap += scol + "S\nQ\n"
  18067. return ap
  18068. @staticmethod
  18069. def _le_rclosedarrow(annot, p1, p2, lr, fill_color):
  18070. """Make stream commands for right closed arrow line end symbol. "lr" denotes left (False) or right point.
  18071. """
  18072. m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
  18073. shift = 2.5
  18074. d = shift * max(1, w)
  18075. p2 = R - (2*d, 0) if lr else L + (2*d, 0)
  18076. p1 = p2 + (2*d, -d) if lr else p2 + (-2*d, -d)
  18077. p3 = p2 + (2*d, d) if lr else p2 + (-2*d, d)
  18078. p1 *= im
  18079. p2 *= im
  18080. p3 *= im
  18081. ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y)
  18082. ap += "%f %f l\n" % (p2.x, p2.y)
  18083. ap += "%f %f l\n" % (p3.x, p3.y)
  18084. ap += _format_g(w) + " w\n"
  18085. ap += scol + fcol + "b\nQ\n"
  18086. return ap
  18087. @staticmethod
  18088. def _le_ropenarrow(annot, p1, p2, lr, fill_color):
  18089. """Make stream commands for right open arrow line end symbol. "lr" denotes left (False) or right point.
  18090. """
  18091. m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
  18092. shift = 2.5
  18093. d = shift * max(1, w)
  18094. p2 = R - (d/3., 0) if lr else L + (d/3., 0)
  18095. p1 = p2 + (2*d, -d) if lr else p2 + (-2*d, -d)
  18096. p3 = p2 + (2*d, d) if lr else p2 + (-2*d, d)
  18097. p1 *= im
  18098. p2 *= im
  18099. p3 *= im
  18100. ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y)
  18101. ap += "%f %f l\n" % (p2.x, p2.y)
  18102. ap += "%f %f l\n" % (p3.x, p3.y)
  18103. ap += _format_g(w) + " w\n"
  18104. ap += scol + fcol + "S\nQ\n"
  18105. return ap
  18106. @staticmethod
  18107. def _le_slash(annot, p1, p2, lr, fill_color):
  18108. """Make stream commands for slash line end symbol. "lr" denotes left (False) or right point.
  18109. """
  18110. m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
  18111. rw = 1.1547 * max(1, w) * 1.0 # makes rect diagonal a 30 deg inclination
  18112. M = R if lr else L
  18113. r = Rect(M.x - rw, M.y - 2 * w, M.x + rw, M.y + 2 * w)
  18114. top = r.tl * im
  18115. bot = r.br * im
  18116. ap = "\nq\n%s%f %f m\n" % (opacity, top.x, top.y)
  18117. ap += "%f %f l\n" % (bot.x, bot.y)
  18118. ap += _format_g(w) + " w\n"
  18119. ap += scol + "s\nQ\n"
  18120. return ap
  18121. @staticmethod
  18122. def _le_square(annot, p1, p2, lr, fill_color):
  18123. """Make stream commands for square line end symbol. "lr" denotes left (False) or right point.
  18124. """
  18125. m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
  18126. shift = 2.5 # 2*shift*width = length of square edge
  18127. d = shift * max(1, w)
  18128. M = R - (d/2., 0) if lr else L + (d/2., 0)
  18129. r = Rect(M, M) + (-d, -d, d, d) # the square
  18130. # the square makes line longer by (2*shift - 1)*width
  18131. p = r.tl * im
  18132. ap = "q\n%s%f %f m\n" % (opacity, p.x, p.y)
  18133. p = r.tr * im
  18134. ap += "%f %f l\n" % (p.x, p.y)
  18135. p = r.br * im
  18136. ap += "%f %f l\n" % (p.x, p.y)
  18137. p = r.bl * im
  18138. ap += "%f %f l\n" % (p.x, p.y)
  18139. ap += _format_g(w) + " w\n"
  18140. ap += scol + fcol + "b\nQ\n"
  18141. return ap
  18142. @staticmethod
  18143. def _oval_string(p1, p2, p3, p4):
  18144. """Return /AP string defining an oval within a 4-polygon provided as points
  18145. """
  18146. def bezier(p, q, r):
  18147. f = "%f %f %f %f %f %f c\n"
  18148. return f % (p.x, p.y, q.x, q.y, r.x, r.y)
  18149. kappa = 0.55228474983 # magic number
  18150. ml = p1 + (p4 - p1) * 0.5 # middle points ...
  18151. mo = p1 + (p2 - p1) * 0.5 # for each ...
  18152. mr = p2 + (p3 - p2) * 0.5 # polygon ...
  18153. mu = p4 + (p3 - p4) * 0.5 # side
  18154. ol1 = ml + (p1 - ml) * kappa # the 8 bezier
  18155. ol2 = mo + (p1 - mo) * kappa # helper points
  18156. or1 = mo + (p2 - mo) * kappa
  18157. or2 = mr + (p2 - mr) * kappa
  18158. ur1 = mr + (p3 - mr) * kappa
  18159. ur2 = mu + (p3 - mu) * kappa
  18160. ul1 = mu + (p4 - mu) * kappa
  18161. ul2 = ml + (p4 - ml) * kappa
  18162. # now draw, starting from middle point of left side
  18163. ap = "%f %f m\n" % (ml.x, ml.y)
  18164. ap += bezier(ol1, ol2, mo)
  18165. ap += bezier(or1, or2, mr)
  18166. ap += bezier(ur1, ur2, mu)
  18167. ap += bezier(ul1, ul2, ml)
  18168. return ap
  18169. @staticmethod
  18170. def _parse_da(annot):
  18171. if g_use_extra:
  18172. val = extra.Tools_parse_da( annot.this)
  18173. else:
  18174. def Tools__parse_da(annot):
  18175. this_annot = annot.this
  18176. assert isinstance(this_annot, mupdf.PdfAnnot)
  18177. this_annot_obj = mupdf.pdf_annot_obj( this_annot)
  18178. pdf = mupdf.pdf_get_bound_document( this_annot_obj)
  18179. try:
  18180. da = mupdf.pdf_dict_get_inheritable( this_annot_obj, PDF_NAME('DA'))
  18181. if not da.m_internal:
  18182. trailer = mupdf.pdf_trailer(pdf)
  18183. da = mupdf.pdf_dict_getl(trailer,
  18184. PDF_NAME('Root'),
  18185. PDF_NAME('AcroForm'),
  18186. PDF_NAME('DA'),
  18187. )
  18188. da_str = mupdf.pdf_to_text_string(da)
  18189. except Exception:
  18190. if g_exceptions_verbose: exception_info()
  18191. return
  18192. return da_str
  18193. val = Tools__parse_da(annot)
  18194. if not val:
  18195. return ((0,), "", 0)
  18196. font = "Helv"
  18197. fsize = 12
  18198. col = (0, 0, 0)
  18199. dat = val.split() # split on any whitespace
  18200. for i, item in enumerate(dat):
  18201. if item == "Tf":
  18202. font = dat[i - 2][1:]
  18203. fsize = float(dat[i - 1])
  18204. dat[i] = dat[i-1] = dat[i-2] = ""
  18205. continue
  18206. if item == "g": # unicolor text
  18207. col = [(float(dat[i - 1]))]
  18208. dat[i] = dat[i-1] = ""
  18209. continue
  18210. if item == "rg": # RGB colored text
  18211. col = [float(f) for f in dat[i - 3:i]]
  18212. dat[i] = dat[i-1] = dat[i-2] = dat[i-3] = ""
  18213. continue
  18214. if item == "k": # CMYK colored text
  18215. col = [float(f) for f in dat[i - 4:i]]
  18216. dat[i] = dat[i-1] = dat[i-2] = dat[i-3] = dat[i-4] = ""
  18217. continue
  18218. val = (col, font, fsize)
  18219. return val
  18220. @staticmethod
  18221. def _reset_widget(annot):
  18222. this_annot = annot
  18223. this_annot_obj = mupdf.pdf_annot_obj(this_annot)
  18224. pdf = mupdf.pdf_get_bound_document(this_annot_obj)
  18225. mupdf.pdf_field_reset(pdf, this_annot_obj)
  18226. @staticmethod
  18227. def _rotate_matrix(page):
  18228. pdfpage = page._pdf_page(required=False)
  18229. if not pdfpage.m_internal:
  18230. return JM_py_from_matrix(mupdf.FzMatrix())
  18231. return JM_py_from_matrix(JM_rotate_page_matrix(pdfpage))
  18232. @staticmethod
  18233. def _save_widget(annot, widget):
  18234. JM_set_widget_properties(annot, widget)
  18235. def _update_da(annot, da_str):
  18236. if g_use_extra:
  18237. extra.Tools_update_da( annot.this, da_str)
  18238. else:
  18239. try:
  18240. this_annot = annot.this
  18241. assert isinstance(this_annot, mupdf.PdfAnnot)
  18242. mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(this_annot), PDF_NAME('DA'), da_str)
  18243. mupdf.pdf_dict_del(mupdf.pdf_annot_obj(this_annot), PDF_NAME('DS')) # /* not supported */
  18244. mupdf.pdf_dict_del(mupdf.pdf_annot_obj(this_annot), PDF_NAME('RC')) # /* not supported */
  18245. except Exception:
  18246. if g_exceptions_verbose: exception_info()
  18247. return
  18248. return
  18249. @staticmethod
  18250. def gen_id():
  18251. global TOOLS_JM_UNIQUE_ID
  18252. TOOLS_JM_UNIQUE_ID += 1
  18253. return TOOLS_JM_UNIQUE_ID
  18254. @staticmethod
  18255. def glyph_cache_empty():
  18256. '''
  18257. Empty the glyph cache.
  18258. '''
  18259. mupdf.fz_purge_glyph_cache()
  18260. @staticmethod
  18261. def image_profile(stream, keep_image=0):
  18262. '''
  18263. Metadata of an image binary stream.
  18264. '''
  18265. return JM_image_profile(stream, keep_image)
  18266. @staticmethod
  18267. def mupdf_display_errors(on=None):
  18268. '''
  18269. Set MuPDF error display to True or False.
  18270. '''
  18271. global JM_mupdf_show_errors
  18272. if on is not None:
  18273. JM_mupdf_show_errors = bool(on)
  18274. return JM_mupdf_show_errors
  18275. @staticmethod
  18276. def mupdf_display_warnings(on=None):
  18277. '''
  18278. Set MuPDF warnings display to True or False.
  18279. '''
  18280. global JM_mupdf_show_warnings
  18281. if on is not None:
  18282. JM_mupdf_show_warnings = bool(on)
  18283. return JM_mupdf_show_warnings
  18284. @staticmethod
  18285. def mupdf_version():
  18286. '''Get version of MuPDF binary build.'''
  18287. return mupdf.FZ_VERSION
  18288. @staticmethod
  18289. def mupdf_warnings(reset=1):
  18290. '''
  18291. Get the MuPDF warnings/errors with optional reset (default).
  18292. '''
  18293. # Get any trailing `... repeated <N> times...` message.
  18294. mupdf.fz_flush_warnings()
  18295. ret = '\n'.join( JM_mupdf_warnings_store)
  18296. if reset:
  18297. TOOLS.reset_mupdf_warnings()
  18298. return ret
  18299. @staticmethod
  18300. def reset_mupdf_warnings():
  18301. global JM_mupdf_warnings_store
  18302. JM_mupdf_warnings_store = list()
  18303. @staticmethod
  18304. def set_aa_level(level):
  18305. '''
  18306. Set anti-aliasing level.
  18307. '''
  18308. mupdf.fz_set_aa_level(level)
  18309. @staticmethod
  18310. def set_annot_stem( stem=None):
  18311. global JM_annot_id_stem
  18312. if stem is None:
  18313. return JM_annot_id_stem
  18314. len_ = len(stem) + 1
  18315. if len_ > 50:
  18316. len_ = 50
  18317. JM_annot_id_stem = stem[:50]
  18318. return JM_annot_id_stem
  18319. @staticmethod
  18320. def set_font_width(doc, xref, width):
  18321. pdf = _as_pdf_document(doc, required=0)
  18322. if not pdf.m_internal:
  18323. return False
  18324. font = mupdf.pdf_load_object(pdf, xref)
  18325. dfonts = mupdf.pdf_dict_get(font, PDF_NAME('DescendantFonts'))
  18326. if mupdf.pdf_is_array(dfonts):
  18327. n = mupdf.pdf_array_len(dfonts)
  18328. for i in range(n):
  18329. dfont = mupdf.pdf_array_get(dfonts, i)
  18330. warray = mupdf.pdf_new_array(pdf, 3)
  18331. mupdf.pdf_array_push(warray, mupdf.pdf_new_int(0))
  18332. mupdf.pdf_array_push(warray, mupdf.pdf_new_int(65535))
  18333. mupdf.pdf_array_push(warray, mupdf.pdf_new_int(width))
  18334. mupdf.pdf_dict_put(dfont, PDF_NAME('W'), warray)
  18335. return True
  18336. @staticmethod
  18337. def set_graphics_min_line_width(min_line_width):
  18338. '''
  18339. Set the graphics minimum line width.
  18340. '''
  18341. mupdf.fz_set_graphics_min_line_width(min_line_width)
  18342. @staticmethod
  18343. def set_icc( on=0):
  18344. """Set ICC color handling on or off."""
  18345. if on:
  18346. if mupdf.FZ_ENABLE_ICC:
  18347. mupdf.fz_enable_icc()
  18348. else:
  18349. RAISEPY( "MuPDF built w/o ICC support",PyExc_ValueError)
  18350. elif mupdf.FZ_ENABLE_ICC:
  18351. mupdf.fz_disable_icc()
  18352. @staticmethod
  18353. def set_low_memory( on=None):
  18354. """Set / unset MuPDF device caching."""
  18355. if on is not None:
  18356. _globals.no_device_caching = bool(on)
  18357. return _globals.no_device_caching
  18358. @staticmethod
  18359. def set_small_glyph_heights(on=None):
  18360. """Set / unset small glyph heights."""
  18361. if on is not None:
  18362. _globals.small_glyph_heights = bool(on)
  18363. if g_use_extra:
  18364. extra.set_small_glyph_heights(_globals.small_glyph_heights)
  18365. return _globals.small_glyph_heights
  18366. @staticmethod
  18367. def set_subset_fontnames(on=None):
  18368. '''
  18369. Set / unset returning fontnames with their subset prefix.
  18370. '''
  18371. if on is not None:
  18372. _globals.subset_fontnames = bool(on)
  18373. if g_use_extra:
  18374. extra.set_subset_fontnames(_globals.subset_fontnames)
  18375. return _globals.subset_fontnames
  18376. @staticmethod
  18377. def show_aa_level():
  18378. '''
  18379. Show anti-aliasing values.
  18380. '''
  18381. return dict(
  18382. graphics = mupdf.fz_graphics_aa_level(),
  18383. text = mupdf.fz_text_aa_level(),
  18384. graphics_min_line_width = mupdf.fz_graphics_min_line_width(),
  18385. )
  18386. @staticmethod
  18387. def store_maxsize():
  18388. '''
  18389. MuPDF store size limit.
  18390. '''
  18391. # fixme: return gctx->store->max.
  18392. return None
  18393. @staticmethod
  18394. def store_shrink(percent):
  18395. '''
  18396. Free 'percent' of current store size.
  18397. '''
  18398. if percent >= 100:
  18399. mupdf.fz_empty_store()
  18400. return 0
  18401. if percent > 0:
  18402. mupdf.fz_shrink_store( 100 - percent)
  18403. # fixme: return gctx->store->size.
  18404. @staticmethod
  18405. def store_size():
  18406. '''
  18407. MuPDF current store size.
  18408. '''
  18409. # fixme: return gctx->store->size.
  18410. return None
  18411. @staticmethod
  18412. def unset_quad_corrections(on=None):
  18413. '''
  18414. Set ascender / descender corrections on or off.
  18415. '''
  18416. if on is not None:
  18417. _globals.skip_quad_corrections = bool(on)
  18418. if g_use_extra:
  18419. extra.set_skip_quad_corrections(_globals.skip_quad_corrections)
  18420. return _globals.skip_quad_corrections
# FIXME: JM_annot_id_stem is also defined at top-level (duplicate definition).
  18422. JM_annot_id_stem = 'fitz'
  18423. fitz_config = JM_fitz_config()
  18424. # Callbacks not yet supported with cppyy.
  18425. if not mupdf_cppyy:
  18426. mupdf.fz_set_warning_callback(JM_mupdf_warning)
  18427. mupdf.fz_set_error_callback(JM_mupdf_error)
  18428. # If there are pending warnings when we exit, we end up in this sequence:
  18429. #
  18430. # atexit()
  18431. # -> mupdf::internal_thread_state::~internal_thread_state()
  18432. # -> fz_drop_context()
  18433. # -> fz_flush_warnings()
  18434. # -> SWIG Director code
  18435. # -> Python calling JM_mupdf_warning().
  18436. #
  18437. # Unfortunately this causes a SEGV, seemingly because the SWIG Director code has
  18438. # already been torn down.
  18439. #
  18440. # So we use a Python atexit handler to explicitly call fz_flush_warnings();
  18441. # this appears to happen early enough for the Director machinery to still
  18442. # work. So in the sequence above, fz_flush_warnings() will find that there are
  18443. # no pending warnings and will not attempt to call JM_mupdf_warning().
  18444. #
  18445. def _atexit():
  18446. #log( 'PyMuPDF/src/__init__.py:_atexit() called')
  18447. mupdf.fz_flush_warnings()
  18448. mupdf.fz_set_warning_callback(None)
  18449. mupdf.fz_set_error_callback(None)
  18450. #log( '_atexit() returning')
  18451. atexit.register( _atexit)
  18452. # List of (name, red, green, blue) where:
  18453. # name: upper-case name.
  18454. # red, green, blue: integer in range 0..255.
  18455. #
  18456. from . import _wxcolors
  18457. _wxcolors = _wxcolors._wxcolors
  18458. # Dict mapping from name to (red, green, blue).
  18459. # name: lower-case name.
  18460. # red, green, blue: float in range 0..1.
  18461. #
  18462. pdfcolor = dict()
  18463. for name, r, g, b in _wxcolors:
  18464. pdfcolor[name.lower()] = (r/255, g/255, b/255)
  18465. def colors_pdf_dict():
  18466. '''
  18467. Returns dict mapping from name to (red, green, blue).
  18468. name: lower-case name.
  18469. red, green, blue: float in range 0..1.
  18470. '''
  18471. return pdfcolor
  18472. def colors_wx_list():
  18473. '''
  18474. Returns list of (name, red, green, blue) tuples:
  18475. name: upper-case name.
  18476. red, green, blue: integers in range 0..255.
  18477. '''
  18478. return _wxcolors
  18479. # We cannot import utils earlier because it imports this .py file itself and
  18480. # uses some pymupdf.* types in function typing.
  18481. #
  18482. from . import utils
  18483. # Use utils.*() fns for some class methods.
  18484. #
  18485. recover_bbox_quad = utils.recover_bbox_quad
  18486. recover_char_quad = utils.recover_char_quad
  18487. recover_line_quad = utils.recover_line_quad
  18488. recover_quad = utils.recover_quad
  18489. recover_span_quad = utils.recover_span_quad
  18490. Annot.get_text = utils.get_text
  18491. Annot.get_textbox = utils.get_textbox
  18492. Document._do_links = utils.do_links
  18493. Document._do_widgets = utils.do_widgets
  18494. Document.del_toc_item = utils.del_toc_item
  18495. Document.get_char_widths = utils.get_char_widths
  18496. Document.get_oc = utils.get_oc
  18497. Document.get_ocmd = utils.get_ocmd
  18498. Document.get_page_labels = utils.get_page_labels
  18499. Document.get_page_numbers = utils.get_page_numbers
  18500. Document.get_page_pixmap = utils.get_page_pixmap
  18501. Document.get_page_text = utils.get_page_text
  18502. Document.get_toc = utils.get_toc
  18503. Document.has_annots = utils.has_annots
  18504. Document.has_links = utils.has_links
  18505. Document.insert_page = utils.insert_page
  18506. Document.new_page = utils.new_page
  18507. Document.scrub = utils.scrub
  18508. Document.search_page_for = utils.search_page_for
  18509. Document.set_metadata = utils.set_metadata
  18510. Document.set_oc = utils.set_oc
  18511. Document.set_ocmd = utils.set_ocmd
  18512. Document.set_page_labels = utils.set_page_labels
  18513. Document.set_toc = utils.set_toc
  18514. Document.set_toc_item = utils.set_toc_item
  18515. Document.subset_fonts = utils.subset_fonts
  18516. Document.tobytes = Document.write
  18517. Document.xref_copy = utils.xref_copy
  18518. IRect.get_area = utils.get_area
  18519. Page.apply_redactions = utils.apply_redactions
  18520. Page.delete_image = utils.delete_image
  18521. Page.delete_widget = utils.delete_widget
  18522. Page.draw_bezier = utils.draw_bezier
  18523. Page.draw_circle = utils.draw_circle
  18524. Page.draw_curve = utils.draw_curve
  18525. Page.draw_line = utils.draw_line
  18526. Page.draw_oval = utils.draw_oval
  18527. Page.draw_polyline = utils.draw_polyline
  18528. Page.draw_quad = utils.draw_quad
  18529. Page.draw_rect = utils.draw_rect
  18530. Page.draw_sector = utils.draw_sector
  18531. Page.draw_squiggle = utils.draw_squiggle
  18532. Page.draw_zigzag = utils.draw_zigzag
  18533. Page.get_image_info = utils.get_image_info
  18534. Page.get_image_rects = utils.get_image_rects
  18535. Page.get_label = utils.get_label
  18536. Page.get_links = utils.get_links
  18537. Page.get_pixmap = utils.get_pixmap
  18538. Page.get_text = utils.get_text
  18539. Page.get_text_blocks = utils.get_text_blocks
  18540. Page.get_text_selection = utils.get_text_selection
  18541. Page.get_text_words = utils.get_text_words
  18542. Page.get_textbox = utils.get_textbox
  18543. Page.get_textpage_ocr = utils.get_textpage_ocr
  18544. Page.insert_image = utils.insert_image
  18545. Page.insert_link = utils.insert_link
  18546. Page.insert_text = utils.insert_text
  18547. Page.insert_textbox = utils.insert_textbox
  18548. Page.insert_htmlbox = utils.insert_htmlbox
  18549. Page.new_shape = lambda x: utils.Shape(x)
  18550. Page.replace_image = utils.replace_image
  18551. Page.search_for = utils.search_for
  18552. Page.show_pdf_page = utils.show_pdf_page
  18553. Page.update_link = utils.update_link
  18554. Page.write_text = utils.write_text
  18555. Shape = utils.Shape
  18556. from .table import find_tables
  18557. Page.find_tables = find_tables
  18558. Rect.get_area = utils.get_area
  18559. TextWriter.fill_textbox = utils.fill_textbox
  18560. class FitzDeprecation(DeprecationWarning):
  18561. pass
  18562. def restore_aliases():
  18563. warnings.filterwarnings( "once", category=FitzDeprecation)
  18564. def showthis(msg, cat, filename, lineno, file=None, line=None):
  18565. text = warnings.formatwarning(msg, cat, filename, lineno, line=line)
  18566. s = text.find("FitzDeprecation")
  18567. if s < 0:
  18568. log(text)
  18569. return
  18570. text = text[s:].splitlines()[0][4:]
  18571. log(text)
  18572. warnings.showwarning = showthis
  18573. def _alias(class_, new_name, legacy_name=None):
  18574. '''
  18575. Adds an alias for a class_ or module item clled <class_>.<new>.
  18576. class_:
  18577. Class/module to modify; use None for the current module.
  18578. new_name:
  18579. String name of existing item, e.g. name of method.
  18580. legacy_name:
  18581. Name of legacy object to create in <class_>. If None, we generate
  18582. from <item> by removing underscores and capitalising the next
  18583. letter.
  18584. '''
  18585. if class_ is None:
  18586. class_ = sys.modules[__name__]
  18587. if not legacy_name:
  18588. legacy_name = ''
  18589. capitalise_next = False
  18590. for c in new_name:
  18591. if c == '_':
  18592. capitalise_next = True
  18593. elif capitalise_next:
  18594. legacy_name += c.upper()
  18595. capitalise_next = False
  18596. else:
  18597. legacy_name += c
  18598. new_object = getattr( class_, new_name)
  18599. assert not getattr( class_, legacy_name, None), f'class {class_} already has {legacy_name}'
  18600. if callable( new_object):
  18601. def deprecated_function( *args, **kwargs):
  18602. warnings.warn(
  18603. f'"{legacy_name=}" removed from {class_} after v1.19.0 - use "{new_name}".',
  18604. category=FitzDeprecation,
  18605. )
  18606. return new_object( *args, **kwargs)
  18607. setattr( class_, legacy_name, deprecated_function)
  18608. deprecated_function.__doc__ = (
  18609. f'*** Deprecated and removed in version after v1.19.0 - use "{new_name}". ***\n'
  18610. f'{new_object.__doc__}'
  18611. )
  18612. else:
  18613. setattr( class_, legacy_name, new_object)
  18614. _alias( Annot, 'get_file', 'fileGet')
  18615. _alias( Annot, 'get_pixmap')
  18616. _alias( Annot, 'get_sound', 'soundGet')
  18617. _alias( Annot, 'get_text')
  18618. _alias( Annot, 'get_textbox')
  18619. _alias( Annot, 'get_textpage', 'getTextPage')
  18620. _alias( Annot, 'line_ends')
  18621. _alias( Annot, 'set_blendmode', 'setBlendMode')
  18622. _alias( Annot, 'set_border')
  18623. _alias( Annot, 'set_colors')
  18624. _alias( Annot, 'set_flags')
  18625. _alias( Annot, 'set_info')
  18626. _alias( Annot, 'set_line_ends')
  18627. _alias( Annot, 'set_name')
  18628. _alias( Annot, 'set_oc', 'setOC')
  18629. _alias( Annot, 'set_opacity')
  18630. _alias( Annot, 'set_rect')
  18631. _alias( Annot, 'update_file', 'fileUpd')
  18632. _alias( DisplayList, 'get_pixmap')
  18633. _alias( DisplayList, 'get_textpage', 'getTextPage')
  18634. _alias( Document, 'chapter_count')
  18635. _alias( Document, 'chapter_page_count')
  18636. _alias( Document, 'convert_to_pdf', 'convertToPDF')
  18637. _alias( Document, 'copy_page')
  18638. _alias( Document, 'delete_page')
  18639. _alias( Document, 'delete_pages', 'deletePageRange')
  18640. _alias( Document, 'embfile_add', 'embeddedFileAdd')
  18641. _alias( Document, 'embfile_count', 'embeddedFileCount')
  18642. _alias( Document, 'embfile_del', 'embeddedFileDel')
  18643. _alias( Document, 'embfile_get', 'embeddedFileGet')
  18644. _alias( Document, 'embfile_info', 'embeddedFileInfo')
  18645. _alias( Document, 'embfile_names', 'embeddedFileNames')
  18646. _alias( Document, 'embfile_upd', 'embeddedFileUpd')
  18647. _alias( Document, 'extract_font')
  18648. _alias( Document, 'extract_image')
  18649. _alias( Document, 'find_bookmark')
  18650. _alias( Document, 'fullcopy_page')
  18651. _alias( Document, 'get_char_widths')
  18652. _alias( Document, 'get_ocgs', 'getOCGs')
  18653. _alias( Document, 'get_page_fonts', 'getPageFontList')
  18654. _alias( Document, 'get_page_images', 'getPageImageList')
  18655. _alias( Document, 'get_page_pixmap')
  18656. _alias( Document, 'get_page_text')
  18657. _alias( Document, 'get_page_xobjects', 'getPageXObjectList')
  18658. _alias( Document, 'get_sigflags', 'getSigFlags')
  18659. _alias( Document, 'get_toc', 'getToC')
  18660. _alias( Document, 'get_xml_metadata')
  18661. _alias( Document, 'insert_page')
  18662. _alias( Document, 'insert_pdf', 'insertPDF')
  18663. _alias( Document, 'is_dirty')
  18664. _alias( Document, 'is_form_pdf', 'isFormPDF')
  18665. _alias( Document, 'is_pdf', 'isPDF')
  18666. _alias( Document, 'is_reflowable')
  18667. _alias( Document, 'is_repaired')
  18668. _alias( Document, 'last_location')
  18669. _alias( Document, 'load_page')
  18670. _alias( Document, 'make_bookmark')
  18671. _alias( Document, 'move_page')
  18672. _alias( Document, 'needs_pass')
  18673. _alias( Document, 'new_page')
  18674. _alias( Document, 'next_location')
  18675. _alias( Document, 'page_count')
  18676. _alias( Document, 'page_cropbox', 'pageCropBox')
  18677. _alias( Document, 'page_xref')
  18678. _alias( Document, 'pdf_catalog', 'PDFCatalog')
  18679. _alias( Document, 'pdf_trailer', 'PDFTrailer')
  18680. _alias( Document, 'prev_location', 'previousLocation')
  18681. _alias( Document, 'resolve_link')
  18682. _alias( Document, 'search_page_for')
  18683. _alias( Document, 'set_language')
  18684. _alias( Document, 'set_metadata')
  18685. _alias( Document, 'set_toc', 'setToC')
  18686. _alias( Document, 'set_xml_metadata')
  18687. _alias( Document, 'update_object')
  18688. _alias( Document, 'update_stream')
  18689. _alias( Document, 'xref_is_stream', 'isStream')
  18690. _alias( Document, 'xref_length')
  18691. _alias( Document, 'xref_object')
  18692. _alias( Document, 'xref_stream')
  18693. _alias( Document, 'xref_stream_raw')
  18694. _alias( Document, 'xref_xml_metadata', 'metadataXML')
  18695. _alias( IRect, 'get_area')
  18696. _alias( IRect, 'get_area', 'getRectArea')
  18697. _alias( IRect, 'include_point')
  18698. _alias( IRect, 'include_rect')
  18699. _alias( IRect, 'is_empty')
  18700. _alias( IRect, 'is_infinite')
  18701. _alias( Link, 'is_external')
  18702. _alias( Link, 'set_border')
  18703. _alias( Link, 'set_colors')
  18704. _alias( Matrix, 'is_rectilinear')
  18705. _alias( Matrix, 'prerotate', 'preRotate')
  18706. _alias( Matrix, 'prescale', 'preScale')
  18707. _alias( Matrix, 'preshear', 'preShear')
  18708. _alias( Matrix, 'pretranslate', 'preTranslate')
  18709. _alias( None, 'get_pdf_now', 'getPDFnow')
  18710. _alias( None, 'get_pdf_str', 'getPDFstr')
  18711. _alias( None, 'get_text_length')
  18712. _alias( None, 'get_text_length', 'getTextlength')
  18713. _alias( None, 'image_profile', 'ImageProperties')
  18714. _alias( None, 'paper_rect', 'PaperRect')
  18715. _alias( None, 'paper_size', 'PaperSize')
  18716. _alias( None, 'paper_sizes')
  18717. _alias( None, 'planish_line')
  18718. _alias( Outline, 'is_external')
  18719. _alias( Outline, 'is_open')
  18720. _alias( Page, 'add_caret_annot')
  18721. _alias( Page, 'add_circle_annot')
  18722. _alias( Page, 'add_file_annot')
  18723. _alias( Page, 'add_freetext_annot')
  18724. _alias( Page, 'add_highlight_annot')
  18725. _alias( Page, 'add_ink_annot')
  18726. _alias( Page, 'add_line_annot')
  18727. _alias( Page, 'add_polygon_annot')
  18728. _alias( Page, 'add_polyline_annot')
  18729. _alias( Page, 'add_rect_annot')
  18730. _alias( Page, 'add_redact_annot')
  18731. _alias( Page, 'add_squiggly_annot')
  18732. _alias( Page, 'add_stamp_annot')
  18733. _alias( Page, 'add_strikeout_annot')
  18734. _alias( Page, 'add_text_annot')
  18735. _alias( Page, 'add_underline_annot')
  18736. _alias( Page, 'add_widget')
  18737. _alias( Page, 'clean_contents')
  18738. _alias( Page, 'cropbox', 'CropBox')
  18739. _alias( Page, 'cropbox_position', 'CropBoxPosition')
  18740. _alias( Page, 'delete_annot')
  18741. _alias( Page, 'delete_link')
  18742. _alias( Page, 'delete_widget')
  18743. _alias( Page, 'derotation_matrix')
  18744. _alias( Page, 'draw_bezier')
  18745. _alias( Page, 'draw_circle')
  18746. _alias( Page, 'draw_curve')
  18747. _alias( Page, 'draw_line')
  18748. _alias( Page, 'draw_oval')
  18749. _alias( Page, 'draw_polyline')
  18750. _alias( Page, 'draw_quad')
  18751. _alias( Page, 'draw_rect')
  18752. _alias( Page, 'draw_sector')
  18753. _alias( Page, 'draw_squiggle')
  18754. _alias( Page, 'draw_zigzag')
  18755. _alias( Page, 'first_annot')
  18756. _alias( Page, 'first_link')
  18757. _alias( Page, 'first_widget')
  18758. _alias( Page, 'get_contents')
  18759. _alias( Page, 'get_displaylist', 'getDisplayList')
  18760. _alias( Page, 'get_drawings')
  18761. _alias( Page, 'get_fonts', 'getFontList')
  18762. _alias( Page, 'get_image_bbox')
  18763. _alias( Page, 'get_images', 'getImageList')
  18764. _alias( Page, 'get_links')
  18765. _alias( Page, 'get_pixmap')
  18766. _alias( Page, 'get_svg_image', 'getSVGimage')
  18767. _alias( Page, 'get_text')
  18768. _alias( Page, 'get_text_blocks')
  18769. _alias( Page, 'get_text_words')
  18770. _alias( Page, 'get_textbox')
  18771. _alias( Page, 'get_textpage', 'getTextPage')
  18772. _alias( Page, 'insert_font')
  18773. _alias( Page, 'insert_image')
  18774. _alias( Page, 'insert_link')
  18775. _alias( Page, 'insert_text')
  18776. _alias( Page, 'insert_textbox')
  18777. _alias( Page, 'is_wrapped', '_isWrapped')
  18778. _alias( Page, 'load_annot')
  18779. _alias( Page, 'load_links')
  18780. _alias( Page, 'mediabox', 'MediaBox')
  18781. _alias( Page, 'mediabox_size', 'MediaBoxSize')
  18782. _alias( Page, 'new_shape')
  18783. _alias( Page, 'read_contents')
  18784. _alias( Page, 'rotation_matrix')
  18785. _alias( Page, 'search_for')
  18786. _alias( Page, 'set_cropbox', 'setCropBox')
  18787. _alias( Page, 'set_mediabox', 'setMediaBox')
  18788. _alias( Page, 'set_rotation')
  18789. _alias( Page, 'show_pdf_page', 'showPDFpage')
  18790. _alias( Page, 'transformation_matrix')
  18791. _alias( Page, 'update_link')
  18792. _alias( Page, 'wrap_contents')
  18793. _alias( Page, 'write_text')
  18794. _alias( Pixmap, 'clear_with')
  18795. _alias( Pixmap, 'copy', 'copyPixmap')
  18796. _alias( Pixmap, 'gamma_with')
  18797. _alias( Pixmap, 'invert_irect', 'invertIRect')
  18798. _alias( Pixmap, 'pil_save', 'pillowWrite')
  18799. _alias( Pixmap, 'pil_tobytes', 'pillowData')
  18800. _alias( Pixmap, 'save', 'writeImage')
  18801. _alias( Pixmap, 'save', 'writePNG')
  18802. _alias( Pixmap, 'set_alpha')
  18803. _alias( Pixmap, 'set_dpi', 'setResolution')
  18804. _alias( Pixmap, 'set_origin')
  18805. _alias( Pixmap, 'set_pixel')
  18806. _alias( Pixmap, 'set_rect')
  18807. _alias( Pixmap, 'tint_with')
  18808. _alias( Pixmap, 'tobytes', 'getImageData')
  18809. _alias( Pixmap, 'tobytes', 'getPNGData')
  18810. _alias( Pixmap, 'tobytes', 'getPNGdata')
  18811. _alias( Quad, 'is_convex')
  18812. _alias( Quad, 'is_empty')
  18813. _alias( Quad, 'is_rectangular')
  18814. _alias( Rect, 'get_area')
  18815. _alias( Rect, 'get_area', 'getRectArea')
  18816. _alias( Rect, 'include_point')
  18817. _alias( Rect, 'include_rect')
  18818. _alias( Rect, 'is_empty')
  18819. _alias( Rect, 'is_infinite')
  18820. _alias( TextWriter, 'fill_textbox')
  18821. _alias( TextWriter, 'write_text')
  18822. _alias( utils.Shape, 'draw_bezier')
  18823. _alias( utils.Shape, 'draw_circle')
  18824. _alias( utils.Shape, 'draw_curve')
  18825. _alias( utils.Shape, 'draw_line')
  18826. _alias( utils.Shape, 'draw_oval')
  18827. _alias( utils.Shape, 'draw_polyline')
  18828. _alias( utils.Shape, 'draw_quad')
  18829. _alias( utils.Shape, 'draw_rect')
  18830. _alias( utils.Shape, 'draw_sector')
  18831. _alias( utils.Shape, 'draw_squiggle')
  18832. _alias( utils.Shape, 'draw_zigzag')
  18833. _alias( utils.Shape, 'insert_text')
  18834. _alias( utils.Shape, 'insert_textbox')
  18835. if 0:
  18836. restore_aliases()
  18837. __version__ = VersionBind
  18838. __doc__ = (
  18839. f'PyMuPDF {VersionBind}: Python bindings for the MuPDF {VersionFitz} library (rebased implementation).\n'
  18840. f'Python {sys.version_info[0]}.{sys.version_info[1]} running on {sys.platform} ({64 if sys.maxsize > 2**32 else 32}-bit).\n'
  18841. )