| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762
27722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762
77727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763
27732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763
77737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989399039913992399339943995399639973998399940004001400240034004400540064007400840094010401140124013401440154016401740184019402040214022402340244025402640274028402940304031403240334034403540364037403840394040404140424043404440454046404740484049405040514052405340544055405640574058405940604061406240634064406540664067406840694070407140724073407440754076407740784079408040814082408340844085408640874088408940904091409240934094409540964097409840994100410141024103410441054106410741084109411041114112411341144115411641174118411941204121412241234124412541264127412841294130413141324133413441354136413741384139414041414142414341444145414641474148414941504151415241534154415541564157415841594160416141624163416441654166416741684169417041714172417341744175417641774178417941804181418241834184418541864187418841894190419141924193419441954196419741984199420042014202420342044205420642074208420942104211421242134214421542164217421842194220422142224223422442254226422742284229423042314232423342344235423642374238423942404241424242434244424542464247424842494250425142524253425442554256425742584259426042614262426342644265426642674268426942704271427242734274427542764
27742784279428042814282428342844285428642874288428942904291429242934294429542964297429842994300430143024303430443054306430743084309431043114312431343144315431643174318431943204321432243234324432543264327432843294330433143324333433443354336433743384339434043414342434343444345434643474348434943504351435243534354435543564357435843594360436143624363436443654366436743684369437043714372437343744375437643774378437943804381438243834384438543864387438843894390439143924393439443954396439743984399440044014402440344044405440644074408440944104411441244134414441544164417441844194420442144224423442444254426442744284429443044314432443344344435443644374438443944404441444244434444444544464447444844494450445144524453445444554456445744584459446044614462446344644465446644674468446944704471447244734474447544764477447844794480448144824483448444854486448744884489449044914492449344944495449644974498449945004501450245034504450545064507450845094510451145124513451445154516451745184519452045214522452345244525452645274528452945304531453245334534453545364537453845394540454145424543454445454546454745484549455045514552455345544555455645574558455945604561456245634564456545664567456845694570457145724573457445754576457745784579458045814582458345844585458645874588458945904591459245934594459545964597459845994600460146024603460446054606460746084609461046114612461346144615461646174618461946204621462246234624462546264627462846294630463146324633463446354636463746384639464046414642464346444645464646474648464946504651465246534654465546564657465846594660466146624663466446654666466746684669467046714672467346744675467646774678467946804681468246834684468546864687468846894690469146924693469446954696469746984699470047014702470347044705470647074708470947104711471247134714471547164717471847194720472147224723472447254726472747284729473047314732473347344735473647374738473947404741474247434744474547464747474847494750475147524753475447554756475747584759476047614762476347644765476647674768476947704771477247734774477547764
77747784779478047814782478347844785478647874788478947904791479247934794479547964797479847994800480148024803480448054806480748084809481048114812481348144815481648174818481948204821482248234824482548264827482848294830483148324833483448354836483748384839484048414842484348444845484648474848484948504851485248534854485548564857485848594860486148624863486448654866486748684869487048714872487348744875487648774878487948804881488248834884488548864887488848894890489148924893489448954896489748984899490049014902490349044905490649074908490949104911491249134914491549164917491849194920492149224923492449254926492749284929493049314932493349344935493649374938493949404941494249434944494549464947494849494950495149524953495449554956495749584959496049614962496349644965496649674968496949704971497249734974497549764977497849794980498149824983498449854986498749884989499049914992499349944995499649974998499950005001500250035004500550065007500850095010501150125013501450155016501750185019502050215022502350245025502650275028502950305031503250335034503550365037503850395040504150425043504450455046504750485049505050515052505350545055505650575058505950605061506250635064506550665067506850695070507150725073507450755076507750785079508050815082508350845085508650875088508950905091509250935094509550965097509850995100510151025103510451055106510751085109511051115112511351145115511651175118511951205121512251235124512551265127512851295130513151325133513451355136513751385139514051415142514351445145514651475148514951505151515251535154515551565157515851595160516151625163516451655166516751685169517051715172517351745175517651775178517951805181518251835184518551865187518851895190519151925193519451955196519751985199520052015202520352045205520652075208520952105211521252135214521552165217521852195220522152225223522452255226522752285229523052315232523352345235523652375238523952405241524252435244524552465247524852495250525152525253525452555256525752585259526052615262526352645265526652675268526952705271527252735274527552765
27752785279528052815282528352845285528652875288528952905291529252935294529552965297529852995300530153025303530453055306530753085309531053115312531353145315531653175318531953205321532253235324532553265327532853295330533153325333533453355336533753385339534053415342534353445345534653475348534953505351535253535354535553565357535853595360536153625363536453655366536753685369537053715372537353745375537653775378537953805381538253835384538553865387538853895390539153925393539453955396539753985399540054015402540354045405540654075408540954105411541254135414541554165417541854195420542154225423542454255426542754285429543054315432543354345435543654375438543954405441544254435444544554465447544854495450545154525453545454555456545754585459546054615462546354645465546654675468546954705471547254735474547554765477547854795480548154825483548454855486548754885489549054915492549354945495549654975498549955005501550255035504550555065507550855095510551155125513551455155516551755185519552055215522552355245525552655275528552955305531553255335534553555365537553855395540554155425543554455455546554755485549555055515552555355545555555655575558555955605561556255635564556555665567556855695570557155725573557455755576557755785579558055815582558355845585558655875588558955905591559255935594559555965597559855995600560156025603560456055606560756085609561056115612561356145615561656175618561956205621562256235624562556265627562856295630563156325633563456355636563756385639564056415642564356445645564656475648564956505651565256535654565556565657565856595660566156625663566456655666566756685669567056715672567356745675567656775678567956805681568256835684568556865687568856895690569156925693569456955696569756985699570057015702570357045705570657075708570957105711571257135714571557165717571857195720572157225723572457255726572757285729573057315732573357345735573657375738573957405741574257435744574557465747574857495750575157525753575457555756575757585759576057615762576357645765576657675768576957705771577257735774577557765
77757785779578057815782578357845785578657875788578957905791579257935794579557965797579857995800580158025803580458055806580758085809581058115812581358145815581658175818581958205821582258235824582558265827582858295830583158325833583458355836583758385839584058415842584358445845584658475848584958505851585258535854585558565857585858595860586158625863586458655866586758685869587058715872587358745875587658775878587958805881588258835884588558865887588858895890589158925893589458955896589758985899590059015902590359045905590659075908590959105911591259135914591559165917591859195920592159225923592459255926592759285929593059315932593359345935593659375938593959405941594259435944594559465947594859495950595159525953595459555956595759585959596059615962596359645965596659675968596959705971597259735974597559765977597859795980598159825983598459855986598759885989599059915992599359945995599659975998599960006001600260036004600560066007600860096010601160126013601460156016601760186019602060216022602360246025602660276028602960306031603260336034603560366037603860396040604160426043604460456046604760486049605060516052605360546055605660576058605960606061606260636064606560666067606860696070607160726073607460756076607760786079608060816082608360846085608660876088608960906091609260936094609560966097609860996100610161026103610461056106610761086109611061116112611361146115611661176118611961206121612261236124612561266127612861296130613161326133613461356136613761386139614061416142614361446145614661476148614961506151615261536154615561566157615861596160616161626163616461656166616761686169617061716172617361746175617661776178617961806181618261836184618561866187618861896190619161926193619461956196619761986199620062016202620362046205620662076208620962106211621262136214621562166217621862196220622162226223622462256226622762286229623062316232623362346235623662376238623962406241624262436244624562466247624862496250625162526253625462556256625762586259626062616262626362646265626662676268626962706271627262736274627562766
27762786279628062816282628362846285628662876288628962906291629262936294629562966297629862996300630163026303630463056306630763086309631063116312631363146315631663176318631963206321632263236324632563266327632863296330633163326333633463356336633763386339634063416342634363446345634663476348634963506351635263536354635563566357635863596360636163626363636463656366636763686369637063716372637363746375637663776378637963806381638263836384638563866387638863896390639163926393639463956396639763986399640064016402640364046405640664076408640964106411641264136414641564166417641864196420642164226423642464256426642764286429643064316432643364346435643664376438643964406441644264436444644564466447644864496450645164526453645464556456645764586459646064616462646364646465646664676468646964706471647264736474647564766477647864796480648164826483648464856486648764886489649064916492649364946495649664976498649965006501650265036504650565066507650865096510651165126513651465156516651765186519652065216522652365246525652665276528652965306531653265336534653565366537653865396540654165426543654465456546654765486549655065516552655365546555655665576558655965606561656265636564656565666567656865696570657165726573657465756576657765786579658065816582658365846585658665876588658965906591659265936594659565966597659865996600660166026603660466056606660766086609661066116612661366146615661666176618661966206621662266236624662566266627662866296630663166326633663466356636663766386639664066416642664366446645664666476648664966506651665266536654665566566657665866596660666166626663666466656666666766686669667066716672667366746675667666776678667966806681668266836684668566866687668866896690669166926693669466956696669766986699670067016702670367046705670667076708670967106711671267136714671567166717671867196720672167226723672467256726672767286729673067316732673367346735673667376738673967406741674267436744674567466747674867496750675167526753675467556756675767586759676067616762676367646765676667676768676967706771677267736774677567766
77767786779678067816782678367846785678667876788678967906791679267936794679567966797679867996800680168026803680468056806680768086809681068116812681368146815681668176818681968206821682268236824682568266827682868296830683168326833683468356836683768386839684068416842684368446845684668476848684968506851685268536854685568566857685868596860686168626863686468656866686768686869687068716872687368746875687668776878687968806881688268836884688568866887688868896890689168926893689468956896689768986899690069016902690369046905690669076908690969106911691269136914691569166917691869196920692169226923692469256926692769286929693069316932693369346935693669376938693969406941694269436944694569466947694869496950695169526953695469556956695769586959696069616962696369646965696669676968696969706971697269736974697569766977697869796980698169826983698469856986698769886989699069916992699369946995699669976998699970007001700270037004700570067007700870097010701170127013701470157016701770187019702070217022702370247025702670277028702970307031703270337034703570367037703870397040704170427043704470457046704770487049705070517052705370547055705670577058705970607061706270637064706570667067706870697070707170727073707470757076707770787079708070817082708370847085708670877088708970907091709270937094709570967097709870997100710171027103710471057106710771087109711071117112711371147115711671177118711971207121712271237124712571267127712871297130713171327133713471357136713771387139714071417142714371447145714671477148714971507151715271537154715571567157715871597160716171627163716471657166716771687169717071717172717371747175717671777178717971807181718271837184718571867187718871897190719171927193719471957196719771987199720072017202720372047205720672077208720972107211721272137214721572167217721872197220722172227223722472257226722772287229723072317232723372347235723672377238723972407241724272437244724572467247724872497250725172527253725472557256725772587259726072617262726372647265726672677268726972707271727272737274727572767
27772787279728072817282728372847285728672877288728972907291729272937294729572967297729872997300730173027303730473057306730773087309731073117312731373147315731673177318731973207321732273237324732573267327732873297330733173327333733473357336733773387339734073417342734373447345734673477348734973507351735273537354735573567357735873597360736173627363736473657366736773687369737073717372737373747375737673777378737973807381738273837384738573867387738873897390739173927393739473957396739773987399740074017402740374047405740674077408740974107411741274137414741574167417741874197420742174227423742474257426742774287429743074317432743374347435743674377438743974407441744274437444744574467447744874497450745174527453745474557456745774587459746074617462746374647465746674677468746974707471747274737474747574767477747874797480748174827483748474857486748774887489749074917492749374947495749674977498749975007501750275037504750575067507750875097510751175127513751475157516751775187519752075217522752375247525752675277528752975307531753275337534753575367537753875397540754175427543754475457546754775487549755075517552755375547555755675577558755975607561756275637564756575667567756875697570757175727573757475757576757775787579758075817582758375847585758675877588758975907591759275937594759575967597759875997600760176027603760476057606760776087609761076117612761376147615761676177618761976207621762276237624762576267627762876297630763176327633763476357636763776387639764076417642764376447645764676477648764976507651765276537654765576567657765876597660766176627663766476657666766776687669767076717672767376747675767676777678767976807681768276837684768576867687768876897690769176927693769476957696769776987699770077017702770377047705770677077708770977107711771277137714771577167717771877197720772177227723772477257726772777287729773077317732773377347735773677377738773977407741774277437744774577467747774877497750775177527753775477557756775777587759776077617762776377647765776677677768776977707771777277737774777577767
77777787779778077817782778377847785778677877788778977907791779277937794779577967797779877997800780178027803780478057806780778087809781078117812781378147815781678177818781978207821782278237824782578267827782878297830783178327833783478357836783778387839784078417842784378447845784678477848784978507851785278537854785578567857785878597860786178627863786478657866786778687869787078717872787378747875787678777878787978807881788278837884788578867887788878897890789178927893789478957896789778987899790079017902790379047905790679077908790979107911791279137914791579167917791879197920792179227923792479257926792779287929793079317932793379347935793679377938793979407941794279437944794579467947794879497950795179527953795479557956795779587959796079617962796379647965796679677968796979707971797279737974797579767977797879797980798179827983798479857986798779887989799079917992799379947995799679977998799980008001800280038004800580068007800880098010801180128013801480158016801780188019802080218022802380248025802680278028802980308031803280338034803580368037803880398040804180428043804480458046804780488049805080518052805380548055805680578058805980608061806280638064806580668067806880698070807180728073807480758076807780788079808080818082808380848085808680878088808980908091809280938094809580968097809880998100810181028103810481058106810781088109811081118112811381148115811681178118811981208121812281238124812581268127812881298130813181328133813481358136813781388139814081418142814381448145814681478148814981508151815281538154815581568157815881598160816181628163816481658166816781688169817081718172817381748175817681778178817981808181818281838184818581868187818881898190819181928193819481958196819781988199820082018202820382048205820682078208820982108211821282138214821582168217821882198220822182228223822482258226822782288229823082318232823382348235823682378238823982408241824282438244824582468247824882498250825182528253825482558256825782588259826082618262826382648265826682678268826982708271827282738274827582768
27782788279828082818282828382848285828682878288828982908291829282938294829582968297829882998300830183028303830483058306830783088309831083118312831383148315831683178318831983208321832283238324832583268327832883298330833183328333833483358336833783388339834083418342834383448345834683478348834983508351835283538354835583568357835883598360836183628363836483658366836783688369837083718372837383748375837683778378837983808381838283838384838583868387838883898390839183928393839483958396839783988399840084018402840384048405840684078408840984108411841284138414841584168417841884198420842184228423842484258426842784288429843084318432843384348435843684378438843984408441844284438444844584468447844884498450845184528453845484558456845784588459846084618462846384648465846684678468846984708471847284738474847584768477847884798480848184828483848484858486848784888489849084918492849384948495849684978498849985008501850285038504850585068507850885098510851185128513851485158516851785188519852085218522852385248525852685278528852985308531853285338534853585368537853885398540854185428543854485458546854785488549855085518552855385548555855685578558855985608561856285638564856585668567856885698570857185728573857485758576857785788579858085818582858385848585858685878588858985908591859285938594859585968597859885998600860186028603860486058606860786088609861086118612861386148615861686178618861986208621862286238624862586268627862886298630863186328633863486358636863786388639864086418642864386448645864686478648864986508651865286538654865586568657865886598660866186628663866486658666866786688669867086718672867386748675867686778678867986808681868286838684868586868687868886898690869186928693869486958696869786988699870087018702870387048705870687078708870987108711871287138714871587168717871887198720872187228723872487258726872787288729873087318732873387348735873687378738873987408741874287438744874587468747874887498750875187528753875487558756875787588759876087618762876387648765876687678768876987708771877287738774877587768
77787788779878087818782878387848785878687878788878987908791879287938794879587968797879887998800880188028803880488058806880788088809881088118812881388148815881688178818881988208821882288238824882588268827882888298830883188328833883488358836883788388839884088418842884388448845884688478848884988508851885288538854885588568857885888598860886188628863886488658866886788688869887088718872887388748875887688778878887988808881888288838884888588868887888888898890889188928893889488958896889788988899890089018902890389048905890689078908890989108911891289138914891589168917891889198920892189228923892489258926892789288929893089318932893389348935893689378938893989408941894289438944894589468947894889498950895189528953895489558956895789588959896089618962896389648965896689678968896989708971897289738974897589768977897889798980898189828983898489858986898789888989899089918992899389948995899689978998899990009001900290039004900590069007900890099010901190129013901490159016901790189019902090219022902390249025902690279028902990309031903290339034903590369037903890399040904190429043904490459046904790489049905090519052905390549055905690579058905990609061906290639064906590669067906890699070907190729073907490759076907790789079908090819082908390849085908690879088908990909091909290939094909590969097909890999100910191029103910491059106910791089109911091119112911391149115911691179118911991209121912291239124912591269127912891299130913191329133913491359136913791389139914091419142914391449145914691479148914991509151915291539154915591569157915891599160916191629163916491659166916791689169917091719172917391749175917691779178917991809181918291839184918591869187918891899190919191929193919491959196919791989199920092019202920392049205920692079208920992109211921292139214921592169217921892199220922192229223922492259226922792289229923092319232923392349235923692379238923992409241924292439244924592469247924892499250925192529253925492559256925792589259926092619262926392649265926692679268926992709271927292739274927592769
27792789279928092819282928392849285928692879288928992909291929292939294929592969297929892999300930193029303930493059306930793089309931093119312931393149315931693179318931993209321932293239324932593269327932893299330933193329333933493359336933793389339934093419342934393449345934693479348934993509351935293539354935593569357935893599360936193629363936493659366936793689369937093719372937393749375937693779378937993809381938293839384938593869387938893899390939193929393939493959396939793989399940094019402940394049405940694079408940994109411941294139414941594169417941894199420942194229423942494259426942794289429943094319432943394349435943694379438943994409441944294439444944594469447944894499450945194529453945494559456945794589459946094619462946394649465946694679468946994709471947294739474947594769477947894799480948194829483948494859486948794889489949094919492949394949495949694979498949995009501950295039504950595069507950895099510951195129513951495159516951795189519952095219522952395249525952695279528952995309531953295339534953595369537953895399540954195429543954495459546954795489549955095519552955395549555955695579558955995609561956295639564956595669567956895699570957195729573957495759576957795789579958095819582958395849585958695879588958995909591959295939594959595969597959895999600960196029603960496059606960796089609961096119612961396149615961696179618961996209621962296239624962596269627962896299630963196329633963496359636963796389639964096419642964396449645964696479648964996509651965296539654965596569657965896599660966196629663966496659666966796689669967096719672967396749675967696779678967996809681968296839684968596869687968896899690969196929693969496959696969796989699970097019702970397049705970697079708970997109711971297139714971597169717971897199720972197229723972497259726972797289729973097319732973397349735973697379738973997409741974297439744974597469747974897499750975197529753975497559756975797589759976097619762976397649765976697679768976997709771977297739774977597769
77797789779978097819782978397849785978697879788978997909791979297939794979597969797979897999800980198029803980498059806980798089809981098119812981398149815981698179818981998209821982298239824982598269827982898299830983198329833983498359836983798389839984098419842984398449845984698479848984998509851985298539854985598569857985898599860986198629863986498659866986798689869987098719872987398749875987698779878987998809881988298839884988598869887988898899890989198929893989498959896989798989899990099019902990399049905990699079908990999109911991299139914991599169917991899199920992199229923992499259926992799289929993099319932993399349935993699379938993999409941994299439944994599469947994899499950995199529953995499559956995799589959996099619962996399649965996699679968996999709971997299739974997599769977997899799980998199829983998499859986998799889989999099919992999399949995999699979998999910000100011000210003100041000510006100071000810009100101001110012100131001410015100161001710018100191002010021100221002310024100251002610027100281002910030100311003210033100341003510036100371003810039100401004110042100431004410045100461004710048100491005010051100521005310054100551005610057100581005910060100611006210063100641006510066100671006810069100701007110072100731007410075100761007710078100791008010081100821008310084100851008610087100881008910090100911009210093100941009510096100971009810099101001010110102101031010410105101061010710108101091011010111101121011310114101151011610117101181011910120101211012210123101241012510126101271012810129101301013110132101331013410135101361013710138101391014010141101421014310144101451014610147101481014910150101511015210153101541015510156101571015810159101601016110162101631016410165101661016710168101691017010171101721017310174101751017610177101781017910180101811018210183101841018510186101871018810189101901019110192101931019410195101961019710198101991020010201102021020310204102051020610207102081020910210102111021210213102141021510216102171021810219102201022
11022210223102241022510226102271022810229102301023110232102331023410235102361023710238102391024010241102421024310244102451024610247102481024910250102511025210253102541025510256102571025810259102601026110262102631026410265102661026710268102691027010271102721027310274102751027610277102781027910280102811028210283102841028510286102871028810289102901029110292102931029410295102961029710298102991030010301103021030310304103051030610307103081030910310103111031210313103141031510316103171031810319103201032110322103231032410325103261032710328103291033010331103321033310334103351033610337103381033910340103411034210343103441034510346103471034810349103501035110352103531035410355103561035710358103591036010361103621036310364103651036610367103681036910370103711037210373103741037510376103771037810379103801038110382103831038410385103861038710388103891039010391103921039310394103951039610397103981039910400104011040210403104041040510406104071040810409104101041110412104131041410415104161041710418104191042010421104221042310424104251042610427104281042910430104311043210433104341043510436104371043810439104401044110442104431044410445104461044710448104491045010451104521045310454104551045610457104581045910460104611046210463104641046510466104671046810469104701047110472104731047410475104761047710478104791048010481104821048310484104851048610487104881048910490104911049210493104941049510496104971049810499105001050110502105031050410505105061050710508105091051010511105121051310514105151051610517105181051910520105211052210523105241052510526105271052810529105301053110532105331053410535105361053710538105391054010541105421054310544105451054610547105481054910550105511055210553105541055510556105571055810559105601056110562105631056410565105661056710568105691057010571105721057310574105751057610577105781057910580105811058210583105841058510586105871058810589105901059110592105931059410595105961059710598105991060010601106021060310604106051060610607106081060910610106111061210613106141061510616106171061810619106201062
11062210623106241062510626106271062810629106301063110632106331063410635106361063710638106391064010641106421064310644106451064610647106481064910650106511065210653106541065510656106571065810659106601066110662106631066410665106661066710668106691067010671106721067310674106751067610677106781067910680106811068210683106841068510686106871068810689106901069110692106931069410695106961069710698106991070010701107021070310704107051070610707107081070910710107111071210713107141071510716107171071810719107201072110722107231072410725107261072710728107291073010731107321073310734107351073610737107381073910740107411074210743107441074510746107471074810749107501075110752107531075410755107561075710758107591076010761107621076310764107651076610767107681076910770107711077210773107741077510776107771077810779107801078110782107831078410785107861078710788107891079010791107921079310794107951079610797107981079910800108011080210803108041080510806108071080810809108101081110812108131081410815108161081710818108191082010821108221082310824108251082610827108281082910830108311083210833108341083510836108371083810839108401084110842108431084410845108461084710848108491085010851108521085310854108551085610857108581085910860108611086210863108641086510866108671086810869108701087110872108731087410875108761087710878108791088010881108821088310884108851088610887108881088910890108911089210893108941089510896108971089810899109001090110902109031090410905109061090710908109091091010911109121091310914109151091610917109181091910920109211092210923109241092510926109271092810929109301093110932109331093410935109361093710938109391094010941109421094310944109451094610947109481094910950109511095210953109541095510956109571095810959109601096110962109631096410965109661096710968109691097010971109721097310974109751097610977109781097910980109811098210983109841098510986109871098810989109901099110992109931099410995109961099710998109991100011001110021100311004110051100611007110081100911010110111101211013110141101511016110171101811019110201102
11102211023110241102511026110271102811029110301103111032110331103411035110361103711038110391104011041110421104311044110451104611047110481104911050110511105211053110541105511056110571105811059110601106111062110631106411065110661106711068110691107011071110721107311074110751107611077110781107911080110811108211083110841108511086110871108811089110901109111092110931109411095110961109711098110991110011101111021110311104111051110611107111081110911110111111111211113111141111511116111171111811119111201112111122111231112411125111261112711128111291113011131111321113311134111351113611137111381113911140111411114211143111441114511146111471114811149111501115111152111531115411155111561115711158111591116011161111621116311164111651116611167111681116911170111711117211173111741117511176111771117811179111801118111182111831118411185111861118711188111891119011191111921119311194111951119611197111981119911200112011120211203112041120511206112071120811209112101121111212112131121411215112161121711218112191122011221112221122311224112251122611227112281122911230112311123211233112341123511236112371123811239112401124111242112431124411245112461124711248112491125011251112521125311254112551125611257112581125911260112611126211263112641126511266112671126811269112701127111272112731127411275112761127711278112791128011281112821128311284112851128611287112881128911290112911129211293112941129511296112971129811299113001130111302113031130411305113061130711308113091131011311113121131311314113151131611317113181131911320113211132211323113241132511326113271132811329113301133111332113331133411335113361133711338113391134011341113421134311344113451134611347113481134911350113511135211353113541135511356113571135811359113601136111362113631136411365113661136711368113691137011371113721137311374113751137611377113781137911380113811138211383113841138511386113871138811389113901139111392113931139411395113961139711398113991140011401114021140311404114051140611407114081140911410114111141211413114141141511416114171141811419114201142
11142211423114241142511426114271142811429114301143111432114331143411435114361143711438114391144011441114421144311444114451144611447114481144911450114511145211453114541145511456114571145811459114601146111462114631146411465114661146711468114691147011471114721147311474114751147611477114781147911480114811148211483114841148511486114871148811489114901149111492114931149411495114961149711498114991150011501115021150311504115051150611507115081150911510115111151211513115141151511516115171151811519115201152111522115231152411525115261152711528115291153011531115321153311534115351153611537115381153911540115411154211543115441154511546115471154811549115501155111552115531155411555115561155711558115591156011561115621156311564115651156611567115681156911570115711157211573115741157511576115771157811579115801158111582115831158411585115861158711588115891159011591115921159311594115951159611597115981159911600116011160211603116041160511606116071160811609116101161111612116131161411615116161161711618116191162011621116221162311624116251162611627116281162911630116311163211633116341163511636116371163811639116401164111642116431164411645116461164711648116491165011651116521165311654116551165611657116581165911660116611166211663116641166511666116671166811669116701167111672116731167411675116761167711678116791168011681116821168311684116851168611687116881168911690116911169211693116941169511696116971169811699117001170111702117031170411705117061170711708117091171011711117121171311714117151171611717117181171911720117211172211723117241172511726117271172811729117301173111732117331173411735117361173711738117391174011741117421174311744117451174611747117481174911750117511175211753117541175511756117571175811759117601176111762117631176411765117661176711768117691177011771117721177311774117751177611777117781177911780117811178211783117841178511786117871178811789117901179111792117931179411795117961179711798117991180011801118021180311804118051180611807118081180911810118111181211813118141181511816118171181811819118201182
11182211823118241182511826118271182811829118301183111832118331183411835118361183711838118391184011841118421184311844118451184611847118481184911850118511185211853118541185511856118571185811859118601186111862118631186411865118661186711868118691187011871118721187311874118751187611877118781187911880118811188211883118841188511886118871188811889118901189111892118931189411895118961189711898118991190011901119021190311904119051190611907119081190911910119111191211913119141191511916119171191811919119201192111922119231192411925119261192711928119291193011931119321193311934119351193611937119381193911940119411194211943119441194511946119471194811949119501195111952119531195411955119561195711958119591196011961119621196311964119651196611967119681196911970119711197211973119741197511976119771197811979119801198111982119831198411985119861198711988119891199011991119921199311994119951199611997119981199912000120011200212003120041200512006120071200812009120101201112012120131201412015120161201712018120191202012021120221202312024120251202612027120281202912030120311203212033120341203512036120371203812039120401204112042120431204412045120461204712048120491205012051120521205312054120551205612057120581205912060120611206212063120641206512066120671206812069120701207112072120731207412075120761207712078120791208012081120821208312084120851208612087120881208912090120911209212093120941209512096120971209812099121001210112102121031210412105121061210712108121091211012111121121211312114121151211612117121181211912120121211212212123121241212512126121271212812129121301213112132121331213412135121361213712138121391214012141121421214312144121451214612147121481214912150121511215212153121541215512156121571215812159121601216112162121631216412165121661216712168121691217012171121721217312174121751217612177121781217912180121811218212183121841218512186121871218812189121901219112192121931219412195121961219712198121991220012201122021220312204122051220612207122081220912210122111221212213122141221512216122171221812219122201222
11222212223122241222512226122271222812229122301223112232122331223412235122361223712238122391224012241122421224312244122451224612247122481224912250122511225212253122541225512256122571225812259122601226112262122631226412265122661226712268122691227012271122721227312274122751227612277122781227912280122811228212283122841228512286122871228812289122901229112292122931229412295122961229712298122991230012301123021230312304123051230612307123081230912310123111231212313123141231512316123171231812319123201232112322123231232412325123261232712328123291233012331123321233312334123351233612337123381233912340123411234212343123441234512346123471234812349123501235112352123531235412355123561235712358123591236012361123621236312364123651236612367123681236912370123711237212373123741237512376123771237812379123801238112382123831238412385123861238712388123891239012391123921239312394123951239612397123981239912400124011240212403124041240512406124071240812409124101241112412124131241412415124161241712418124191242012421124221242312424124251242612427124281242912430124311243212433124341243512436124371243812439124401244112442124431244412445124461244712448124491245012451124521245312454124551245612457124581245912460124611246212463124641246512466124671246812469124701247112472124731247412475124761247712478124791248012481124821248312484124851248612487124881248912490124911249212493124941249512496124971249812499125001250112502125031250412505125061250712508125091251012511125121251312514125151251612517125181251912520125211252212523125241252512526125271252812529125301253112532125331253412535125361253712538125391254012541125421254312544125451254612547125481254912550125511255212553125541255512556125571255812559125601256112562125631256412565125661256712568125691257012571125721257312574125751257612577125781257912580125811258212583125841258512586125871258812589125901259112592125931259412595125961259712598125991260012601126021260312604126051260612607126081260912610126111261212613126141261512616126171261812619126201262
11262212623126241262512626126271262812629126301263112632126331263412635126361263712638126391264012641126421264312644126451264612647126481264912650126511265212653126541265512656126571265812659126601266112662126631266412665126661266712668126691267012671126721267312674126751267612677126781267912680126811268212683126841268512686126871268812689126901269112692126931269412695126961269712698126991270012701127021270312704127051270612707127081270912710127111271212713127141271512716127171271812719127201272112722127231272412725127261272712728127291273012731127321273312734127351273612737127381273912740127411274212743127441274512746127471274812749127501275112752127531275412755127561275712758127591276012761127621276312764127651276612767127681276912770127711277212773127741277512776127771277812779127801278112782127831278412785127861278712788127891279012791127921279312794127951279612797127981279912800128011280212803128041280512806128071280812809128101281112812128131281412815128161281712818128191282012821128221282312824128251282612827128281282912830128311283212833128341283512836128371283812839128401284112842128431284412845128461284712848128491285012851128521285312854128551285612857128581285912860128611286212863128641286512866128671286812869128701287112872128731287412875128761287712878128791288012881128821288312884128851288612887128881288912890128911289212893128941289512896128971289812899129001290112902129031290412905129061290712908129091291012911129121291312914129151291612917129181291912920129211292212923129241292512926129271292812929129301293112932129331293412935129361293712938129391294012941129421294312944129451294612947129481294912950129511295212953129541295512956129571295812959129601296112962129631296412965129661296712968129691297012971129721297312974129751297612977129781297912980129811298212983129841298512986129871298812989129901299112992129931299412995129961299712998129991300013001130021300313004130051300613007130081300913010130111301213013130141301513016130171301813019130201302
11302213023130241302513026130271302813029130301303113032130331303413035130361303713038130391304013041130421304313044130451304613047130481304913050130511305213053130541305513056130571305813059130601306113062130631306413065130661306713068130691307013071130721307313074130751307613077130781307913080130811308213083130841308513086130871308813089130901309113092130931309413095130961309713098130991310013101131021310313104131051310613107131081310913110131111311213113131141311513116131171311813119131201312113122131231312413125131261312713128131291313013131131321313313134131351313613137131381313913140131411314213143131441314513146131471314813149131501315113152131531315413155131561315713158131591316013161131621316313164131651316613167131681316913170131711317213173131741317513176131771317813179131801318113182131831318413185131861318713188131891319013191131921319313194131951319613197131981319913200132011320213203132041320513206132071320813209132101321113212132131321413215132161321713218132191322013221132221322313224132251322613227132281322913230132311323213233132341323513236132371323813239132401324113242132431324413245132461324713248132491325013251132521325313254132551325613257132581325913260132611326213263132641326513266132671326813269132701327113272132731327413275132761327713278132791328013281132821328313284132851328613287132881328913290132911329213293132941329513296132971329813299133001330113302133031330413305133061330713308133091331013311133121331313314133151331613317133181331913320133211332213323133241332513326133271332813329133301333113332133331333413335133361333713338133391334013341133421334313344133451334613347133481334913350133511335213353133541335513356133571335813359133601336113362133631336413365133661336713368133691337013371133721337313374133751337613377133781337913380133811338213383133841338513386133871338813389133901339113392133931339413395133961339713398133991340013401134021340313404134051340613407134081340913410134111341213413134141341513416134171341813419134201342
11342213423134241342513426134271342813429134301343113432134331343413435134361343713438134391344013441134421344313444134451344613447134481344913450134511345213453134541345513456134571345813459134601346113462134631346413465134661346713468134691347013471134721347313474134751347613477134781347913480134811348213483134841348513486134871348813489134901349113492134931349413495134961349713498134991350013501135021350313504135051350613507135081350913510135111351213513135141351513516135171351813519135201352113522135231352413525135261352713528135291353013531135321353313534135351353613537135381353913540135411354213543135441354513546135471354813549135501355113552135531355413555135561355713558135591356013561135621356313564135651356613567135681356913570135711357213573135741357513576135771357813579135801358113582135831358413585135861358713588135891359013591135921359313594135951359613597135981359913600136011360213603136041360513606136071360813609136101361113612136131361413615136161361713618136191362013621136221362313624136251362613627136281362913630136311363213633136341363513636136371363813639136401364113642136431364413645136461364713648136491365013651136521365313654136551365613657136581365913660136611366213663136641366513666136671366813669136701367113672136731367413675136761367713678136791368013681136821368313684136851368613687136881368913690136911369213693136941369513696136971369813699137001370113702137031370413705137061370713708137091371013711137121371313714137151371613717137181371913720137211372213723137241372513726137271372813729137301373113732137331373413735137361373713738137391374013741137421374313744137451374613747137481374913750137511375213753137541375513756137571375813759137601376113762137631376413765137661376713768137691377013771137721377313774137751377613777137781377913780137811378213783137841378513786137871378813789137901379113792137931379413795137961379713798137991380013801138021380313804138051380613807138081380913810138111381213813138141381513816138171381813819138201382
11382213823138241382513826138271382813829138301383113832138331383413835138361383713838138391384013841138421384313844138451384613847138481384913850138511385213853138541385513856138571385813859138601386113862138631386413865138661386713868138691387013871138721387313874138751387613877138781387913880138811388213883138841388513886138871388813889138901389113892138931389413895138961389713898138991390013901139021390313904139051390613907139081390913910139111391213913139141391513916139171391813919139201392113922139231392413925139261392713928139291393013931139321393313934139351393613937139381393913940139411394213943139441394513946139471394813949139501395113952139531395413955139561395713958139591396013961139621396313964139651396613967139681396913970139711397213973139741397513976139771397813979139801398113982139831398413985139861398713988139891399013991139921399313994139951399613997139981399914000140011400214003140041400514006140071400814009140101401114012140131401414015140161401714018140191402014021140221402314024140251402614027140281402914030140311403214033140341403514036140371403814039140401404114042140431404414045140461404714048140491405014051140521405314054140551405614057140581405914060140611406214063140641406514066140671406814069140701407114072140731407414075140761407714078140791408014081140821408314084140851408614087140881408914090140911409214093140941409514096140971409814099141001410114102141031410414105141061410714108141091411014111141121411314114141151411614117141181411914120141211412214123141241412514126141271412814129141301413114132141331413414135141361413714138141391414014141141421414314144141451414614147141481414914150141511415214153141541415514156141571415814159141601416114162141631416414165141661416714168141691417014171141721417314174141751417614177141781417914180141811418214183141841418514186141871418814189141901419114192141931419414195141961419714198141991420014201142021420314204142051420614207142081420914210142111421214213142141421514216142171421814219142201422
11422214223142241422514226142271422814229142301423114232142331423414235142361423714238142391424014241142421424314244142451424614247142481424914250142511425214253142541425514256142571425814259142601426114262142631426414265142661426714268142691427014271142721427314274142751427614277142781427914280142811428214283142841428514286142871428814289142901429114292142931429414295142961429714298142991430014301143021430314304143051430614307143081430914310143111431214313143141431514316143171431814319143201432114322143231432414325143261432714328143291433014331143321433314334143351433614337143381433914340143411434214343143441434514346143471434814349143501435114352143531435414355143561435714358143591436014361143621436314364143651436614367143681436914370143711437214373143741437514376143771437814379143801438114382143831438414385143861438714388143891439014391143921439314394143951439614397143981439914400144011440214403144041440514406144071440814409144101441114412144131441414415144161441714418144191442014421144221442314424144251442614427144281442914430144311443214433144341443514436144371443814439144401444114442144431444414445144461444714448144491445014451144521445314454144551445614457144581445914460144611446214463144641446514466144671446814469144701447114472144731447414475144761447714478144791448014481144821448314484144851448614487144881448914490144911449214493144941449514496144971449814499145001450114502145031450414505145061450714508145091451014511145121451314514145151451614517145181451914520145211452214523145241452514526145271452814529145301453114532145331453414535145361453714538145391454014541145421454314544145451454614547145481454914550145511455214553145541455514556145571455814559145601456114562145631456414565145661456714568145691457014571145721457314574145751457614577145781457914580145811458214583145841458514586145871458814589145901459114592145931459414595145961459714598145991460014601146021460314604146051460614607146081460914610146111461214613146141461514616146171461814619146201462
11462214623146241462514626146271462814629146301463114632146331463414635146361463714638146391464014641146421464314644146451464614647146481464914650146511465214653146541465514656146571465814659146601466114662146631466414665146661466714668146691467014671146721467314674146751467614677146781467914680146811468214683146841468514686146871468814689146901469114692146931469414695146961469714698146991470014701147021470314704147051470614707147081470914710147111471214713147141471514716147171471814719147201472114722147231472414725147261472714728147291473014731147321473314734147351473614737147381473914740147411474214743147441474514746147471474814749147501475114752147531475414755147561475714758147591476014761147621476314764147651476614767147681476914770147711477214773147741477514776147771477814779147801478114782147831478414785147861478714788147891479014791147921479314794147951479614797147981479914800148011480214803148041480514806148071480814809148101481114812148131481414815148161481714818148191482014821148221482314824148251482614827148281482914830148311483214833148341483514836148371483814839148401484114842148431484414845148461484714848148491485014851148521485314854148551485614857148581485914860148611486214863148641486514866148671486814869148701487114872148731487414875148761487714878148791488014881148821488314884148851488614887148881488914890148911489214893148941489514896148971489814899149001490114902149031490414905149061490714908149091491014911149121491314914149151491614917149181491914920149211492214923149241492514926149271492814929149301493114932149331493414935149361493714938149391494014941149421494314944149451494614947149481494914950149511495214953149541495514956149571495814959149601496114962149631496414965149661496714968149691497014971149721497314974149751497614977149781497914980149811498214983149841498514986149871498814989149901499114992149931499414995149961499714998149991500015001150021500315004150051500615007150081500915010150111501215013150141501515016150171501815019150201502
11502215023150241502515026150271502815029150301503115032150331503415035150361503715038150391504015041150421504315044150451504615047150481504915050150511505215053150541505515056150571505815059150601506115062150631506415065150661506715068150691507015071150721507315074150751507615077150781507915080150811508215083150841508515086150871508815089150901509115092150931509415095150961509715098150991510015101151021510315104151051510615107151081510915110151111511215113151141511515116151171511815119151201512115122151231512415125151261512715128151291513015131151321513315134151351513615137151381513915140151411514215143151441514515146151471514815149151501515115152151531515415155151561515715158151591516015161151621516315164151651516615167151681516915170151711517215173151741517515176151771517815179151801518115182151831518415185151861518715188151891519015191151921519315194151951519615197151981519915200152011520215203152041520515206152071520815209152101521115212152131521415215152161521715218152191522015221152221522315224152251522615227152281522915230152311523215233152341523515236152371523815239152401524115242152431524415245152461524715248152491525015251152521525315254152551525615257152581525915260152611526215263152641526515266152671526815269152701527115272152731527415275152761527715278152791528015281152821528315284152851528615287152881528915290152911529215293152941529515296152971529815299153001530115302153031530415305153061530715308153091531015311153121531315314153151531615317153181531915320153211532215323153241532515326153271532815329153301533115332153331533415335153361533715338153391534015341153421534315344153451534615347153481534915350153511535215353153541535515356153571535815359153601536115362153631536415365153661536715368153691537015371153721537315374153751537615377153781537915380153811538215383153841538515386153871538815389153901539115392153931539415395153961539715398153991540015401154021540315404154051540615407154081540915410154111541215413154141541515416154171541815419154201542
11542215423154241542515426154271542815429154301543115432154331543415435154361543715438154391544015441154421544315444154451544615447154481544915450154511545215453154541545515456154571545815459154601546115462154631546415465154661546715468154691547015471154721547315474154751547615477154781547915480154811548215483154841548515486154871548815489154901549115492154931549415495154961549715498154991550015501155021550315504155051550615507155081550915510155111551215513155141551515516155171551815519155201552115522155231552415525155261552715528155291553015531155321553315534155351553615537155381553915540155411554215543155441554515546155471554815549155501555115552155531555415555155561555715558155591556015561155621556315564155651556615567155681556915570155711557215573155741557515576155771557815579155801558115582155831558415585155861558715588155891559015591155921559315594155951559615597155981559915600156011560215603156041560515606156071560815609156101561115612156131561415615156161561715618156191562015621156221562315624156251562615627156281562915630156311563215633156341563515636156371563815639156401564115642156431564415645156461564715648156491565015651156521565315654156551565615657156581565915660156611566215663156641566515666156671566815669156701567115672156731567415675156761567715678156791568015681156821568315684156851568615687156881568915690156911569215693156941569515696156971569815699157001570115702157031570415705157061570715708157091571015711157121571315714157151571615717157181571915720157211572215723157241572515726157271572815729157301573115732157331573415735157361573715738157391574015741157421574315744157451574615747157481574915750157511575215753157541575515756157571575815759157601576115762157631576415765157661576715768157691577015771157721577315774157751577615777157781577915780157811578215783157841578515786157871578815789157901579115792157931579415795157961579715798157991580015801158021580315804158051580615807158081580915810158111581215813158141581515816158171581815819158201582
11582215823158241582515826158271582815829158301583115832158331583415835158361583715838158391584015841158421584315844158451584615847158481584915850158511585215853158541585515856158571585815859158601586115862158631586415865158661586715868158691587015871158721587315874158751587615877158781587915880158811588215883158841588515886158871588815889158901589115892158931589415895158961589715898158991590015901159021590315904159051590615907159081590915910159111591215913159141591515916159171591815919159201592115922159231592415925159261592715928159291593015931159321593315934159351593615937159381593915940159411594215943159441594515946159471594815949159501595115952159531595415955159561595715958159591596015961159621596315964159651596615967159681596915970159711597215973159741597515976159771597815979159801598115982159831598415985159861598715988159891599015991159921599315994159951599615997159981599916000160011600216003160041600516006160071600816009160101601116012160131601416015160161601716018160191602016021160221602316024160251602616027160281602916030160311603216033160341603516036160371603816039160401604116042160431604416045160461604716048160491605016051160521605316054160551605616057160581605916060160611606216063160641606516066160671606816069160701607116072160731607416075160761607716078160791608016081160821608316084160851608616087160881608916090160911609216093160941609516096160971609816099161001610116102161031610416105161061610716108161091611016111161121611316114161151611616117161181611916120161211612216123161241612516126161271612816129161301613116132161331613416135161361613716138161391614016141161421614316144161451614616147161481614916150161511615216153161541615516156161571615816159161601616116162161631616416165161661616716168161691617016171161721617316174161751617616177161781617916180161811618216183161841618516186161871618816189161901619116192161931619416195161961619716198161991620016201162021620316204162051620616207162081620916210162111621216213162141621516216162171621816219162201622
11622216223162241622516226162271622816229162301623116232162331623416235162361623716238162391624016241162421624316244162451624616247162481624916250162511625216253162541625516256162571625816259162601626116262162631626416265162661626716268162691627016271162721627316274162751627616277162781627916280162811628216283162841628516286162871628816289162901629116292162931629416295162961629716298162991630016301163021630316304163051630616307163081630916310163111631216313163141631516316163171631816319163201632116322163231632416325163261632716328163291633016331163321633316334163351633616337163381633916340163411634216343163441634516346163471634816349163501635116352163531635416355163561635716358163591636016361163621636316364163651636616367163681636916370163711637216373163741637516376163771637816379163801638116382163831638416385163861638716388163891639016391163921639316394163951639616397163981639916400164011640216403164041640516406164071640816409164101641116412164131641416415164161641716418164191642016421164221642316424164251642616427164281642916430164311643216433164341643516436164371643816439164401644116442164431644416445164461644716448164491645016451164521645316454164551645616457164581645916460164611646216463164641646516466164671646816469164701647116472164731647416475164761647716478164791648016481164821648316484164851648616487164881648916490164911649216493164941649516496164971649816499165001650116502165031650416505165061650716508165091651016511165121651316514165151651616517165181651916520165211652216523165241652516526165271652816529165301653116532165331653416535165361653716538165391654016541165421654316544165451654616547165481654916550165511655216553165541655516556165571655816559165601656116562165631656416565165661656716568165691657016571165721657316574165751657616577165781657916580165811658216583165841658516586165871658816589165901659116592165931659416595165961659716598165991660016601166021660316604166051660616607166081660916610166111661216613166141661516616166171661816619166201662
11662216623166241662516626166271662816629166301663116632166331663416635166361663716638166391664016641166421664316644166451664616647166481664916650166511665216653166541665516656166571665816659166601666116662166631666416665166661666716668166691667016671166721667316674166751667616677166781667916680166811668216683166841668516686166871668816689166901669116692166931669416695166961669716698166991670016701167021670316704167051670616707167081670916710167111671216713167141671516716167171671816719167201672116722167231672416725167261672716728167291673016731167321673316734167351673616737167381673916740167411674216743167441674516746167471674816749167501675116752167531675416755167561675716758167591676016761167621676316764167651676616767167681676916770167711677216773167741677516776167771677816779167801678116782167831678416785167861678716788167891679016791167921679316794167951679616797167981679916800168011680216803168041680516806168071680816809168101681116812168131681416815168161681716818168191682016821168221682316824168251682616827168281682916830168311683216833168341683516836168371683816839168401684116842168431684416845168461684716848168491685016851168521685316854168551685616857168581685916860168611686216863168641686516866168671686816869168701687116872168731687416875168761687716878168791688016881168821688316884168851688616887168881688916890168911689216893168941689516896168971689816899169001690116902169031690416905169061690716908169091691016911169121691316914169151691616917169181691916920169211692216923169241692516926169271692816929169301693116932169331693416935169361693716938169391694016941169421694316944169451694616947169481694916950169511695216953169541695516956169571695816959169601696116962169631696416965169661696716968169691697016971169721697316974169751697616977169781697916980169811698216983169841698516986169871698816989169901699116992169931699416995169961699716998169991700017001170021700317004170051700617007170081700917010170111701217013170141701517016170171701817019170201702
11702217023170241702517026170271702817029170301703117032170331703417035170361703717038170391704017041170421704317044170451704617047170481704917050170511705217053170541705517056170571705817059170601706117062170631706417065170661706717068170691707017071170721707317074170751707617077170781707917080170811708217083170841708517086170871708817089170901709117092170931709417095170961709717098170991710017101171021710317104171051710617107171081710917110171111711217113171141711517116171171711817119171201712117122171231712417125171261712717128171291713017131171321713317134171351713617137171381713917140171411714217143171441714517146171471714817149171501715117152171531715417155171561715717158171591716017161171621716317164171651716617167171681716917170171711717217173171741717517176171771717817179171801718117182171831718417185171861718717188171891719017191171921719317194171951719617197171981719917200172011720217203172041720517206172071720817209172101721117212172131721417215172161721717218172191722017221172221722317224172251722617227172281722917230172311723217233172341723517236172371723817239172401724117242172431724417245172461724717248172491725017251172521725317254172551725617257172581725917260172611726217263172641726517266172671726817269172701727117272172731727417275172761727717278172791728017281172821728317284172851728617287172881728917290172911729217293172941729517296172971729817299173001730117302173031730417305173061730717308173091731017311173121731317314173151731617317173181731917320173211732217323173241732517326173271732817329173301733117332173331733417335173361733717338173391734017341173421734317344173451734617347173481734917350173511735217353173541735517356173571735817359173601736117362173631736417365173661736717368173691737017371173721737317374173751737617377173781737917380173811738217383173841738517386173871738817389173901739117392173931739417395173961739717398173991740017401174021740317404174051740617407174081740917410174111741217413174141741517416174171741817419174201742
11742217423174241742517426174271742817429174301743117432174331743417435174361743717438174391744017441174421744317444174451744617447174481744917450174511745217453174541745517456174571745817459174601746117462174631746417465174661746717468174691747017471174721747317474174751747617477174781747917480174811748217483174841748517486174871748817489174901749117492174931749417495174961749717498174991750017501175021750317504175051750617507175081750917510175111751217513175141751517516175171751817519175201752117522175231752417525175261752717528175291753017531175321753317534175351753617537175381753917540175411754217543175441754517546175471754817549175501755117552175531755417555175561755717558175591756017561175621756317564175651756617567175681756917570175711757217573175741757517576175771757817579175801758117582175831758417585175861758717588175891759017591175921759317594175951759617597175981759917600176011760217603176041760517606176071760817609176101761117612176131761417615176161761717618176191762017621176221762317624176251762617627176281762917630176311763217633176341763517636176371763817639176401764117642176431764417645176461764717648176491765017651176521765317654176551765617657176581765917660176611766217663176641766517666176671766817669176701767117672176731767417675176761767717678176791768017681176821768317684176851768617687176881768917690176911769217693176941769517696176971769817699177001770117702177031770417705177061770717708177091771017711177121771317714177151771617717177181771917720177211772217723177241772517726177271772817729177301773117732177331773417735177361773717738177391774017741177421774317744177451774617747177481774917750177511775217753177541775517756177571775817759177601776117762177631776417765177661776717768177691777017771177721777317774177751777617777177781777917780177811778217783177841778517786177871778817789177901779117792177931779417795177961779717798177991780017801178021780317804178051780617807178081780917810178111781217813178141781517816178171781817819178201782
11782217823178241782517826178271782817829178301783117832178331783417835178361783717838178391784017841178421784317844178451784617847178481784917850178511785217853178541785517856178571785817859178601786117862178631786417865178661786717868178691787017871178721787317874178751787617877178781787917880178811788217883178841788517886178871788817889178901789117892178931789417895178961789717898178991790017901179021790317904179051790617907179081790917910179111791217913179141791517916179171791817919179201792117922179231792417925179261792717928179291793017931179321793317934179351793617937179381793917940179411794217943179441794517946179471794817949179501795117952179531795417955179561795717958179591796017961179621796317964179651796617967179681796917970179711797217973179741797517976179771797817979179801798117982179831798417985179861798717988179891799017991179921799317994179951799617997179981799918000180011800218003180041800518006180071800818009180101801118012180131801418015180161801718018180191802018021180221802318024180251802618027180281802918030180311803218033180341803518036180371803818039180401804118042180431804418045180461804718048180491805018051180521805318054180551805618057180581805918060180611806218063180641806518066180671806818069180701807118072180731807418075180761807718078180791808018081180821808318084180851808618087180881808918090180911809218093180941809518096180971809818099181001810118102181031810418105181061810718108181091811018111181121811318114181151811618117181181811918120181211812218123181241812518126181271812818129181301813118132181331813418135181361813718138181391814018141181421814318144181451814618147181481814918150181511815218153181541815518156181571815818159181601816118162181631816418165181661816718168181691817018171181721817318174181751817618177181781817918180181811818218183181841818518186181871818818189181901819118192181931819418195181961819718198181991820018201182021820318204182051820618207182081820918210182111821218213182141821518216182171821818219182201822
11822218223182241822518226182271822818229182301823118232182331823418235182361823718238182391824018241182421824318244182451824618247182481824918250182511825218253182541825518256182571825818259182601826118262182631826418265182661826718268182691827018271182721827318274182751827618277182781827918280182811828218283182841828518286182871828818289182901829118292182931829418295182961829718298182991830018301183021830318304183051830618307183081830918310183111831218313183141831518316183171831818319183201832118322183231832418325183261832718328183291833018331183321833318334183351833618337183381833918340183411834218343183441834518346183471834818349183501835118352183531835418355183561835718358183591836018361183621836318364183651836618367183681836918370183711837218373183741837518376183771837818379183801838118382183831838418385183861838718388183891839018391183921839318394183951839618397183981839918400184011840218403184041840518406184071840818409184101841118412184131841418415184161841718418184191842018421184221842318424184251842618427184281842918430184311843218433184341843518436184371843818439184401844118442184431844418445184461844718448184491845018451184521845318454184551845618457184581845918460184611846218463184641846518466184671846818469184701847118472184731847418475184761847718478184791848018481184821848318484184851848618487184881848918490184911849218493184941849518496184971849818499185001850118502185031850418505185061850718508185091851018511185121851318514185151851618517185181851918520185211852218523185241852518526185271852818529185301853118532185331853418535185361853718538185391854018541185421854318544185451854618547185481854918550185511855218553185541855518556185571855818559185601856118562185631856418565185661856718568185691857018571185721857318574185751857618577185781857918580185811858218583185841858518586185871858818589185901859118592185931859418595185961859718598185991860018601186021860318604186051860618607186081860918610186111861218613186141861518616186171861818619186201862
11862218623186241862518626186271862818629186301863118632186331863418635186361863718638186391864018641186421864318644186451864618647186481864918650186511865218653186541865518656186571865818659186601866118662186631866418665186661866718668186691867018671186721867318674186751867618677186781867918680186811868218683186841868518686186871868818689186901869118692186931869418695186961869718698186991870018701187021870318704187051870618707187081870918710187111871218713187141871518716187171871818719187201872118722187231872418725187261872718728187291873018731187321873318734187351873618737187381873918740187411874218743187441874518746187471874818749187501875118752187531875418755187561875718758187591876018761187621876318764187651876618767187681876918770187711877218773187741877518776187771877818779187801878118782187831878418785187861878718788187891879018791187921879318794187951879618797187981879918800188011880218803188041880518806188071880818809188101881118812188131881418815188161881718818188191882018821188221882318824188251882618827188281882918830188311883218833188341883518836188371883818839188401884118842188431884418845188461884718848188491885018851188521885318854188551885618857188581885918860188611886218863188641886518866188671886818869188701887118872188731887418875188761887718878188791888018881188821888318884188851888618887188881888918890188911889218893188941889518896188971889818899189001890118902189031890418905189061890718908189091891018911189121891318914189151891618917189181891918920189211892218923189241892518926189271892818929189301893118932189331893418935189361893718938189391894018941189421894318944189451894618947189481894918950189511895218953189541895518956189571895818959189601896118962189631896418965189661896718968189691897018971189721897318974189751897618977189781897918980189811898218983189841898518986189871898818989189901899118992189931899418995189961899718998189991900019001190021900319004190051900619007190081900919010190111901219013190141901519016190171901819019190201902
11902219023190241902519026190271902819029190301903119032190331903419035190361903719038190391904019041190421904319044190451904619047190481904919050190511905219053190541905519056190571905819059190601906119062190631906419065190661906719068190691907019071190721907319074190751907619077190781907919080190811908219083190841908519086190871908819089190901909119092190931909419095190961909719098190991910019101191021910319104191051910619107191081910919110191111911219113191141911519116191171911819119191201912119122191231912419125191261912719128191291913019131191321913319134191351913619137191381913919140191411914219143191441914519146191471914819149191501915119152191531915419155191561915719158191591916019161191621916319164191651916619167191681916919170191711917219173191741917519176191771917819179191801918119182191831918419185191861918719188191891919019191191921919319194191951919619197191981919919200192011920219203192041920519206192071920819209192101921119212192131921419215192161921719218192191922019221192221922319224192251922619227192281922919230192311923219233192341923519236192371923819239192401924119242192431924419245192461924719248192491925019251192521925319254192551925619257192581925919260192611926219263192641926519266192671926819269192701927119272192731927419275192761927719278192791928019281192821928319284192851928619287192881928919290192911929219293192941929519296192971929819299193001930119302193031930419305193061930719308193091931019311193121931319314193151931619317193181931919320193211932219323193241932519326193271932819329193301933119332193331933419335193361933719338193391934019341193421934319344193451934619347193481934919350193511935219353193541935519356193571935819359193601936119362193631936419365193661936719368193691937019371193721937319374193751937619377193781937919380193811938219383193841938519386193871938819389193901939119392193931939419395193961939719398193991940019401194021940319404194051940619407194081940919410194111941219413194141941519416194171941819419194201942
11942219423194241942519426194271942819429194301943119432194331943419435194361943719438194391944019441194421944319444194451944619447194481944919450194511945219453194541945519456194571945819459194601946119462194631946419465194661946719468194691947019471194721947319474194751947619477194781947919480194811948219483194841948519486194871948819489194901949119492194931949419495194961949719498194991950019501195021950319504195051950619507195081950919510195111951219513195141951519516195171951819519195201952119522195231952419525195261952719528195291953019531195321953319534195351953619537195381953919540195411954219543195441954519546195471954819549195501955119552195531955419555195561955719558195591956019561195621956319564195651956619567195681956919570195711957219573195741957519576195771957819579195801958119582195831958419585195861958719588195891959019591195921959319594195951959619597195981959919600196011960219603196041960519606196071960819609196101961119612196131961419615196161961719618196191962019621196221962319624196251962619627196281962919630196311963219633196341963519636196371963819639196401964119642196431964419645196461964719648196491965019651196521965319654196551965619657196581965919660196611966219663196641966519666196671966819669196701967119672196731967419675196761967719678196791968019681196821968319684196851968619687196881968919690196911969219693196941969519696196971969819699197001970119702197031970419705197061970719708197091971019711197121971319714197151971619717197181971919720197211972219723197241972519726197271972819729197301973119732197331973419735197361973719738197391974019741197421974319744197451974619747197481974919750197511975219753197541975519756197571975819759197601976119762197631976419765197661976719768197691977019771197721977319774197751977619777197781977919780197811978219783197841978519786197871978819789197901979119792197931979419795197961979719798197991980019801198021980319804198051980619807198081980919810198111981219813198141981519816198171981819819198201982
11982219823198241982519826198271982819829198301983119832198331983419835198361983719838198391984019841198421984319844198451984619847198481984919850198511985219853198541985519856198571985819859198601986119862198631986419865198661986719868198691987019871198721987319874198751987619877198781987919880198811988219883198841988519886198871988819889198901989119892198931989419895198961989719898198991990019901199021990319904199051990619907199081990919910199111991219913199141991519916199171991819919199201992119922199231992419925199261992719928199291993019931199321993319934199351993619937199381993919940199411994219943199441994519946199471994819949199501995119952199531995419955199561995719958199591996019961199621996319964199651996619967199681996919970199711997219973199741997519976199771997819979199801998119982199831998419985199861998719988199891999019991199921999319994199951999619997199981999920000200012000220003200042000520006200072000820009200102001120012200132001420015200162001720018200192002020021200222002320024200252002620027200282002920030200312003220033200342003520036200372003820039200402004120042200432004420045200462004720048200492005020051200522005320054200552005620057200582005920060200612006220063200642006520066200672006820069200702007120072200732007420075200762007720078200792008020081200822008320084200852008620087200882008920090200912009220093200942009520096200972009820099201002010120102201032010420105201062010720108201092011020111201122011320114201152011620117201182011920120201212012220123201242012520126201272012820129201302013120132201332013420135201362013720138201392014020141201422014320144201452014620147201482014920150201512015220153201542015520156201572015820159201602016120162201632016420165201662016720168201692017020171201722017320174201752017620177201782017920180201812018220183201842018520186201872018820189201902019120192201932019420195201962019720198201992020020201202022020320204202052020620207202082020920210202112021220213202142021520216202172021820219202202022
12022220223202242022520226202272022820229202302023120232202332023420235202362023720238202392024020241202422024320244202452024620247202482024920250202512025220253202542025520256202572025820259202602026120262202632026420265202662026720268202692027020271202722027320274202752027620277202782027920280202812028220283202842028520286202872028820289202902029120292202932029420295202962029720298202992030020301203022030320304203052030620307203082030920310203112031220313203142031520316203172031820319203202032120322203232032420325203262032720328203292033020331203322033320334203352033620337203382033920340203412034220343203442034520346203472034820349203502035120352203532035420355203562035720358203592036020361203622036320364203652036620367203682036920370203712037220373203742037520376203772037820379203802038120382203832038420385203862038720388203892039020391203922039320394203952039620397203982039920400204012040220403204042040520406204072040820409204102041120412204132041420415204162041720418204192042020421204222042320424204252042620427204282042920430204312043220433204342043520436204372043820439204402044120442204432044420445204462044720448204492045020451204522045320454204552045620457204582045920460204612046220463204642046520466204672046820469204702047120472204732047420475204762047720478204792048020481204822048320484204852048620487204882048920490204912049220493204942049520496204972049820499205002050120502205032050420505205062050720508205092051020511205122051320514205152051620517205182051920520205212052220523205242052520526205272052820529205302053120532205332053420535205362053720538205392054020541205422054320544205452054620547205482054920550205512055220553205542055520556205572055820559205602056120562205632056420565205662056720568205692057020571205722057320574205752057620577205782057920580205812058220583205842058520586205872058820589205902059120592205932059420595205962059720598205992060020601206022060320604206052060620607206082060920610206112061220613206142061520616206172061820619206202062
12062220623206242062520626206272062820629206302063120632206332063420635206362063720638206392064020641206422064320644206452064620647206482064920650206512065220653206542065520656206572065820659206602066120662206632066420665206662066720668206692067020671206722067320674206752067620677206782067920680206812068220683206842068520686206872068820689206902069120692206932069420695206962069720698206992070020701207022070320704207052070620707207082070920710207112071220713207142071520716207172071820719207202072120722207232072420725207262072720728207292073020731207322073320734207352073620737207382073920740207412074220743207442074520746207472074820749207502075120752207532075420755207562075720758207592076020761207622076320764207652076620767207682076920770207712077220773207742077520776207772077820779207802078120782207832078420785207862078720788207892079020791207922079320794207952079620797207982079920800208012080220803208042080520806208072080820809208102081120812208132081420815208162081720818208192082020821208222082320824208252082620827208282082920830208312083220833208342083520836208372083820839208402084120842208432084420845208462084720848208492085020851208522085320854208552085620857208582085920860208612086220863208642086520866208672086820869208702087120872208732087420875208762087720878208792088020881208822088320884208852088620887208882088920890208912089220893208942089520896208972089820899209002090120902209032090420905209062090720908209092091020911209122091320914209152091620917209182091920920209212092220923209242092520926209272092820929209302093120932209332093420935209362093720938209392094020941209422094320944209452094620947209482094920950209512095220953209542095520956209572095820959209602096120962209632096420965209662096720968209692097020971209722097320974209752097620977209782097920980209812098220983209842098520986209872098820989209902099120992209932099420995209962099720998209992100021001210022100321004210052100621007210082100921010210112101221013210142101521016210172101821019210202102
12102221023210242102521026210272102821029210302103121032210332103421035210362103721038210392104021041210422104321044210452104621047210482104921050210512105221053210542105521056210572105821059210602106121062210632106421065210662106721068210692107021071210722107321074210752107621077210782107921080210812108221083210842108521086210872108821089210902109121092210932109421095210962109721098210992110021101211022110321104211052110621107211082110921110211112111221113211142111521116211172111821119211202112121122211232112421125211262112721128211292113021131211322113321134211352113621137211382113921140211412114221143211442114521146211472114821149211502115121152211532115421155211562115721158211592116021161211622116321164211652116621167211682116921170211712117221173211742117521176211772117821179211802118121182211832118421185211862118721188211892119021191211922119321194211952119621197211982119921200212012120221203212042120521206212072120821209212102121121212212132121421215212162121721218212192122021221212222122321224212252122621227212282122921230212312123221233212342123521236212372123821239212402124121242212432124421245212462124721248212492125021251212522125321254212552125621257212582125921260212612126221263212642126521266212672126821269212702127121272212732127421275212762127721278212792128021281212822128321284212852128621287212882128921290212912129221293212942129521296212972129821299213002130121302213032130421305213062130721308 |
- '''
- PyMuPDF implemented on top of MuPDF Python bindings.
- License:
- SPDX-License-Identifier: GPL-3.0-only
- '''
- # To reduce startup times, we don't import everything we require here.
- #
- import atexit
- import binascii
- import collections
- import inspect
- import io
- import math
- import os
- import pathlib
- import glob
- import re
- import string
- import sys
- import tarfile
- import time
- import typing
- import warnings
- import weakref
- import zipfile
- from . import extra
- # Set up g_out_log and g_out_message from environment variables.
- #
- # PYMUPDF_MESSAGE controls the destination of user messages (from function
- # `pymupdf.message()`).
- #
- # PYMUPDF_LOG controls the destination of internal development logging (from
- # function `pymupdf.log()`).
- #
- # For syntax, see _make_output()'s `text` arg.
- #
- def _make_output(
- *,
- text=None,
- fd=None,
- stream=None,
- path=None,
- path_append=None,
- pylogging=None,
- pylogging_logger=None,
- pylogging_level=None,
- pylogging_name=None,
- default=None,
- ):
- '''
- Returns a stream that writes to a specified destination, which can be a
- file descriptor, a file, an existing stream or Python's `logging' system.
-
- Args:
- text: text specification of destination.
- fd:<int> - write to file descriptor.
- path:<str> - write to file.
- path+:<str> - append to file.
- logging:<items> - write to Python `logging` module.
- items: comma-separated <name=value> pairs.
- level=<int>
- name=<str>.
- Other names are ignored.
-
- fd: an int file descriptor.
- stream: something with methods .write(text) and .flush().
- If specified we simply return <stream>.
- path: a file path.
- If specified we return a stream that writes to this file.
- path_append: a file path.
- If specified we return a stream that appends to this file.
- pylogging*:
- if any of these args is not None, we return a stream that writes to
- Python's `logging` module.
-
- pylogging:
- Unused other than to activate use of logging module.
- pylogging_logger:
- A logging.Logger; If None, set from <pylogging_name>.
- pylogging_level:
- An int log level, if None we use
- pylogging_logger.getEffectiveLevel().
- pylogging_name:
- Only used if <pylogging_logger> is None:
- If <pylogging_name> is None, we set it to 'pymupdf'.
- Then we do: pylogging_logger = logging.getLogger(pylogging_name)
- '''
- if text is not None:
- # Textual specification, for example from from environment variable.
- if text.startswith('fd:'):
- fd = int(text[3:])
- elif text.startswith('path:'):
- path = text[5:]
- elif text.startswith('path+'):
- path_append = text[5:]
- elif text.startswith('logging:'):
- pylogging = True
- items_d = dict()
- items = text[8:].split(',')
- #items_d = {n: v for (n, v) in [item.split('=', 1) for item in items]}
- for item in items:
- if not item:
- continue
- nv = item.split('=', 1)
- assert len(nv) == 2, f'Need `=` in {item=}.'
- n, v = nv
- items_d[n] = v
- pylogging_level = items_d.get('level')
- if pylogging_level is not None:
- pylogging_level = int(pylogging_level)
- pylogging_name = items_d.get('name', 'pymupdf')
- else:
- assert 0, f'Expected prefix `fd:`, `path:`. `path+:` or `logging:` in {text=}.'
-
- if fd is not None:
- ret = open(fd, mode='w', closefd=False)
- elif stream is not None:
- assert hasattr(stream, 'write')
- assert hasattr(stream, 'flush')
- ret = stream
- elif path is not None:
- ret = open(path, 'w')
- elif path_append is not None:
- ret = open(path_append, 'a')
- elif (0
- or pylogging is not None
- or pylogging_logger is not None
- or pylogging_level is not None
- or pylogging_name is not None
- ):
- import logging
- if pylogging_logger is None:
- if pylogging_name is None:
- pylogging_name = 'pymupdf'
- pylogging_logger = logging.getLogger(pylogging_name)
- assert isinstance(pylogging_logger, logging.Logger)
- if pylogging_level is None:
- pylogging_level = pylogging_logger.getEffectiveLevel()
- class Out:
- def write(self, text):
- # `logging` module appends newlines, but so does the `print()`
- # functions in our caller message() and log() fns, so we need to
- # remove them here.
- text = text.rstrip('\n')
- if text:
- pylogging_logger.log(pylogging_level, text)
- def flush(self):
- pass
- ret = Out()
- else:
- ret = default
- return ret
- # Set stream used by PyMuPDF messaging.
- _g_out_message = _make_output(text=os.environ.get('PYMUPDF_MESSAGE'), default=sys.stdout)
- # Set stream used by PyMuPDF development/debugging logging.
- _g_out_log = _make_output(text=os.environ.get('PYMUPDF_LOG'), default=sys.stdout)
- # Things for testing logging.
- _g_log_items = list()
- _g_log_items_active = False
- def _log_items():
- return _g_log_items
- def _log_items_active(active):
- global _g_log_items_active
- _g_log_items_active = active
-
- def _log_items_clear():
- del _g_log_items[:]
def set_messages(
        *,
        text=None,
        fd=None,
        stream=None,
        path=None,
        path_append=None,
        pylogging=None,
        pylogging_logger=None,
        pylogging_level=None,
        pylogging_name=None,
        ):
    '''
    Sets destination of PyMuPDF messages. All arguments are forwarded to
    _make_output(); see there for details. If no destination is specified
    the current message stream is kept.
    '''
    global _g_out_message
    destination = dict(
            text=text,
            fd=fd,
            stream=stream,
            path=path,
            path_append=path_append,
            pylogging=pylogging,
            pylogging_logger=pylogging_logger,
            pylogging_level=pylogging_level,
            pylogging_name=pylogging_name,
            )
    _g_out_message = _make_output(default=_g_out_message, **destination)
def set_log(
        *,
        text=None,
        fd=None,
        stream=None,
        path=None,
        path_append=None,
        pylogging=None,
        pylogging_logger=None,
        pylogging_level=None,
        pylogging_name=None,
        ):
    '''
    Sets destination of PyMuPDF development/debugging logging. All arguments
    are forwarded to _make_output(); see there for details. If no destination
    is specified the current log stream is kept.
    '''
    global _g_out_log
    destination = dict(
            text=text,
            fd=fd,
            stream=stream,
            path=path,
            path_append=path_append,
            pylogging=pylogging,
            pylogging_logger=pylogging_logger,
            pylogging_level=pylogging_level,
            pylogging_name=pylogging_name,
            )
    _g_out_log = _make_output(default=_g_out_log, **destination)
def log( text='', caller=1):
    '''
    For development/debugging diagnostics.

    Prefixes `text` with `<file>:<line>:<function>(): ` taken from stack
    frame number `caller` (1 is our immediate caller), then appends it to the
    captured log items if capturing is active and prints it to the log stream
    if one is set.
    '''
    try:
        frames = inspect.stack(context=0)
    except StopIteration:
        # No stack information available; emit the text without a prefix.
        frames = None
    if frames is not None:
        record = frames[caller]
        try:
            where = os.path.relpath(record.filename)
        except Exception:   # Can fail on windows.
            where = record.filename
        text = f'{where}:{record.lineno}:{record.function}(): {text}'
    if _g_log_items_active:
        _g_log_items.append(text)
    if not _g_out_log:
        return
    print(text, file=_g_out_log, flush=1)
def message(text=''):
    '''
    For user messages. Prints `text` to the current message stream, or does
    nothing if that stream is unset.
    '''
    # `print()` appears to ignore a `None` file silently, but we check
    # explicitly rather than rely on it.
    if not _g_out_message:
        return
    print(text, file=_g_out_message, flush=1)
def exception_info():
    '''
    Logs a header line followed by the formatted traceback of the exception
    currently being handled.
    '''
    from traceback import format_exc
    log('exception_info:')
    log(format_exc())
- # PDF names must not contain these characters:
- INVALID_NAME_CHARS = set(string.whitespace + "()<>[]{}/%" + chr(0))
def get_env_bool( name, default):
    '''
    Reads boolean setting from environment variable `name`.

    Returns `True` for '1', `False` for '0' and `default` if the variable is
    unset; any other value assert-fails. A result that differs from
    `default` is reported with log().
    '''
    raw = os.environ.get( name)
    if raw is None:
        result = default
    elif raw in ('0', '1'):
        result = (raw == '1')
    else:
        assert 0, f'Unrecognised value for {name}: {raw!r}'
    if result != default:
        log(f'Using non-default setting from {name}: {raw!r}')
    return result
def get_env_int( name, default):
    '''
    Reads integer setting from environment variable `name`.

    Returns `default` if $<name> is unset, otherwise `int($<name>)`. Raises
    ValueError (from `int()`) if the value is not a valid integer. A result
    that differs from `default` is reported with log().
    '''
    v = os.environ.get( name)
    if v is None:
        ret = default
    else:
        ret = int(v)
    if ret != default:
        log(f'Using non-default setting from {name}: {v}')
    return ret
- # All our `except ...` blocks output diagnostics if `g_exceptions_verbose` is
- # true.
- g_exceptions_verbose = get_env_int( 'PYMUPDF_EXCEPTIONS_VERBOSE', 1)
- # $PYMUPDF_USE_EXTRA overrides whether to use optimised C fns in `extra`.
- #
- g_use_extra = get_env_bool( 'PYMUPDF_USE_EXTRA', True)
- # Global switches
- #
- class _Globals:
- def __init__(self):
- self.no_device_caching = 0
- self.small_glyph_heights = 0
- self.subset_fontnames = 0
- self.skip_quad_corrections = 0
- _globals = _Globals()
- # Optionally use MuPDF via cppyy bindings; experimental and not tested recently
- # as of 2023-01-20 11:51:40
- #
mupdf_cppyy = os.environ.get( 'MUPDF_CPPYY')
if mupdf_cppyy is not None:
    # pylint: disable=all
    log( f'{__file__}: $MUPDF_CPPYY={mupdf_cppyy!r} so attempting to import mupdf_cppyy.')
    # Diagnostics only: use .get() so that an unset $PYTHONPATH does not abort
    # the import with KeyError.
    log( f'{__file__}: $PYTHONPATH={os.environ.get("PYTHONPATH")}')
    if mupdf_cppyy == '':
        # Empty value: import mupdf_cppyy from the normal module search path.
        import mupdf_cppyy
    else:
        # Non-empty value: treat it as the path of the mupdf_cppyy source file.
        # Import the submodule explicitly; `import importlib` alone does not
        # guarantee that `importlib.machinery` is available as an attribute.
        import importlib.machinery
        mupdf_cppyy = importlib.machinery.SourceFileLoader(
                'mupdf_cppyy',
                mupdf_cppyy
                ).load_module()
    mupdf = mupdf_cppyy.cppyy.gbl.mupdf
else:
    # Use MuPDF Python SWIG bindings. We allow import from either our own
    # directory for conventional wheel installs, or from separate place in case
    # we are using a separately-installed system installation of mupdf.
    #
    try:
        from . import mupdf
    except Exception:
        import mupdf
    if hasattr(mupdf, 'internal_check_ndebug'):
        mupdf.internal_check_ndebug()
    mupdf.reinit_singlethreaded()
- def _int_rc(text):
- '''
- Converts string to int, ignoring trailing 'rc...'.
- '''
- rc = text.find('rc')
- if rc >= 0:
- text = text[:rc]
- return int(text)
- # Basic version information.
- #
- # (We use `noqa F401` to avoid flake8 errors such as `F401
- # '._build.mupdf_location' imported but unused`.)
- #
- from ._build import mupdf_location # noqa F401
- from ._build import pymupdf_git_branch # noqa F401
- from ._build import pymupdf_git_diff # noqa F401
- from ._build import pymupdf_git_sha # noqa F401
- from ._build import pymupdf_version # noqa F401
- from ._build import swig_version # noqa F401
- from ._build import swig_version_tuple # noqa F401
- mupdf_version = mupdf.FZ_VERSION
- # Removed in PyMuPDF-1.26.1.
- pymupdf_date = None
- # Versions as tuples; useful when comparing versions.
- #
- pymupdf_version_tuple = tuple( [_int_rc(i) for i in pymupdf_version.split('.')])
- mupdf_version_tuple = tuple( [_int_rc(i) for i in mupdf_version.split('.')])
- assert mupdf_version_tuple == (mupdf.FZ_VERSION_MAJOR, mupdf.FZ_VERSION_MINOR, mupdf.FZ_VERSION_PATCH), \
- f'Inconsistent MuPDF version numbers: {mupdf_version_tuple=} != {(mupdf.FZ_VERSION_MAJOR, mupdf.FZ_VERSION_MINOR, mupdf.FZ_VERSION_PATCH)=}'
- # Legacy version information.
- #
- version = (pymupdf_version, mupdf_version, None)
- VersionFitz = mupdf_version
- VersionBind = pymupdf_version
- VersionDate = None
- # String formatting.
- def _format_g(value, *, fmt='%g'):
- '''
- Returns `value` formatted with mupdf.fz_format_double() if available,
- otherwise with Python's `%`.
- If `value` is a list or tuple, we return a space-separated string of
- formatted values.
- '''
- if isinstance(value, (list, tuple)):
- ret = ''
- for v in value:
- if ret:
- ret += ' '
- ret += _format_g(v, fmt=fmt)
- return ret
- else:
- return mupdf.fz_format_double(fmt, value)
-
- format_g = _format_g
- # ByteString is gone from typing in 3.14.
- # collections.abc.Buffer available from 3.12 only
- try:
- ByteString = typing.ByteString
- except AttributeError:
- ByteString = bytes | bytearray | memoryview
- # Names required by class method typing annotations.
- OptBytes = typing.Optional[ByteString]
- OptDict = typing.Optional[dict]
- OptFloat = typing.Optional[float]
- OptInt = typing.Union[int, None]
- OptSeq = typing.Optional[typing.Sequence]
- OptStr = typing.Optional[str]
- Page = 'Page_forward_decl'
- Point = 'Point_forward_decl'
- matrix_like = 'matrix_like'
- point_like = 'point_like'
- quad_like = 'quad_like'
- rect_like = 'rect_like'
def _as_fz_document(document):
    '''
    Returns document as a mupdf.FzDocument, upcasting as required.

    Accepts a `Document`, a mupdf.FzDocument or a mupdf.PdfDocument. Raises
    ValueError('document closed') for a closed `Document`; assert-fails for
    None or any other type.
    '''
    if isinstance(document, Document):
        if document.is_closed:
            raise ValueError('document closed')
        document = document.this
    if isinstance(document, mupdf.FzDocument):
        return document
    if isinstance(document, mupdf.PdfDocument):
        return document.super()
    assert document is not None, 'document is None'
    assert 0, f'Unrecognised {type(document)=}'
def _as_pdf_document(document, required=True):
    '''
    Returns `document` downcast to a mupdf.PdfDocument.

    If the downcast fails (i.e. `document` is not actually a `PdfDocument`)
    we assert-fail if `required` is true (the default), else we return a
    `mupdf.PdfDocument` with `.m_internal` false. Raises
    ValueError('document closed') for a closed `Document`.
    '''
    if isinstance(document, Document):
        if document.is_closed:
            raise ValueError('document closed')
        document = document.this
    if isinstance(document, mupdf.PdfDocument):
        return document
    if isinstance(document, mupdf.FzDocument):
        pdf = mupdf.PdfDocument(document)
        if required:
            assert pdf.m_internal
        return pdf
    assert document is not None, 'document is None'
    assert 0, f'Unrecognised {type(document)=}'
def _as_fz_page(page):
    '''
    Returns page as a mupdf.FzPage, upcasting as required.

    Accepts a `Page`, a mupdf.PdfPage or a mupdf.FzPage; assert-fails for
    None or any other type.
    '''
    if isinstance(page, Page):
        page = page.this
    if isinstance(page, mupdf.PdfPage):
        return page.super()
    if isinstance(page, mupdf.FzPage):
        return page
    assert page is not None, 'page is None'
    assert 0, f'Unrecognised {type(page)=}'
def _as_pdf_page(page, required=True):
    '''
    Returns `page` downcast to a mupdf.PdfPage.

    If the downcast fails (i.e. `page` is not actually a `PdfPage`) we
    assert-fail if `required` is true (the default), else we return a
    `mupdf.PdfPage` with `.m_internal` false.
    '''
    if isinstance(page, Page):
        page = page.this
    if isinstance(page, mupdf.PdfPage):
        return page
    if isinstance(page, mupdf.FzPage):
        pdf_page = mupdf.pdf_page_from_fz_page(page)
        if required:
            assert pdf_page.m_internal
        return pdf_page
    assert page is not None, 'page is None'
    assert 0, f'Unrecognised {type(page)=}'
def _pdf_annot_page(annot):
    '''
    Wrapper for mupdf.pdf_annot_page() which raises RuntimeError if <annot>
    is not bound to a page, instead of returning a mupdf.PdfPage whose
    `.m_internal` is false.

    [Some other MuPDF functions such as `pdf_update_annot()` already raise a
    similar exception if a pdf_annot's .page field is null.]
    '''
    owner = mupdf.pdf_annot_page(annot)
    if owner.m_internal:
        return owner
    raise RuntimeError('Annot is not bound to a page')
- # Fixme: we don't support JM_MEMORY=1.
- JM_MEMORY = 0
- # Classes
- #
- class Annot:
def __init__(self, annot):
    '''
    Wraps `annot`, which must be a mupdf.PdfAnnot, and stores it as `.this`.
    '''
    assert isinstance( annot, mupdf.PdfAnnot)
    self.this = annot
- def __repr__(self):
- parent = getattr(self, 'parent', '<>')
- return "'%s' annotation on %s" % (self.type[1], str(parent))
- def __str__(self):
- return self.__repr__()
- def _erase(self):
- if getattr(self, "thisown", False):
- self.thisown = False
def _get_redact_values(self):
    '''
    Collects the properties of a redaction annotation into a dict.

    Returns None if the annotation is not a redaction or if reading its PDF
    keys raised an exception. Otherwise the dict holds overlay text and
    alignment (plus the /RO xref if present) together with rect, text color,
    font name, font size and fill color.
    '''
    annot = self.this
    if mupdf.pdf_annot_type(annot) != mupdf.PDF_ANNOT_REDACT:
        return

    result = {}
    try:
        ro = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "RO")
        if ro.m_internal:
            message_warning("Ignoring redaction key '/RO'.")
            result[dictkey_xref] = mupdf.pdf_to_num(ro)

        overlay = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "OverlayText")
        if overlay.m_internal:
            result[dictkey_text] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(overlay))
        else:
            result[dictkey_text] = ''

        quadding = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(annot), PDF_NAME('Q'))
        result[dictkey_align] = mupdf.pdf_to_int(quadding) if quadding.m_internal else 0
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        return

    if not result:
        return result

    result["rect"] = self.rect
    text_color, fontname, fontsize = TOOLS._parse_da(self)
    result["text_color"] = text_color
    result["fontname"] = fontname
    result["fontsize"] = fontsize
    result["fill"] = self.colors["fill"]
    return result
def _getAP(self):
    '''
    Returns the content of the annotation's normal appearance stream
    (/AP/N).

    With `g_use_extra` this is delegated to extra.Annot_getAP() and must
    yield bytes. Otherwise None is returned when /AP/N is not a stream or
    could not be loaded.
    '''
    if g_use_extra:
        assert isinstance( self.this, mupdf.PdfAnnot)
        data = extra.Annot_getAP(self.this)
        assert isinstance( data, bytes)
        return data

    annot = self.this
    assert isinstance( annot, mupdf.PdfAnnot)
    annot_obj = mupdf.pdf_annot_obj( annot)
    ap = mupdf.pdf_dict_getl( annot_obj, PDF_NAME('AP'), PDF_NAME('N'))
    buffer_ = mupdf.pdf_load_stream( ap) if mupdf.pdf_is_stream( ap) else None
    if buffer_ and buffer_.m_internal:
        return JM_BinFromBuffer(buffer_)
    return None
def _setAP(self, buffer_, rect=0):
    '''
    Replaces the content of the annotation's normal appearance stream
    (/AP/N) with `buffer_`. If `rect` is true, the stream's /BBox is also set
    to the annotation's /Rect.

    Any exception is reported via exception_info() when
    `g_exceptions_verbose` is set and is then suppressed; nothing is
    re-raised.
    '''
    try:
        annot = self.this
        annot_obj = mupdf.pdf_annot_obj( annot)
        page = _pdf_annot_page(annot)
        apobj = mupdf.pdf_dict_getl( annot_obj, PDF_NAME('AP'), PDF_NAME('N'))
        # /AP/N must exist and must be a stream.
        if not apobj.m_internal or not mupdf.pdf_is_stream( apobj):
            raise RuntimeError( MSG_BAD_APN)
        content = JM_BufferFromBytes( buffer_)
        if not content.m_internal:
            raise ValueError( MSG_BAD_BUFFER)
        JM_update_stream( page.doc(), apobj, content, 1)
        if rect:
            annot_rect = mupdf.pdf_dict_get_rect( annot_obj, PDF_NAME('Rect'))
            mupdf.pdf_dict_put_rect( apobj, PDF_NAME('BBox'), annot_rect)
    except Exception:
        if g_exceptions_verbose:
            exception_info()
def _update_appearance(self, opacity=-1, blend_mode=None, fill_color=None, rotate=-1):
    '''
    Regenerates the annotation's appearance and optionally applies fill
    color, rotation, opacity and blend mode.

    Args:
        opacity: only applied when `0 <= opacity < 1`.
        blend_mode: if true-ish, written as /BM to the annotation and to the
            appearance's graphics state.
        fill_color: passed through JM_color_FromSequence() to obtain the
            number of components and the component values.
        rotate: written as /Rotate if `>= 0` and the annotation type is one
            of those listed below.

    Returns True. Exceptions are reported via message() (and
    exception_info() if `g_exceptions_verbose` is set) and then re-raised.
    '''
    annot = self.this
    assert annot.m_internal
    annot_obj = mupdf.pdf_annot_obj( annot)
    page = _pdf_annot_page(annot)
    pdf = page.doc()
    type_ = mupdf.pdf_annot_type( annot)
    nfcol, fcol = JM_color_FromSequence(fill_color)

    try:
        # remove fill color from unsupported annots
        # or if so requested
        if nfcol == 0 or type_ not in (
                mupdf.PDF_ANNOT_SQUARE,
                mupdf.PDF_ANNOT_CIRCLE,
                mupdf.PDF_ANNOT_LINE,
                mupdf.PDF_ANNOT_POLY_LINE,
                mupdf.PDF_ANNOT_POLYGON
                ):
            mupdf.pdf_dict_del( annot_obj, PDF_NAME('IC'))
        elif nfcol > 0:
            mupdf.pdf_set_annot_interior_color( annot, fcol[:nfcol])

        # /Rotate is only inserted for a non-negative angle and only for the
        # annotation types listed here.
        insert_rot = 1 if rotate >= 0 else 0
        if type_ not in (
                mupdf.PDF_ANNOT_CARET,
                mupdf.PDF_ANNOT_CIRCLE,
                mupdf.PDF_ANNOT_FREE_TEXT,
                mupdf.PDF_ANNOT_FILE_ATTACHMENT,
                mupdf.PDF_ANNOT_INK,
                mupdf.PDF_ANNOT_LINE,
                mupdf.PDF_ANNOT_POLY_LINE,
                mupdf.PDF_ANNOT_POLYGON,
                mupdf.PDF_ANNOT_SQUARE,
                mupdf.PDF_ANNOT_STAMP,
                mupdf.PDF_ANNOT_TEXT,
                ):
            insert_rot = 0

        if insert_rot:
            mupdf.pdf_dict_put_int(annot_obj, PDF_NAME('Rotate'), rotate)

        # insert fill color
        if type_ == mupdf.PDF_ANNOT_FREE_TEXT:
            # FreeText: the fill color is set via pdf_set_annot_color().
            if nfcol > 0:
                mupdf.pdf_set_annot_color(annot, fcol[:nfcol])
        elif nfcol > 0:
            # All other types: write the components as an /IC array.
            col = mupdf.pdf_new_array(page.doc(), nfcol)
            for i in range( nfcol):
                mupdf.pdf_array_push_real(col, fcol[i])
            mupdf.pdf_dict_put(annot_obj, PDF_NAME('IC'), col)
        mupdf.pdf_dirty_annot(annot)
        mupdf.pdf_update_annot(annot)  # let MuPDF update
        pdf.resynth_required = 0
    except Exception as e:
        if g_exceptions_verbose:
            exception_info()
        message( f'cannot update annot: {e}')
        raise

    if (opacity < 0 or opacity >= 1) and not blend_mode:    # no opacity, no blend_mode
        return True

    try:    # create or update /ExtGState
        ap = mupdf.pdf_dict_getl(
                mupdf.pdf_annot_obj(annot),
                PDF_NAME('AP'),
                PDF_NAME('N')
                )
        if not ap.m_internal:   # should never happen
            raise RuntimeError( MSG_BAD_APN)

        resources = mupdf.pdf_dict_get( ap, PDF_NAME('Resources'))
        if not resources.m_internal:    # no Resources yet: make one
            resources = mupdf.pdf_dict_put_dict( ap, PDF_NAME('Resources'), 2)

        # New graphics state dict; stored under key /H in /ExtGState below.
        alp0 = mupdf.pdf_new_dict( page.doc(), 3)
        if opacity >= 0 and opacity < 1:
            mupdf.pdf_dict_put_real( alp0, PDF_NAME('CA'), opacity)
            mupdf.pdf_dict_put_real( alp0, PDF_NAME('ca'), opacity)
            mupdf.pdf_dict_put_real( annot_obj, PDF_NAME('CA'), opacity)

        if blend_mode:
            mupdf.pdf_dict_put_name( alp0, PDF_NAME('BM'), blend_mode)
            mupdf.pdf_dict_put_name( annot_obj, PDF_NAME('BM'), blend_mode)

        extg = mupdf.pdf_dict_get( resources, PDF_NAME('ExtGState'))
        if not extg.m_internal: # no ExtGState yet: make one
            extg = mupdf.pdf_dict_put_dict( resources, PDF_NAME('ExtGState'), 2)

        mupdf.pdf_dict_put( extg, PDF_NAME('H'), alp0)
    except Exception as e:
        if g_exceptions_verbose:    exception_info()
        message( f'cannot set opacity or blend mode\n: {e}')
        raise

    return True
@property
def apn_bbox(self):
    """annotation appearance bbox

    Taken from /AP/N/BBox, or the infinite rectangle if the annotation has no
    /AP/N. The result is multiplied by the parent page's transformation
    matrix and then by its derotation matrix.
    """
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    ap = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AP'), PDF_NAME('N'))
    if ap.m_internal:
        raw = mupdf.pdf_dict_get_rect(ap, PDF_NAME('BBox'))
    else:
        raw = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
    bbox = Rect(JM_py_from_rect(raw)) * self.get_parent().transformation_matrix
    bbox *= self.get_parent().derotation_matrix
    return bbox
@property
def apn_matrix(self):
    """annotation appearance matrix

    Taken from /AP/N/Matrix. Without an /AP/N entry the value converted from
    a default-constructed mupdf.FzMatrix is returned as is.
    """
    try:
        CheckParent(self)
        annot = self.this
        assert isinstance(annot, mupdf.PdfAnnot)
        ap = mupdf.pdf_dict_getl(
                mupdf.pdf_annot_obj(annot),
                mupdf.PDF_ENUM_NAME_AP,
                mupdf.PDF_ENUM_NAME_N
                )
        if not ap.m_internal:
            return JM_py_from_matrix(mupdf.FzMatrix())
        raw = mupdf.pdf_dict_get_matrix(ap, mupdf.PDF_ENUM_NAME_Matrix)
        return Matrix(JM_py_from_matrix(raw))
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
@property
def blendmode(self):
    """annotation BlendMode

    Uses the annotation's own /BM key if present; otherwise the first /BM
    found in the /AP/N/Resources/ExtGState sub-dictionaries. Returns None if
    neither exists.
    """
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    own = mupdf.pdf_dict_get(annot_obj, PDF_NAME('BM'))
    if own.m_internal:
        return JM_UnicodeFromStr(mupdf.pdf_to_name(own))

    # loop through the /AP/N/Resources/ExtGState objects
    extgstate = mupdf.pdf_dict_getl(
            annot_obj,
            PDF_NAME('AP'),
            PDF_NAME('N'),
            PDF_NAME('Resources'),
            PDF_NAME('ExtGState'),
            )
    if not mupdf.pdf_is_dict(extgstate):
        return None
    for i in range(mupdf.pdf_dict_len(extgstate)):
        gstate = mupdf.pdf_dict_get_val(extgstate, i)
        if not mupdf.pdf_is_dict(gstate):
            continue
        for j in range(mupdf.pdf_dict_len(gstate)):
            key = mupdf.pdf_dict_get_key(gstate, j)
            if mupdf.pdf_objcmp(key, PDF_NAME('BM')) == 0:
                return JM_UnicodeFromStr(mupdf.pdf_to_name(mupdf.pdf_dict_get_val(gstate, j)))
    return None
@property
def border(self):
    """Border information.

    Returns an empty dict for annotation types other than those listed
    below, else the result of JM_annot_border().
    """
    CheckParent(self)
    with_border = (
            mupdf.PDF_ANNOT_CIRCLE,
            mupdf.PDF_ANNOT_FREE_TEXT,
            mupdf.PDF_ANNOT_INK,
            mupdf.PDF_ANNOT_LINE,
            mupdf.PDF_ANNOT_POLY_LINE,
            mupdf.PDF_ANNOT_POLYGON,
            mupdf.PDF_ANNOT_SQUARE,
            )
    if self.type[0] in with_border:
        return JM_annot_border(mupdf.pdf_annot_obj(self.this))
    return dict()
def clean_contents(self, sanitize=1):
    """Clean appearance contents stream.

    Runs mupdf.pdf_filter_annot_contents() on the annotation with filter
    options recurse=1, instance_forms=0, ascii=0 and the given `sanitize`.
    """
    CheckParent(self)
    annot = self.this
    document = mupdf.pdf_get_bound_document(mupdf.pdf_annot_obj(annot))
    options = _make_PdfFilterOptions(recurse=1, instance_forms=0, ascii=0, sanitize=sanitize)
    mupdf.pdf_filter_annot_contents(document, annot, options)
@property
def colors(self):
    """Color definitions, as returned by JM_annot_colors()."""
    try:
        CheckParent(self)
        this_annot = self.this
        assert isinstance(this_annot, mupdf.PdfAnnot)
        annot_obj = mupdf.pdf_annot_obj(this_annot)
        return JM_annot_colors(annot_obj)
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def delete_responses(self):
    """Delete 'Popup' and responding annotations.

    Removes every annotation found by JM_find_annot_irt(), the /Popup key of
    this annotation, and all entries of the page's /Annots array whose
    /Parent is this annotation.
    """
    CheckParent(self)
    annot = self.this
    annot_obj = mupdf.pdf_annot_obj(annot)
    page = _pdf_annot_page(annot)

    # Delete responding annotations one at a time until none is left.
    while True:
        reply = JM_find_annot_irt(annot)
        if not reply:
            break
        mupdf.pdf_delete_annot(page, reply)
    mupdf.pdf_dict_del(annot_obj, PDF_NAME('Popup'))

    annots = mupdf.pdf_dict_get(page.obj(), PDF_NAME('Annots'))
    count = mupdf.pdf_array_len(annots)
    changed = False
    # Walk backwards so that deletions do not shift pending indices.
    for index in reversed(range(count)):
        entry = mupdf.pdf_array_get(annots, index)
        parent = mupdf.pdf_dict_get(entry, PDF_NAME('Parent'))
        if entry.m_internal and not mupdf.pdf_objcmp(parent, annot_obj):
            mupdf.pdf_array_delete(annots, index)
            changed = True
    if changed:
        mupdf.pdf_dict_put(page.obj(), PDF_NAME('Annots'), annots)
@property
def file_info(self):
    """Attached file information.

    Returns a dict with the file name (from /FS/UF, else /FS/F), the
    description (/FS/Desc), the stream /Length and /Params/Size (both -1 if
    absent).

    Raises TypeError if the annotation is not a file attachment. A missing
    /FS/EF/F entry is reported via RAISEPY() with JM_Exc_FileDataError.
    """
    CheckParent(self)
    res = dict()
    length = -1
    size = -1
    desc = None
    # Must be initialised: the file spec may contain neither /UF nor /F, in
    # which case the original code failed with UnboundLocalError below.
    filename = ''
    annot = self.this
    annot_obj = mupdf.pdf_annot_obj(annot)
    type_ = mupdf.pdf_annot_type(annot)
    if type_ != mupdf.PDF_ANNOT_FILE_ATTACHMENT:
        raise TypeError( MSG_BAD_ANNOT_TYPE)
    stream = mupdf.pdf_dict_getl(
            annot_obj,
            PDF_NAME('FS'),
            PDF_NAME('EF'),
            PDF_NAME('F'),
            )
    if not stream.m_internal:
        RAISEPY( "bad PDF: file entry not found", JM_Exc_FileDataError)

    fs = mupdf.pdf_dict_get(annot_obj, PDF_NAME('FS'))

    # Prefer /UF and fall back to /F.
    o = mupdf.pdf_dict_get(fs, PDF_NAME('UF'))
    if o.m_internal:
        filename = mupdf.pdf_to_text_string(o)
    else:
        o = mupdf.pdf_dict_get(fs, PDF_NAME('F'))
        if o.m_internal:
            filename = mupdf.pdf_to_text_string(o)

    o = mupdf.pdf_dict_get(fs, PDF_NAME('Desc'))
    if o.m_internal:
        desc = mupdf.pdf_to_text_string(o)

    o = mupdf.pdf_dict_get(stream, PDF_NAME('Length'))
    if o.m_internal:
        length = mupdf.pdf_to_int(o)

    o = mupdf.pdf_dict_getl(stream, PDF_NAME('Params'), PDF_NAME('Size'))
    if o.m_internal:
        size = mupdf.pdf_to_int(o)

    res[ dictkey_filename] = JM_EscapeStrFromStr(filename)
    res[ dictkey_descr] = JM_UnicodeFromStr(desc)
    res[ dictkey_length] = length
    res[ dictkey_size] = size
    return res
@property
def flags(self):
    """Flags field, as returned by mupdf.pdf_annot_flags()."""
    CheckParent(self)
    return mupdf.pdf_annot_flags(self.this)
def get_file(self):
    """Retrieve attached file content.

    Raises TypeError if the annotation is not a file attachment. A missing
    /FS/EF/F entry is reported via RAISEPY() with JM_Exc_FileDataError.
    """
    CheckParent(self)
    annot = self.this
    annot_obj = mupdf.pdf_annot_obj(annot)
    if mupdf.pdf_annot_type(annot) != mupdf.PDF_ANNOT_FILE_ATTACHMENT:
        raise TypeError( MSG_BAD_ANNOT_TYPE)
    embedded = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('FS'), PDF_NAME('EF'), PDF_NAME('F'))
    if not embedded.m_internal:
        RAISEPY( "bad PDF: file entry not found", JM_Exc_FileDataError)
    content = mupdf.pdf_load_stream(embedded)
    return JM_BinFromBuffer(content)
def get_oc(self):
    """Get annotation optional content reference.

    Returns the object number of the /OC entry, or 0 if there is none.
    """
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    oc_ref = mupdf.pdf_dict_get(annot_obj, PDF_NAME('OC'))
    if not oc_ref.m_internal:
        return 0
    return mupdf.pdf_to_num(oc_ref)
- # PyMuPDF doesn't seem to have this .parent member, but removing it breaks
- # 11 tests...?
- #@property
- def get_parent(self):
- try:
- ret = getattr( self, 'parent')
- except AttributeError:
- page = _pdf_annot_page(self.this)
- assert isinstance( page, mupdf.PdfPage)
- document = Document( page.doc()) if page.m_internal else None
- ret = Page(page, document)
- #self.parent = weakref.proxy( ret)
- self.parent = ret
- #log(f'No attribute .parent: {type(self)=} {id(self)=}: have set {id(self.parent)=}.')
- #log( f'Have set self.parent')
- return ret
def get_pixmap(self, matrix=None, dpi=None, colorspace=None, alpha=0):
    """annotation Pixmap

    Args:
        matrix: transformation to apply; replaced by a scaling of `dpi / 72`
            in both directions if `dpi` is given.
        dpi: if given, also stored in the resulting pixmap via set_dpi().
        colorspace: a colorspace, or one of the names "gray", "rgb", "cmyk"
            (case-insensitive). Unknown names and false values select RGB.
        alpha: passed on to mupdf.pdf_new_pixmap_from_annot().
    """
    CheckParent(self)
    by_name = {"gray": csGRAY, "rgb": csRGB, "cmyk": csCMYK}
    if type(colorspace) is str:
        colorspace = by_name.get(colorspace.lower(), None)
    if dpi:
        zoom = dpi / 72
        matrix = Matrix(zoom, zoom)
    ctm = JM_matrix_from_py(matrix)
    cs = colorspace if colorspace else mupdf.fz_device_rgb()

    raw = mupdf.pdf_new_pixmap_from_annot(self.this, ctm, cs, mupdf.FzSeparations(0), alpha)
    pixmap = Pixmap(raw)
    if dpi:
        pixmap.set_dpi(dpi, dpi)
    return pixmap
def get_sound(self):
    """Retrieve sound stream.

    Returns a dict with key 'stream' (the sound data) plus, where present in
    the /Sound dictionary, 'rate', 'channels', 'bps', 'encoding' and
    'compression'.

    Raises TypeError if this is not a sound annotation or has no /Sound
    entry. A /Sound dictionary with an /F key is reported via RAISEPY() with
    JM_Exc_FileDataError.
    """
    CheckParent(self)
    annot = self.this
    annot_obj = mupdf.pdf_annot_obj(annot)
    annot_type = mupdf.pdf_annot_type(annot)
    sound = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Sound'))
    if annot_type != mupdf.PDF_ANNOT_SOUND or not sound.m_internal:
        raise TypeError( MSG_BAD_ANNOT_TYPE)
    if mupdf.pdf_dict_get(sound, PDF_NAME('F')).m_internal:
        RAISEPY( "unsupported sound stream", JM_Exc_FileDataError)

    info = dict()
    # (result key, PDF key, converter) for the optional entries.
    optional = (
            ('rate', 'R', mupdf.pdf_to_real),
            ('channels', 'C', mupdf.pdf_to_int),
            ('bps', 'B', mupdf.pdf_to_int),
            ('encoding', 'E', mupdf.pdf_to_name),
            )
    for result_key, pdf_key, convert in optional:
        entry = mupdf.pdf_dict_get(sound, PDF_NAME(pdf_key))
        if entry.m_internal:
            info[result_key] = convert(entry)
    # /CO is looked up by string.
    entry = mupdf.pdf_dict_gets(sound, "CO")
    if entry.m_internal:
        info['compression'] = mupdf.pdf_to_name(entry)

    info['stream'] = JM_BinFromBuffer(mupdf.pdf_load_stream(sound))
    return info
def get_textpage(self, clip=None, flags=0):
    """Make annotation TextPage.

    Args:
        clip: optional rectangle; if given, extraction is restricted to it
            (requires mupdf.FZ_STEXT_CLIP_RECT).
        flags: initial value for mupdf.FzStextOptions.

    The returned TextPage's `.parent` is a weak proxy of the parent page.
    """
    CheckParent(self)
    options = mupdf.FzStextOptions(flags)
    if clip:
        assert hasattr(mupdf, 'FZ_STEXT_CLIP_RECT'), f'MuPDF-{mupdf_version} does not support FZ_STEXT_CLIP_RECT.'
        clip_rect = JM_rect_from_py(clip)
        options.clip = clip_rect.internal()
        options.flags |= mupdf.FZ_STEXT_CLIP_RECT
    textpage = TextPage(mupdf.FzStextPage(self.this, options))
    owner = self.get_parent()
    # Do not wrap an existing proxy a second time.
    textpage.parent = owner if isinstance(owner, weakref.ProxyType) else weakref.proxy(owner)
    return textpage
@property
def has_popup(self):
    """Check if annotation has a Popup, i.e. a /Popup entry."""
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    popup = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Popup'))
    return bool(popup.m_internal)
@property
def info(self):
    """Various information details.

    Returns a dict with content, name (/Name), title (/T), creation date
    (/CreationDate), modification date (/M), subject (/Subj) and id (/NM).
    """
    CheckParent(self)
    annot = self.this
    details = dict()

    details[dictkey_content] = JM_UnicodeFromStr(mupdf.pdf_annot_contents(annot))

    entry = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(annot), PDF_NAME('Name'))
    details[dictkey_name] = JM_UnicodeFromStr(mupdf.pdf_to_name(entry))

    # Title (= author)
    entry = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(annot), PDF_NAME('T'))
    details[dictkey_title] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(entry))

    # CreationDate
    entry = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "CreationDate")
    details[dictkey_creationDate] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(entry))

    # ModDate
    entry = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(annot), PDF_NAME('M'))
    details[dictkey_modDate] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(entry))

    # Subj (stored without going through JM_UnicodeFromStr())
    entry = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "Subj")
    details[dictkey_subject] = mupdf.pdf_to_text_string(entry)

    # Identification (PDF key /NM)
    entry = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "NM")
    details[dictkey_id] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(entry))

    return details
@property
def irt_xref(self):
    '''
    annotation IRT xref

    Object number of the /IRT entry, or 0 if there is none.
    '''
    annot_obj = mupdf.pdf_annot_obj( self.this)
    irt = mupdf.pdf_dict_get( annot_obj, PDF_NAME('IRT'))
    if irt.m_internal:
        return mupdf.pdf_to_num( irt)
    return 0
@property
def is_open(self):
    """Get 'open' status of annotation or its Popup.

    This is the result of mupdf.pdf_annot_is_open().
    """
    CheckParent(self)
    annot = self.this
    return mupdf.pdf_annot_is_open(annot)
@property
def language(self):
    """annotation language

    Returns None if the language is unset, else the string given by
    mupdf.fz_string_from_text_language2().
    """
    code = mupdf.pdf_annot_language(self.this)
    if code == mupdf.FZ_LANG_UNSET:
        return
    assert hasattr(mupdf, 'fz_string_from_text_language2')
    return mupdf.fz_string_from_text_language2(code)
@property
def line_ends(self):
    """Line end codes.

    Returns the tuple (start style, end style), or None for annotations
    that have no line ending styles.
    """
    CheckParent(self)
    annot = self.this
    # return nothing for invalid annot types
    if not mupdf.pdf_annot_has_line_ending_styles(annot):
        return
    start_style = mupdf.pdf_annot_line_start_style(annot)
    end_style = mupdf.pdf_annot_line_end_style(annot)
    return start_style, end_style
@property
def next(self):
    """Next annotation.

    For a widget this is the next widget, otherwise the next annotation.
    Returns None at the end of the chain. The successor is registered in the
    parent page's `_annot_refs`; a widget successor is returned as a filled
    `Widget`.
    """
    CheckParent(self)
    this_annot = self.this
    assert isinstance(this_annot, mupdf.PdfAnnot)
    assert this_annot.m_internal
    if mupdf.pdf_annot_type(this_annot) == mupdf.PDF_ANNOT_WIDGET:
        successor = mupdf.pdf_next_widget(this_annot)
    else:
        successor = mupdf.pdf_next_annot(this_annot)

    val = Annot(successor) if successor.m_internal else None
    if not val:
        return None
    val.thisown = True
    # The successor must live on the same page as we do.
    assert val.get_parent().this.m_internal_value() == self.get_parent().this.m_internal_value()
    val.parent._annot_refs[id(val)] = val

    if val.type[0] != mupdf.PDF_ANNOT_WIDGET:
        return val
    widget = Widget()
    TOOLS._fill_widget(val, widget)
    return widget
@property
def opacity(self):
    """Opacity.

    Value of the /CA entry, or -1 if that entry is not a number.
    """
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    ca = mupdf.pdf_dict_get( annot_obj, mupdf.PDF_ENUM_NAME_CA)
    if not mupdf.pdf_is_number(ca):
        return -1
    return mupdf.pdf_to_real(ca)
@property
def popup_rect(self):
    """annotation 'Popup' rectangle

    Taken from /Popup/Rect, or the infinite rectangle if there is no /Popup.
    The result is multiplied by the parent page's transformation matrix and
    then by its derotation matrix.
    """
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj( self.this)
    raw = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
    popup = mupdf.pdf_dict_get( annot_obj, PDF_NAME('Popup'))
    if popup.m_internal:
        raw = mupdf.pdf_dict_get_rect(popup, PDF_NAME('Rect'))

    result = Rect(JM_py_from_rect(raw)) * self.get_parent().transformation_matrix
    result *= self.get_parent().derotation_matrix
    return result
@property
def popup_xref(self):
    """Xref of the annotation's 'Popup' object, or 0 when there is none."""
    CheckParent(self)
    popup = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(self.this), PDF_NAME('Popup'))
    return mupdf.pdf_to_num(popup) if popup.m_internal else 0
@property
def rect(self):
    """Annotation rectangle, multiplied by the page's derotation matrix."""
    # Use the optimized 'extra' implementation when it is enabled.
    bound = extra.Annot_rect3(self.this) if g_use_extra else mupdf.pdf_bound_annot(self.this)
    result = Rect(bound)

    # Fetch the parent page once and reuse it (measured to be faster).
    page = self.get_parent()
    result *= page.derotation_matrix
    return result
@property
def rect_delta(self):
    '''
    Delta values of the annotation's RD entry.

    Returns a 4-tuple where the third and fourth RD values are negated, or
    None when RD is not a 4-element array.
    '''
    rd = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(self.this), PDF_NAME('RD'))
    if mupdf.pdf_array_len(rd) != 4:
        return None
    first, second, third, fourth = [
        mupdf.pdf_to_real(mupdf.pdf_array_get(rd, idx)) for idx in range(4)
    ]
    return (first, second, -third, -fourth)
@property
def rotation(self):
    """Annotation rotation from the Rotate entry; -1 when the entry is absent."""
    CheckParent(self)
    entry = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(self.this), mupdf.PDF_ENUM_NAME_Rotate)
    if entry.m_internal:
        return mupdf.pdf_to_int(entry)
    return -1
def set_apn_bbox(self, bbox):
    """
    Store `bbox` as the BBox of the annotation's normal appearance (AP/N).

    The rectangle is first multiplied by the page's rotation matrix times
    the inverse of its transformation matrix.

    Raises:
        RuntimeError: if the annotation has no AP/N object.
    """
    CheckParent(self)
    parent_page = self.get_parent()
    rotation = parent_page.rotation_matrix
    transformation = parent_page.transformation_matrix
    bbox *= rotation * ~transformation
    appearance = mupdf.pdf_dict_getl(
        mupdf.pdf_annot_obj(self.this), PDF_NAME('AP'), PDF_NAME('N')
    )
    if not appearance.m_internal:
        raise RuntimeError( MSG_BAD_APN)
    mupdf.pdf_dict_put_rect(appearance, PDF_NAME('BBox'), JM_rect_from_py(bbox))
def set_apn_matrix(self, matrix):
    """Store `matrix` as the Matrix of the annotation's normal appearance (AP/N).

    Raises:
        RuntimeError: if the annotation has no AP/N object.
    """
    CheckParent(self)
    appearance = mupdf.pdf_dict_getl(
        mupdf.pdf_annot_obj(self.this), PDF_NAME('AP'), PDF_NAME('N')
    )
    if not appearance.m_internal:
        raise RuntimeError( MSG_BAD_APN)
    mupdf.pdf_dict_put_matrix(appearance, PDF_NAME('Matrix'), JM_matrix_from_py(matrix))
def set_blendmode(self, blend_mode):
    """Write `blend_mode` as the name value of the annotation's BM entry."""
    CheckParent(self)
    target = mupdf.pdf_annot_obj(self.this)
    mupdf.pdf_dict_put_name(target, PDF_NAME('BM'), blend_mode)
def set_border(self, border=None, width=-1, style=None, dashes=None, clouds=-1):
    """Set border properties.

    Either a dict, or direct arguments width, style, dashes or clouds.

    Args:
        border: dict with any of the keys "width", "style", "dashes",
            "clouds". When given, the direct arguments are ignored. The
            caller's dict is not modified.
        width: border width; -1 or None is passed on as -1.
        style: border style.
        dashes: sequence of integers; a sequence containing any non-integer
            item is replaced by None.
        clouds: cloud border effect; -1 or None is passed on as -1.

    Returns:
        The result of `JM_annot_set_border`, or None when the annotation
        type does not support borders.
    """
    CheckParent(self)
    atype, atname = self.type[:2]  # annotation type
    if atype not in (
            mupdf.PDF_ANNOT_CIRCLE,
            mupdf.PDF_ANNOT_FREE_TEXT,
            mupdf.PDF_ANNOT_INK,
            mupdf.PDF_ANNOT_LINE,
            mupdf.PDF_ANNOT_POLY_LINE,
            mupdf.PDF_ANNOT_POLYGON,
            mupdf.PDF_ANNOT_SQUARE,
            ):
        message(f"Cannot set border for '{atname}'.")
        return None

    if type(border) is dict:
        # Work on a copy so the caller's dict is never modified.
        border = dict(border)
    else:
        border = {"width": width, "style": style, "dashes": dashes, "clouds": clouds}
    border.setdefault("width", -1)
    border.setdefault("style", None)
    border.setdefault("dashes", None)
    border.setdefault("clouds", -1)
    if border["width"] is None:
        border["width"] = -1
    if border["clouds"] is None:
        border["clouds"] = -1

    # Check the effective cloud value (dict or keyword) after normalizing
    # None, so a dict-supplied value cannot bypass the check.
    if atype not in (
            mupdf.PDF_ANNOT_CIRCLE,
            mupdf.PDF_ANNOT_FREE_TEXT,
            mupdf.PDF_ANNOT_POLYGON,
            mupdf.PDF_ANNOT_SQUARE,
            ) and border["clouds"] > 0:
        message(f"Cannot set cloudy border for '{atname}'.")
        border["clouds"] = -1  # do not set border effect

    if hasattr(border["dashes"], "__getitem__"):  # ensure sequence items are integers
        border["dashes"] = tuple(border["dashes"])
        if not all(isinstance(item, int) for item in border["dashes"]):
            border["dashes"] = None

    annot_obj = mupdf.pdf_annot_obj(self.this)
    pdf = mupdf.pdf_get_bound_document(annot_obj)
    return JM_annot_set_border(border, pdf, annot_obj)
def set_colors(self, colors=None, stroke=None, fill=None):
    """Set 'stroke' and 'fill' colors.

    Use either a dict (keys "stroke" and "fill") or the direct arguments.
    Stroke is written to key "C", fill to key "IC". An empty list or tuple
    writes an empty array; None leaves the key untouched.

    Raises:
        ValueError: for FreeText annotations.
    """
    if self.type[0] == mupdf.PDF_ANNOT_FREE_TEXT:
        raise ValueError("cannot be used for FreeText annotations")
    CheckParent(self)
    doc = self.get_parent().parent
    if type(colors) is not dict:
        colors = {"fill": fill, "stroke": stroke}
    fill = colors.get("fill")
    stroke = colors.get("stroke")

    def write_color(key, color):
        # Store one color under 'key' of this annotation's xref.
        if color in ([], ()):
            doc.xref_set_key(self.xref, key, "[]")
        elif color is not None:
            if hasattr(color, "__float__"):
                color = [float(color)]
            CheckColor(color)
            assert len(color) in (1, 3, 4)
            doc.xref_set_key(self.xref, key, f"[{_format_g(color)}]")

    write_color("C", stroke)

    # Only these annotation types accept a fill color.
    fill_annots = (mupdf.PDF_ANNOT_CIRCLE, mupdf.PDF_ANNOT_SQUARE, mupdf.PDF_ANNOT_LINE, mupdf.PDF_ANNOT_POLY_LINE, mupdf.PDF_ANNOT_POLYGON,
                   mupdf.PDF_ANNOT_REDACT,)
    if fill and self.type[0] not in fill_annots:
        message("Warning: fill color ignored for annot type '%s'." % self.type[1])
        return

    write_color("IC", fill)
def set_flags(self, flags):
    """Pass `flags` to `mupdf.pdf_set_annot_flags` for this annotation."""
    CheckParent(self)
    mupdf.pdf_set_annot_flags(self.this, flags)
def set_info(self, info=None, content=None, title=None, creationDate=None, modDate=None, subject=None):
    """Set various properties.

    When `info` is a dict, all values are taken from it (missing keys count
    as None) and the direct arguments are ignored. Only truthy values are
    written. Title, dates and subject are only written when
    `mupdf.pdf_annot_has_author` is true for the annotation.
    """
    CheckParent(self)
    if type(info) is dict:  # build the args from the dictionary
        content, title, creationDate, modDate, subject = [
            info.get(key, None)
            for key in ("content", "title", "creationDate", "modDate", "subject")
        ]
        info = None
    pdf_annot = self.this
    # use this to indicate a 'markup' annot type
    is_markup = mupdf.pdf_annot_has_author(pdf_annot)
    if content:
        mupdf.pdf_set_annot_contents(pdf_annot, content)
    if not is_markup:
        return
    if title:  # title (= author)
        mupdf.pdf_set_annot_author(pdf_annot, title)
    if creationDate:
        mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(pdf_annot), PDF_NAME('CreationDate'), creationDate)
    if modDate:
        mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(pdf_annot), PDF_NAME('M'), modDate)
    if subject:
        mupdf.pdf_dict_puts(mupdf.pdf_annot_obj(pdf_annot), "Subj", mupdf.pdf_new_text_string(subject))
def set_irt_xref(self, xref):
    '''
    Set the annotation's IRT entry to the object with the given xref.

    Raises:
        ValueError: if `xref` is outside the document's xref range, or the
            referenced object's Subtype is not a known annotation type.
    '''
    pdf_annot = self.this
    target_obj = mupdf.pdf_annot_obj( pdf_annot)
    pdf_page = _pdf_annot_page(pdf_annot)
    if xref < 1 or xref >= mupdf.pdf_xref_len( pdf_page.doc()):
        raise ValueError( MSG_BAD_XREF)
    referenced = mupdf.pdf_new_indirect( pdf_page.doc(), xref, 0)
    subtype_name = mupdf.pdf_to_name( mupdf.pdf_dict_get( referenced, PDF_NAME('Subtype')))
    if mupdf.pdf_annot_type_from_string( subtype_name) < 0:
        raise ValueError( MSG_IS_NO_ANNOT)
    mupdf.pdf_dict_put( target_obj, PDF_NAME('IRT'), referenced)
def set_language(self, language=None):
    """Set the annotation language; a falsy value unsets it."""
    CheckParent(self)
    lang_code = (
        mupdf.fz_text_language_from_string(language)
        if language
        else mupdf.FZ_LANG_UNSET
    )
    mupdf.pdf_set_annot_language(self.this, lang_code)
def set_line_ends(self, start, end):
    """Set line end codes; warns for annotation types without line endings."""
    CheckParent(self)
    pdf_annot = self.this
    if not mupdf.pdf_annot_has_line_ending_styles(pdf_annot):
        message_warning("bad annot type for line ends")
        return
    mupdf.pdf_set_annot_line_ending_styles(pdf_annot, start, end)
def set_name(self, name):
    """Write `name` as the /Name (icon) entry of the annotation."""
    CheckParent(self)
    target = mupdf.pdf_annot_obj(self.this)
    mupdf.pdf_dict_put_name(target, PDF_NAME('Name'), name)
def set_oc(self, oc=0):
    """Set the annotation's OC xref; a falsy `oc` removes the OC entry."""
    CheckParent(self)
    target = mupdf.pdf_annot_obj(self.this)
    if oc:
        JM_add_oc_object(mupdf.pdf_get_bound_document(target), target, oc)
    else:
        mupdf.pdf_dict_del(target, PDF_NAME('OC'))
def set_opacity(self, opacity):
    """Set opacity.

    A value rejected by `_INRANGE(opacity, 0.0, 1.0)` sets the opacity to 1.
    A value below 1.0 also sets the page's `transparency` attribute to 1.
    """
    CheckParent(self)
    pdf_annot = self.this
    if _INRANGE(opacity, 0.0, 1.0):
        mupdf.pdf_set_annot_opacity(pdf_annot, opacity)
        if opacity < 1.0:
            _pdf_annot_page(pdf_annot).transparency = 1
        return
    mupdf.pdf_set_annot_opacity(pdf_annot, 1)
def set_open(self, is_open):
    """Set the 'open' status of the annotation or its Popup."""
    CheckParent(self)
    mupdf.pdf_set_annot_is_open(self.this, is_open)
def set_popup(self, rect):
    '''
    Create the annotation's 'Popup' or update its rectangle.

    `rect` is transformed with the page's rotation matrix before it is set.
    '''
    CheckParent(self)
    pdf_annot = self.this
    rotation = JM_rotate_page_matrix(_pdf_annot_page(pdf_annot))
    popup_rect = mupdf.fz_transform_rect(JM_rect_from_py(rect), rotation)
    mupdf.pdf_set_annot_popup(pdf_annot, popup_rect)
def set_rect(self, rect):
    """Set the annotation rectangle.

    `rect` is transformed with the page's rotation matrix first.

    Returns:
        False when MuPDF fails to set the rectangle (a message is emitted),
        otherwise None.

    Raises:
        ValueError: if the transformed rectangle is empty or infinite.
    """
    CheckParent(self)
    pdf_annot = self.this

    rotation = JM_rotate_page_matrix(_pdf_annot_page(pdf_annot))
    new_rect = mupdf.fz_transform_rect(JM_rect_from_py(rect), rotation)
    if mupdf.fz_is_empty_rect(new_rect) or mupdf.fz_is_infinite_rect(new_rect):
        raise ValueError( MSG_BAD_RECT)
    try:
        mupdf.pdf_set_annot_rect(pdf_annot, new_rect)
    except Exception as e:
        message(f'cannot set rect: {e}')
        return False
def set_rotation(self, rotate=0):
    """Set annotation rotation.

    Does nothing for annotation types outside the supported list. The angle
    is brought into [0, 360); for FreeText, angles that are not multiples of
    90 become 0.
    """
    CheckParent(self)

    pdf_annot = self.this
    kind = mupdf.pdf_annot_type(pdf_annot)
    rotatable = (
        mupdf.PDF_ANNOT_CARET,
        mupdf.PDF_ANNOT_CIRCLE,
        mupdf.PDF_ANNOT_FREE_TEXT,
        mupdf.PDF_ANNOT_FILE_ATTACHMENT,
        mupdf.PDF_ANNOT_INK,
        mupdf.PDF_ANNOT_LINE,
        mupdf.PDF_ANNOT_POLY_LINE,
        mupdf.PDF_ANNOT_POLYGON,
        mupdf.PDF_ANNOT_SQUARE,
        mupdf.PDF_ANNOT_STAMP,
        mupdf.PDF_ANNOT_TEXT,
    )
    if kind not in rotatable:
        return
    angle = rotate
    while angle < 0:
        angle += 360
    while angle >= 360:
        angle -= 360
    if kind == mupdf.PDF_ANNOT_FREE_TEXT and angle % 90 != 0:
        angle = 0
    mupdf.pdf_dict_put_int(mupdf.pdf_annot_obj(pdf_annot), PDF_NAME('Rotate'), angle)
@property
def type(self):
    """Annotation type.

    Returns:
        `(type number, type name)`, extended by the intent name as a third
        item when the annotation has an /IT entry that is a PDF name.
        The string 'null' is returned when there is no underlying annotation.
    """
    CheckParent(self)
    if not self.this.m_internal:
        return 'null'
    type_ = mupdf.pdf_annot_type(self.this)
    type_name = mupdf.pdf_string_from_annot_type(type_)
    intent = mupdf.pdf_dict_gets( mupdf.pdf_annot_obj(self.this), 'IT')
    # Only a present /IT entry that really is a name qualifies as intent.
    if not intent.m_internal or not mupdf.pdf_is_name(intent):
        return (type_, type_name)
    return (type_, type_name, mupdf.pdf_to_name(intent))
def update(self,
        blend_mode: OptStr =None,
        opacity: OptFloat =None,
        fontsize: float =0,
        fontname: OptStr =None,
        text_color: OptSeq =None,
        border_color: OptSeq =None,
        fill_color: OptSeq =None,
        cross_out: bool =True,
        rotate: int =-1,
        ):
    """Update annot appearance.

    Notes:
        Depending on the annot type, some parameters make no sense,
        while others are only available in this method to achieve the
        desired result. This is especially true for 'FreeText' annots.
    Args:
        blend_mode: set the blend mode, all annotations.
        opacity: set the opacity, all annotations.
        fontsize: set fontsize, 'FreeText' only.
        fontname: set the font, 'FreeText' only.
        border_color: set border color, 'FreeText' only.
        text_color: set text color, 'FreeText' only.
        fill_color: set fill color, all annotations.
        cross_out: draw diagonal lines, 'Redact' only.
        rotate: set rotation, 'FreeText' and some others.
    Raises:
        ValueError: if border_color is given but the annotation has no
            rich text ("RC") entry.
        RuntimeError: if the appearance update reports failure.
    """
    annot_obj = mupdf.pdf_annot_obj(self.this)

    if border_color:
        rich_text = mupdf.pdf_dict_get(annot_obj, PDF_NAME("RC"))
        # Test the wrapped pointer, as done everywhere else in this class.
        if not rich_text.m_internal:
            raise ValueError("cannot set border_color if rich_text is False")
    Annot.update_timing_test()
    CheckParent(self)

    def color_string(cs, code):
        """Return valid PDF color operator for a given color sequence.
        """
        cc = ColorCode(cs, code)
        if not cc:
            return b""
        return (cc + "\n").encode()

    annot_type = self.type[0]  # get the annot type
    dt = self.border.get("dashes", None)  # get the dashes spec
    bwidth = self.border.get("width", -1)  # get border line width
    stroke = self.colors["stroke"]  # get the stroke color
    if fill_color is not None:
        fill = fill_color
    else:
        fill = self.colors["fill"]
    rect = None  # self.rect  # prevent MuPDF fiddling with it
    apnmat = self.apn_matrix  # prevent MuPDF fiddling with it
    if rotate != -1:  # sanitize rotation value
        while rotate < 0:
            rotate += 360
        while rotate >= 360:
            rotate -= 360
        if annot_type == mupdf.PDF_ANNOT_FREE_TEXT and rotate % 90 != 0:
            rotate = 0

    #------------------------------------------------------------------
    # handle opacity and blend mode
    #------------------------------------------------------------------
    if blend_mode is None:
        blend_mode = self.blendmode
    if not hasattr(opacity, "__float__"):
        opacity = self.opacity

    if 0 <= opacity < 1 or blend_mode:
        opa_code = "/H gs\n"  # then we must reference this 'gs'
    else:
        opa_code = ""

    if annot_type == mupdf.PDF_ANNOT_FREE_TEXT:
        CheckColor(text_color)
        CheckColor(fill_color)
        tcol, fname, fsize = TOOLS._parse_da(self)

        # read and update default appearance as necessary
        if fsize <= 0:
            fsize = 12
        if text_color:
            tcol = text_color
        if fontname:
            fname = fontname
        if fontsize > 0:
            fsize = fontsize
        JM_make_annot_DA(self, len(tcol), tcol, fname, fsize)
        blend_mode = None  # not supported for free text annotations!

    #------------------------------------------------------------------
    # now invoke MuPDF to update the annot appearance
    #------------------------------------------------------------------
    val = self._update_appearance(
        opacity=opacity,
        blend_mode=blend_mode,
        fill_color=fill,
        rotate=rotate,
    )
    if val is False:
        raise RuntimeError("Error updating annotation.")

    if annot_type == mupdf.PDF_ANNOT_FREE_TEXT:
        # in absence of previous opacity, we may need to modify the AP
        ap = self._getAP()
        if 0 <= opacity < 1 and not ap.startswith(b"/H gs"):
            self._setAP(b"/H gs\n" + ap)
        return

    bfill = color_string(fill, "f")
    bstroke = color_string(stroke, "c")

    p_ctm = self.get_parent().transformation_matrix
    imat = ~p_ctm  # inverse page transf. matrix

    if dt:
        dashes = "[" + " ".join(map(str, dt)) + "] 0 d\n"
        dashes = dashes.encode("utf-8")
    else:
        dashes = None

    if self.line_ends:
        line_end_le, line_end_ri = self.line_ends
    else:
        line_end_le, line_end_ri = 0, 0  # init line end codes

    # read contents as created by MuPDF
    ap = self._getAP()
    ap_tab = ap.splitlines()  # split in single lines
    ap_updated = False  # assume we did nothing

    if annot_type == mupdf.PDF_ANNOT_REDACT:
        if cross_out:  # create crossed-out rect
            ap_updated = True
            ap_tab = ap_tab[:-1]
            _, LL, LR, UR, UL = ap_tab
            ap_tab.append(LR)
            ap_tab.append(LL)
            ap_tab.append(UR)
            ap_tab.append(LL)
            ap_tab.append(UL)
            ap_tab.append(b"S")

        if bwidth > 0 or bstroke != b"":
            ap_updated = True
            ntab = [_format_g(bwidth).encode() + b" w"] if bwidth > 0 else []
            for line in ap_tab:
                if line.endswith(b"w"):
                    continue
                if line.endswith(b"RG") and bstroke != b"":
                    line = bstroke[:-1]
                ntab.append(line)
            ap_tab = ntab

        ap = b"\n".join(ap_tab)

    if annot_type in (mupdf.PDF_ANNOT_POLYGON, mupdf.PDF_ANNOT_POLY_LINE):
        ap = b"\n".join(ap_tab[:-1]) + b"\n"
        ap_updated = True
        if bfill != b"":
            if annot_type == mupdf.PDF_ANNOT_POLYGON:
                ap = ap + bfill + b"b"  # close, fill, and stroke
            elif annot_type == mupdf.PDF_ANNOT_POLY_LINE:
                ap = ap + b"S"  # stroke
        else:
            if annot_type == mupdf.PDF_ANNOT_POLYGON:
                ap = ap + b"s"  # close and stroke
            elif annot_type == mupdf.PDF_ANNOT_POLY_LINE:
                ap = ap + b"S"  # stroke

    if dashes is not None:  # handle dashes
        ap = dashes + ap
        # reset dashing - only applies for LINE annots with line ends given
        ap = ap.replace(b"\nS\n", b"\nS\n[] 0 d\n", 1)
        ap_updated = True

    if opa_code:
        ap = opa_code.encode("utf-8") + ap
        ap_updated = True

    ap = b"q\n" + ap + b"\nQ\n"
    #----------------------------------------------------------------------
    # the following handles line end symbols for 'Polygon' and 'Polyline'
    #----------------------------------------------------------------------
    if line_end_le + line_end_ri > 0 and annot_type in (mupdf.PDF_ANNOT_POLYGON, mupdf.PDF_ANNOT_POLY_LINE):
        le_funcs = (None, TOOLS._le_square, TOOLS._le_circle,
                    TOOLS._le_diamond, TOOLS._le_openarrow,
                    TOOLS._le_closedarrow, TOOLS._le_butt,
                    TOOLS._le_ropenarrow, TOOLS._le_rclosedarrow,
                    TOOLS._le_slash)
        le_funcs_range = range(1, len(le_funcs))
        d = 2 * max(1, self.border["width"])
        rect = self.rect + (-d, -d, d, d)
        ap_updated = True
        points = self.vertices
        if line_end_le in le_funcs_range:
            p1 = Point(points[0]) * imat
            p2 = Point(points[1]) * imat
            left = le_funcs[line_end_le](self, p1, p2, False, fill_color)
            ap += left.encode()
        if line_end_ri in le_funcs_range:
            p1 = Point(points[-2]) * imat
            p2 = Point(points[-1]) * imat
            left = le_funcs[line_end_ri](self, p1, p2, True, fill_color)
            ap += left.encode()

    if ap_updated:
        if rect:  # rect modified here?
            self.set_rect(rect)
            self._setAP(ap, rect=1)
        else:
            self._setAP(ap, rect=0)

    #-------------------------------
    # handle annotation rotations
    #-------------------------------
    if annot_type not in (  # only these types are supported
            mupdf.PDF_ANNOT_CARET,
            mupdf.PDF_ANNOT_CIRCLE,
            mupdf.PDF_ANNOT_FILE_ATTACHMENT,
            mupdf.PDF_ANNOT_INK,
            mupdf.PDF_ANNOT_LINE,
            mupdf.PDF_ANNOT_POLY_LINE,
            mupdf.PDF_ANNOT_POLYGON,
            mupdf.PDF_ANNOT_SQUARE,
            mupdf.PDF_ANNOT_STAMP,
            mupdf.PDF_ANNOT_TEXT,
            ):
        return

    rot = self.rotation  # get value from annot object
    if rot == -1:  # nothing to change
        return

    M = (self.rect.tl + self.rect.br) / 2  # center of annot rect

    if rot == 0:  # undo rotations
        if abs(apnmat - Matrix(1, 1)) < 1e-5:
            return  # matrix already is a no-op
        quad = self.rect.morph(M, ~apnmat)  # derotate rect
        # Use set_rect like the other call sites in this method.
        self.set_rect(quad.rect)
        self.set_apn_matrix(Matrix(1, 1))  # appearance matrix = no-op
        return

    mat = Matrix(rot)
    quad = self.rect.morph(M, mat)
    self.set_rect(quad.rect)
    self.set_apn_matrix(apnmat * mat)
def update_file(self, buffer_=None, filename=None, ufilename=None, desc=None):
    """Update attached file.

    Args:
        buffer_: new file content; left unchanged when not given.
        filename: new file name; also written as UF and as the annotation's
            Contents.
        ufilename: new UF value (written after `filename`, so it wins).
        desc: new description.

    Raises:
        TypeError: if the annotation is not a file attachment.
        ValueError: if `buffer_` is given but cannot be turned into a buffer.
    """
    CheckParent(self)
    annot = self.this
    annot_obj = mupdf.pdf_annot_obj(annot)
    pdf = mupdf.pdf_get_bound_document(annot_obj)  # the owning PDF
    annot_type = mupdf.pdf_annot_type(annot)
    if annot_type != mupdf.PDF_ANNOT_FILE_ATTACHMENT:
        raise TypeError( MSG_BAD_ANNOT_TYPE)
    stream = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('FS'), PDF_NAME('EF'), PDF_NAME('F'))
    # the object for file content
    if not stream.m_internal:
        RAISEPY( "bad PDF: no /EF object", JM_Exc_FileDataError)

    fs = mupdf.pdf_dict_get(annot_obj, PDF_NAME('FS'))

    # file content given
    res = JM_BufferFromBytes(buffer_)
    if buffer_ and not res.m_internal:
        raise ValueError( MSG_BAD_BUFFER)
    # Test the wrapped pointer so an empty buffer wrapper is never written.
    if res.m_internal:
        JM_update_stream(pdf, stream, res, 1)
        # adjust /DL and /Size parameters
        size, _ = mupdf.fz_buffer_storage(res)
        size_obj = mupdf.pdf_new_int(size)
        mupdf.pdf_dict_put(stream, PDF_NAME('DL'), size_obj)
        mupdf.pdf_dict_putl(stream, size_obj, PDF_NAME('Params'), PDF_NAME('Size'))

    if filename:
        mupdf.pdf_dict_put_text_string(stream, PDF_NAME('F'), filename)
        mupdf.pdf_dict_put_text_string(fs, PDF_NAME('F'), filename)
        mupdf.pdf_dict_put_text_string(stream, PDF_NAME('UF'), filename)
        mupdf.pdf_dict_put_text_string(fs, PDF_NAME('UF'), filename)
        mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('Contents'), filename)

    if ufilename:
        mupdf.pdf_dict_put_text_string(stream, PDF_NAME('UF'), ufilename)
        mupdf.pdf_dict_put_text_string(fs, PDF_NAME('UF'), ufilename)

    if desc:
        mupdf.pdf_dict_put_text_string(stream, PDF_NAME('Desc'), desc)
        mupdf.pdf_dict_put_text_string(fs, PDF_NAME('Desc'), desc)
- @staticmethod
- def update_timing_test():
- total = 0
- for i in range( 30*1000):
- total += i
- return total
-
@property
def vertices(self):
    """Annotation vertex points.

    Returns:
        A list of (x, y) tuples taken from the first entry found among
        Vertices, L, QuadPoints and CL; otherwise a list of such lists built
        from InkList; otherwise None. Every point is transformed with the
        page transformation matrix concatenated with the page's derotation
        matrix.
    """
    CheckParent(self)
    pdf_annot = self.this
    assert isinstance(pdf_annot, mupdf.PdfAnnot)
    annot_obj = mupdf.pdf_annot_obj(pdf_annot)
    page = _pdf_annot_page(pdf_annot)
    page_ctm = mupdf.FzMatrix()  # page transformation matrix
    unused_rect = mupdf.FzRect()  # Out-param for mupdf.pdf_page_transform().
    mupdf.pdf_page_transform(page, unused_rect, page_ctm)
    page_ctm = mupdf.fz_concat(page_ctm, JM_derotate_page_matrix(page))

    def points_of(array):
        # Read consecutive float pairs from 'array' as transformed points.
        pts = []
        for k in range(0, mupdf.pdf_array_len(array), 2):
            x = mupdf.pdf_to_real(mupdf.pdf_array_get(array, k))
            y = mupdf.pdf_to_real(mupdf.pdf_array_get(array, k+1))
            moved = mupdf.fz_transform_point(mupdf.FzPoint(x, y), page_ctm)
            pts.append( (moved.x, moved.y))
        return pts

    #----------------------------------------------------------------
    # These keys occur in different annotation types, so the first one
    # found is used. They all hold a flat (1-level) list of floats.
    #----------------------------------------------------------------
    lookups = (
        lambda: mupdf.pdf_dict_get(annot_obj, PDF_NAME('Vertices')),
        lambda: mupdf.pdf_dict_get(annot_obj, PDF_NAME('L')),
        lambda: mupdf.pdf_dict_get(annot_obj, PDF_NAME('QuadPoints')),
        lambda: mupdf.pdf_dict_gets(annot_obj, 'CL'),
    )
    for fetch in lookups:
        flat = fetch()
        if flat.m_internal:
            return points_of(flat)

    # InkList has 2-level lists: one list of points per stroke.
    ink = mupdf.pdf_dict_gets(annot_obj, 'InkList')
    if ink.m_internal:
        return [
            points_of(mupdf.pdf_array_get(ink, i))
            for i in range(mupdf.pdf_array_len(ink))
        ]
@property
def xref(self):
    """Xref number of the annotation's PDF object."""
    CheckParent(self)
    pdf_obj = mupdf.pdf_annot_obj(self.this)
    return mupdf.pdf_to_num(pdf_obj)
class Archive:
    """A MuPDF multi-archive plus a Python-side list describing what was mounted."""

    def __init__( self, *args):
        '''
        Archive(dirname [, path]) - from folder
        Archive(file [, path])    - from file name or object
        Archive(data, name)       - from memory item
        Archive()                 - empty archive
        Archive(archive [, path]) - from archive
        '''
        # One dict (keys "fmt", "entries", "path") per mounted sub-archive.
        self._subarchives = list()
        self.this = mupdf.fz_new_multi_archive()
        if args:
            self.add( *args)

    def __repr__( self):
        return f'Archive, sub-archives: {len(self._subarchives)}'

    def _add_arch( self, subarch, path=None):
        """Mount another archive under `path`."""
        mupdf.fz_mount_multi_archive( self.this, subarch, path)

    def _add_dir( self, folder, path=None):
        """Mount a directory under `path`."""
        sub = mupdf.fz_open_directory( folder)
        mupdf.fz_mount_multi_archive( self.this, sub, path)

    def _add_treeitem( self, memory, name, path=None):
        """Mount a single in-memory item called `name` under `path`."""
        buff = JM_BufferFromBytes( memory)
        sub = mupdf.fz_new_tree_archive( mupdf.FzTree())
        mupdf.fz_tree_archive_add_buffer( sub, name, buff)
        mupdf.fz_mount_multi_archive( self.this, sub, path)

    def _add_ziptarfile( self, filepath, type_, path=None):
        """Mount a zip (`type_` == 1) or tar file given by file name."""
        if type_ == 1:
            sub = mupdf.fz_open_zip_archive( filepath)
        else:
            sub = mupdf.fz_open_tar_archive( filepath)
        mupdf.fz_mount_multi_archive( self.this, sub, path)

    def _add_ziptarmemory( self, memory, type_, path=None):
        """Mount a zip (`type_` == 1) or tar archive held in memory."""
        buff = JM_BufferFromBytes( memory)
        stream = mupdf.fz_open_buffer( buff)
        if type_==1:
            sub = mupdf.fz_open_zip_archive_with_stream( stream)
        else:
            sub = mupdf.fz_open_tar_archive_with_stream( stream)
        mupdf.fz_mount_multi_archive( self.this, sub, path)

    def add( self, content, path=None):
        '''
        Add a sub-archive.

        Args:
            content:
                The content to be added. May be one of:
                    `str` - must be path of directory or file.
                    `bytes`, `bytearray`, `io.BytesIO` - raw data.
                    `zipfile.Zipfile`.
                    `tarfile.TarFile`.
                    `pymupdf.Archive`.
                    A two-item tuple `(data, name)`.
                    List or tuple (but not tuple with length 2) of the above.
            path: (str) a "virtual" path name, under which the elements
                of content can be retrieved. Use it to e.g. cope with
                duplicate element names. For a single file or raw data it
                is the (required, non-empty) name of the item.

        Raises:
            ValueError: a `str` content is neither a file nor a directory.
            TypeError: content is of an unsupported type.
            AssertionError: a required name is missing or of the wrong type.
        '''
        def is_binary_data(x):
            return isinstance(x, (bytes, bytearray, io.BytesIO))

        def make_subarch(entries, mount, fmt):
            # Record the addition; consecutive 'tree' items that share the
            # same path are merged into one record.
            subarch = dict(fmt=fmt, entries=entries, path=mount)
            if fmt != "tree" or self._subarchives == []:
                self._subarchives.append(subarch)
            else:
                ltree = self._subarchives[-1]
                if ltree["fmt"] != "tree" or ltree["path"] != subarch["path"]:
                    self._subarchives.append(subarch)
                else:
                    ltree["entries"].extend(subarch["entries"])
                    self._subarchives[-1] = ltree

        if isinstance(content, pathlib.Path):
            content = str(content)

        if isinstance(content, str):
            if os.path.isdir(content):
                self._add_dir(content, path)
                return make_subarch(os.listdir(content), path, 'dir')
            elif os.path.isfile(content):
                assert isinstance(path, str) and path != '', \
                        f'Need name for binary content, but {path=}.'
                # Read raw bytes, as done for the (data, name) tuple case.
                with open(content, 'rb') as f:
                    ff = f.read()
                self._add_treeitem(ff, path)
                return make_subarch([path], None, 'tree')
            else:
                raise ValueError(f'Not a file or directory: {content!r}')

        elif is_binary_data(content):
            # The comma matters: without it the message would be joined to
            # the '' literal and compared against path.
            assert isinstance(path, str) and path != '', \
                    f'Need name for binary content, but {path=}.'
            self._add_treeitem(content, path)
            return make_subarch([path], None, 'tree')

        elif isinstance(content, zipfile.ZipFile):
            filename = getattr(content, "filename", None)
            if filename is None:
                fp = content.fp.getvalue()
                self._add_ziptarmemory(fp, 1, path)
            else:
                self._add_ziptarfile(filename, 1, path)
            return make_subarch(content.namelist(), path, 'zip')

        elif isinstance(content, tarfile.TarFile):
            filename = getattr(content.fileobj, "name", None)
            if filename is None:
                fp = content.fileobj
                if not isinstance(fp, io.BytesIO):
                    fp = fp.fileobj
                self._add_ziptarmemory(fp.getvalue(), 0, path)
            else:
                self._add_ziptarfile(filename, 0, path)
            return make_subarch(content.getnames(), path, 'tar')

        elif isinstance(content, Archive):
            self._add_arch(content, path)
            return make_subarch([], path, 'multi')

        if isinstance(content, tuple) and len(content) == 2:
            # covers the tree item plus path
            data, name = content
            assert isinstance(name, str), f'Unexpected {type(name)=}'
            if is_binary_data(data):
                self._add_treeitem(data, name, path=path)
            elif isinstance(data, str):
                # NOTE(review): a str that is not an existing file adds
                # nothing, yet the entry is still recorded below.
                if os.path.isfile(data):
                    with open(data, 'rb') as f:
                        ff = f.read()
                    self._add_treeitem(ff, name, path=path)
            else:
                assert 0, f'Unexpected {type(data)=}.'
            return make_subarch([name], path, 'tree')

        elif hasattr(content, '__getitem__'):
            # Deal with sequence of disparate items.
            for item in content:
                self.add(item, path)
            return

        else:
            raise TypeError(f'Unrecognised type {type(content)}.')

    @property
    def entry_list( self):
        '''
        List of sub archives.
        '''
        return self._subarchives

    def has_entry( self, name):
        """Check whether the multi-archive has an entry called `name`."""
        return mupdf.fz_has_archive_entry( self.this, name)

    def read_entry( self, name):
        """Return the content of entry `name` as binary data."""
        buff = mupdf.fz_read_archive_entry( self.this, name)
        return JM_BinFromBuffer( buff)
- class Xml:
- def __enter__(self):
- return self
- def __exit__(self, *args):
- pass
def __init__(self, rhs):
    """Wrap an existing `mupdf.FzXml`, or parse a string as HTML5.

    Any other type of `rhs` fails an assertion.
    """
    if isinstance(rhs, mupdf.FzXml):
        self.this = rhs
        return
    if isinstance(rhs, str):
        source = mupdf.fz_new_buffer_from_copied_data(rhs)
        self.this = mupdf.fz_parse_xml_from_html5(source)
        return
    assert 0, f'Unsupported type for rhs: {type(rhs)}'
-
- def _get_node_tree( self):
- def show_node(node, items, shift):
- while node is not None:
- if node.is_text:
- items.append((shift, f'"{node.text}"'))
- node = node.next
- continue
- items.append((shift, f"({node.tagname}"))
- for k, v in node.get_attributes().items():
- items.append((shift, f"={k} '{v}'"))
- child = node.first_child
- if child:
- items = show_node(child, items, shift + 1)
- items.append((shift, f"){node.tagname}"))
- node = node.next
- return items
- shift = 0
- items = []
- items = show_node(self, items, shift)
- return items
-
def add_bullet_list(self):
    """Append a new bulleted list ("ul" tag) to this node and return it."""
    ul_node = self.create_element("ul")
    self.append_child(ul_node)
    return ul_node
def add_class(self, text):
    """Set some class via CSS. Replaces complete class spec.

    Nothing changes when `text` already occurs in the current class value;
    otherwise `text` is appended to it, separated by a space.
    """
    current = self.get_attribute_value("class")
    if current is not None and text in current:
        return self
    self.remove_attribute("class")
    updated = text if current is None else current + " " + text
    self.set_attribute("class", updated)
    return self
def add_code(self, text=None):
    """Add a "code" tag, holding `text` when that is a string.

    The tag goes below `span_bottom()`, or below this node when that is
    None. Returns this node.
    """
    code_node = self.create_element("code")
    if type(text) is str:
        code_node.append_child(self.create_text_node(text))
    target = self.span_bottom()
    if target is None:
        target = self
    target.append_child(code_node)
    return self
def add_codeblock(self):
    """Append a new monospaced block ("pre" node) to this node and return it."""
    pre_node = self.create_element("pre")
    self.append_child(pre_node)
    return pre_node
def add_description_list(self):
    """Append a new description list ("dl" tag) to this node and return it."""
    dl_node = self.create_element("dl")
    self.append_child(dl_node)
    return dl_node
def add_division(self):
    """Append a new "div" tag to this node and return it."""
    div_node = self.create_element("div")
    self.append_child(div_node)
    return div_node
def add_header(self, level=1):
    """Add header tag "h<level>" and return it.

    When this node is itself a header or a paragraph, the new header is
    appended to this node's parent instead of to this node.

    Raises:
        ValueError: if `level` is not in 1..6.
    """
    if level not in range(1, 7):
        raise ValueError("Header level must be in [1, 6]")
    own_tag = self.tagname
    header = self.create_element(f"h{level}")
    if own_tag in ("h1", "h2", "h3", "h4", "h5", "h6", "p"):
        self.parent.append_child(header)
    else:
        self.append_child(header)
    return header
def add_horizontal_line(self):
    """Append a new horizontal line ("hr" tag) to this node and return it."""
    hr_node = self.create_element("hr")
    self.append_child(hr_node)
    return hr_node
def add_image(self, name, width=None, height=None, imgfloat=None, align=None):
    """Add image node (tag "img") with `name` as its "src" and return it.

    `width`, `height` and `align` become attributes of the same name when
    not None; `imgfloat` becomes the style "float: <imgfloat>".
    """
    img = self.create_element("img")
    optional = (
        ("width", width, f"{width}"),
        ("height", height, f"{height}"),
        ("style", imgfloat, f"float: {imgfloat}"),
        ("align", align, f"{align}"),
    )
    for attr, given, rendered in optional:
        if given is not None:
            img.set_attribute(attr, rendered)
    img.set_attribute("src", f"{name}")
    self.append_child(img)
    return img
def add_link(self, href, text=None):
    """Add a hyperlink ("a" tag) pointing to `href`.

    The link text is `text` when that is a string, otherwise `href`. The tag
    goes below `span_bottom()`, or below this node when that is None.
    Returns this node.
    """
    anchor = self.create_element("a")
    if not isinstance(text, str):
        text = href
    anchor.set_attribute("href", href)
    anchor.append_child(self.create_text_node(text))
    target = self.span_bottom()
    if target is None:
        target = self
    target.append_child(anchor)
    return self
def add_list_item(self):
    """Add item ("li" tag) under a (numbered or bulleted) list.

    Raises:
        ValueError: if this node is neither an "ol" nor a "ul" tag.
    """
    if self.tagname not in ("ol", "ul"):
        raise ValueError("cannot add list item to", self.tagname)
    li_node = self.create_element("li")
    self.append_child(li_node)
    return li_node
def add_number_list(self, start=1, numtype=None):
    """Add numbered list ("ol" tag) and return it.

    A `start` above 1 is written as the "start" attribute; a `numtype` other
    than None is written as the "type" attribute.
    """
    ol_node = self.create_element("ol")
    if start > 1:
        ol_node.set_attribute("start", str(start))
    if numtype is not None:
        ol_node.set_attribute("type", numtype)
    self.append_child(ol_node)
    return ol_node
def add_paragraph(self):
    """Add "p" tag and return it.

    When this node is itself a paragraph, the new one is appended to this
    node's parent instead of to this node.
    """
    para = self.create_element("p")
    if self.tagname == "p":
        self.parent.append_child(para)
    else:
        self.append_child(para)
    return para
def add_span(self):
    """Append a new "span" tag to this node and return it."""
    span_node = self.create_element("span")
    self.append_child(span_node)
    return span_node
def add_style(self, text):
    """Set some style via CSS style. Replaces complete style spec.

    Nothing changes when `text` already occurs in the current style value;
    otherwise `text` is appended to it, separated by a semicolon.
    """
    current = self.get_attribute_value("style")
    if current is not None and text in current:
        return self
    self.remove_attribute("style")
    updated = text if current is None else current + ";" + text
    self.set_attribute("style", updated)
    return self
def add_subscript(self, text=None):
    """Add a subscript ("sub" tag) and return self.

    A text node is placed inside the tag only when `text` is exactly a str.
    The tag goes under span_bottom(), or under self if that is None.
    """
    sub = self.create_element("sub")
    if type(text) is str:
        sub.append_child(self.create_text_node(text))
    target = self.span_bottom()
    if target is None:
        target = self
    target.append_child(sub)
    return self
def add_superscript(self, text=None):
    """Add a superscript ("sup" tag) and return self.

    A text node is placed inside the tag only when `text` is exactly a str.
    The tag goes under span_bottom(), or under self if that is None.
    """
    sup = self.create_element("sup")
    if type(text) is str:
        sup.append_child(self.create_text_node(text))
    target = self.span_bottom()
    if target is None:
        target = self
    target.append_child(sup)
    return self
def add_text(self, text):
    """Add text. Line breaks are honored.

    Each line becomes a text node; a "br" element is inserted between
    consecutive lines. Nodes go under span_bottom(), or under self if that
    is None. Returns self.
    """
    pieces = text.splitlines()
    target = self.span_bottom()
    if target is None:
        target = self
    for position, piece in enumerate(pieces):
        if position:
            # separator between the previous line and this one
            target.append_child(self.create_element("br"))
        target.append_child(self.create_text_node(piece))
    return self
def append_child(self, child):
    """Append `child` to this node via mupdf.fz_dom_append_child."""
    mupdf.fz_dom_append_child(self.this, child.this)
-
def append_styled_span(self, style):
    """Append a new "span" carrying `style` below the deepest stacked span.

    The span is appended to span_bottom(), or to self if that is None.
    Returns the node the span was appended to (not the span itself).
    """
    styled = self.create_element("span")
    styled.add_style(style)
    anchor = self.span_bottom()
    if anchor is None:
        anchor = self
    anchor.append_child(styled)
    return anchor
def bodytag(self):
    """Return the result of mupdf.fz_dom_body for this node, wrapped as Xml."""
    body = mupdf.fz_dom_body(self.this)
    return Xml(body)
-
def clone(self):
    """Return the result of mupdf.fz_dom_clone for this node, wrapped as Xml."""
    return Xml(mupdf.fz_dom_clone(self.this))
-
@staticmethod
def color_text(color):
    """Convert a color specification to CSS text.

    A str is returned unchanged, an int is passed through sRGB_to_rgb and
    wrapped in "rgb(...)", a tuple or list becomes "rgb" followed by the
    tuple representation. Any other value is returned unchanged.
    """
    kind = type(color)
    if kind is str:
        return color
    if kind is int:
        # NOTE(review): if sRGB_to_rgb returns a tuple this renders as
        # "rgb((r, g, b))" - confirm the intended format.
        return f"rgb({sRGB_to_rgb(color)})"
    if kind in (tuple, list):
        return f"rgb{tuple(color)}"
    return color
def create_element(self, tag):
    """Create an element named `tag` via mupdf.fz_dom_create_element, wrapped as Xml."""
    element = mupdf.fz_dom_create_element(self.this, tag)
    return Xml(element)
-
def create_text_node(self, text):
    """Create a text node via mupdf.fz_dom_create_text_node, wrapped as Xml."""
    node = mupdf.fz_dom_create_text_node(self.this, text)
    return Xml(node)
-
def debug(self):
    """Print a list of the node tree below self.

    Each entry from _get_node_tree() is written via message(): item[0]
    spaces of indentation followed by item[1], with newlines shown as "\\n".
    """
    for entry in self._get_node_tree():
        indent = " " * entry[0]
        label = entry[1].replace("\n", "\\n")
        message(indent + label)
def find(self, tag, att, match):
    """Call mupdf.fz_dom_find from this node.

    Returns an Xml wrapper when the result has a non-null m_internal,
    otherwise None.
    """
    hit = mupdf.fz_dom_find(self.this, tag, att, match)
    return Xml(hit) if hit.m_internal else None
-
def find_next(self, tag, att, match):
    """Call mupdf.fz_dom_find_next from this node.

    Returns an Xml wrapper when the result has a non-null m_internal,
    otherwise None.
    """
    hit = mupdf.fz_dom_find_next(self.this, tag, att, match)
    return Xml(hit) if hit.m_internal else None
-
@property
def first_child(self):
    """Return the first child node as Xml, or None.

    None is returned for text nodes (fz_xml_text is truthy) and when
    fz_dom_first_child yields a null result.
    """
    if mupdf.fz_xml_text(self.this):
        # text node, has no child.
        return None
    # Pass the wrapped MuPDF object (self.this), consistent with every
    # other mupdf call in this class, rather than the Python wrapper.
    ret = mupdf.fz_dom_first_child(self.this)
    if ret.m_internal:
        return Xml(ret)
    return None
-
def get_attribute_value(self, key):
    """Return mupdf.fz_dom_attribute for `key`; `key` must be truthy."""
    assert key
    value = mupdf.fz_dom_attribute(self.this, key)
    return value
-
def get_attributes(self):
    """Return this node's attributes as a dict, or None for a text node.

    Attributes are read by index via mupdf.fz_dom_get_attribute until a
    falsy key or value is returned.
    """
    if mupdf.fz_xml_text(self.this):
        # text node, has no attributes.
        return None
    attrs = {}
    index = 0
    while True:
        value, name = mupdf.fz_dom_get_attribute(self.this, index)
        if not (value and name):
            return attrs
        attrs[name] = value
        index += 1
-
def insert_after(self, node):
    """Call mupdf.fz_dom_insert_after with this node and `node`."""
    mupdf.fz_dom_insert_after(self.this, node.this)
-
def insert_before(self, node):
    """Call mupdf.fz_dom_insert_before with this node and `node`."""
    mupdf.fz_dom_insert_before(self.this, node.this)
-
def insert_text(self, text):
    """Append `text` directly to this node and return self.

    Each line becomes a text node; a "br" element is inserted between
    consecutive lines.
    """
    for position, piece in enumerate(text.splitlines()):
        if position:
            # separator between the previous line and this one
            self.append_child(self.create_element("br"))
        self.append_child(self.create_text_node(piece))
    return self
@property
def is_text(self):
    """Check if this is a text node (its `text` is not None)."""
    content = self.text
    return content is not None
@property
def last_child(self):
    """Return last child node, or None if there is no child.

    Starts at first_child and follows `next` until it is falsy.
    """
    node = self.first_child
    while node is not None:
        following = node.next
        if not following:
            break
        node = following
    return node
@property
def next(self):
    """Return the result of mupdf.fz_dom_next as Xml, or None if it is null."""
    sibling = mupdf.fz_dom_next(self.this)
    return Xml(sibling) if sibling.m_internal else None
-
@property
def parent(self):
    """Return the result of mupdf.fz_dom_parent as Xml, or None if it is null."""
    owner = mupdf.fz_dom_parent(self.this)
    return Xml(owner) if owner.m_internal else None
-
@property
def previous(self):
    """Return the result of mupdf.fz_dom_previous as Xml, or None if it is null."""
    sibling = mupdf.fz_dom_previous(self.this)
    return Xml(sibling) if sibling.m_internal else None
-
def remove(self):
    """Call mupdf.fz_dom_remove on this node."""
    mupdf.fz_dom_remove(self.this)
-
def remove_attribute(self, key):
    """Remove attribute `key` via mupdf.fz_dom_remove_attribute; `key` must be truthy."""
    assert key
    mupdf.fz_dom_remove_attribute(self.this, key)
-
@property
def root(self):
    """Return the result of mupdf.fz_xml_root for this node, wrapped as Xml."""
    top = mupdf.fz_xml_root(self.this)
    return Xml(top)
-
def set_align(self, align):
    """Set text alignment via CSS style and return self.

    Args:
        align: a str used verbatim, or one of the TEXT_ALIGN_LEFT / CENTER /
            RIGHT / JUSTIFY constants.
    Raises:
        ValueError: if `align` is neither a str nor one of those constants.
    """
    if isinstance(align, str):
        keyword = align
    else:
        known = (
            (TEXT_ALIGN_LEFT, "left"),
            (TEXT_ALIGN_CENTER, "center"),
            (TEXT_ALIGN_RIGHT, "right"),
            (TEXT_ALIGN_JUSTIFY, "justify"),
        )
        for constant, keyword in known:
            if align == constant:
                break
        else:
            raise ValueError(f"Unrecognised {align=}")
    self.add_style("text-align: %s" % keyword)
    return self
def set_attribute(self, key, value):
    """Add attribute `key`=`value` via mupdf.fz_dom_add_attribute; `key` must be truthy."""
    assert key
    mupdf.fz_dom_add_attribute(self.this, key, value)
-
def set_bgcolor(self, color):
    """Set background color via CSS style and return self.

    The color is converted with color_text() and applied with add_style().
    """
    css_color = self.color_text(color)
    self.add_style("background-color: %s" % css_color)  # does not work on span level
    return self
def set_bold(self, val=True):
    """Set bold on / off via CSS style and return self.

    A truthy `val` gives "font-weight: bold", otherwise "font-weight: normal".
    """
    weight = "bold" if val else "normal"
    self.append_styled_span("font-weight: %s" % weight)
    return self
def set_color(self, color):
    """Set text color via CSS style and return self.

    The color is converted with color_text() and applied through
    append_styled_span().
    """
    css_color = self.color_text(color)
    self.append_styled_span("color: %s" % css_color)
    return self
def set_columns(self, cols):
    """Set number of text columns via CSS style and return self."""
    declaration = "columns: {}".format(cols)
    self.append_styled_span(declaration)
    return self
def set_font(self, font):
    """Set font-family name via CSS style and return self."""
    declaration = "font-family: %s" % font
    self.append_styled_span(declaration)
    return self
def set_fontsize(self, fontsize):
    """Set font size via CSS style and return self.

    A str is used verbatim; any other value gets the unit "px" appended.
    """
    unit = "" if type(fontsize) is str else "px"
    self.append_styled_span("font-size: {}{}".format(fontsize, unit))
    return self
def set_id(self, unique):
    """Set a unique id and return self.

    Raises:
        ValueError: if root.find(None, "id", unique) already finds a node.
    """
    # check uniqueness
    clash = self.root.find(None, "id", unique)
    if clash:
        raise ValueError(f"id '{unique}' already exists")
    self.set_attribute("id", unique)
    return self
def set_italic(self, val=True):
    """Set italic on / off via CSS style and return self.

    A truthy `val` gives "font-style: italic", otherwise "font-style: normal".
    """
    slant = "italic" if val else "normal"
    self.append_styled_span("font-style: %s" % slant)
    return self
def set_leading(self, leading):
    """Set inter-line spacing value via CSS style - block-level only.

    Applies "-mupdf-leading: <leading>" with add_style() and returns self.
    """
    self.add_style("-mupdf-leading: {}".format(leading))
    return self
def set_letter_spacing(self, spacing):
    """Set inter-letter spacing value via CSS style and return self."""
    self.append_styled_span("letter-spacing: {}".format(spacing))
    return self
def set_lineheight(self, lineheight):
    """Set line height via CSS style - block-level only.

    Applies "line-height: <lineheight>" with add_style() and returns self.
    """
    self.add_style("line-height: {}".format(lineheight))
    return self
def set_margins(self, val):
    """Set margin values via CSS style and return self."""
    # NOTE(review): the emitted property name is "margins"; standard CSS
    # spells it "margin" - confirm this is what the renderer expects.
    declaration = "margins: %s" % val
    self.append_styled_span(declaration)
    return self
def set_opacity(self, opacity):
    """Set opacity via CSS style and return self."""
    self.append_styled_span("opacity: {}".format(opacity))
    return self
def set_pagebreak_after(self):
    """Insert a page break after this node (CSS "page-break-after: always")."""
    self.add_style("page-break-after: always")
    return self
def set_pagebreak_before(self):
    """Insert a page break before this node (CSS "page-break-before: always")."""
    self.add_style("page-break-before: always")
    return self
def set_properties(
    self,
    align=None,
    bgcolor=None,
    bold=None,
    color=None,
    columns=None,
    font=None,
    fontsize=None,
    indent=None,
    italic=None,
    leading=None,
    letter_spacing=None,
    lineheight=None,
    margins=None,
    pagebreak_after=None,
    pagebreak_before=None,
    word_spacing=None,
    unqid=None,
    cls=None,
):
    """Set any or all properties of a node.

    To be used for existing nodes preferably.

    The requested properties are first applied to a temporary division
    added under the root. The style strings collected from that division
    and its chain of first children are joined with ";" and written to this
    node's "style" attribute. `unqid` and `cls` are applied to this node
    directly via set_id() and add_class(). Any argument that is not None is
    applied; the pagebreak arguments act as flags.

    Returns:
        self
    """
    temp = self.root.add_division()
    try:
        # (setter name, argument, whether the setter takes the argument)
        plan = (
            ("set_align", align, True),
            ("set_bgcolor", bgcolor, True),
            ("set_bold", bold, True),
            ("set_color", color, True),
            ("set_columns", columns, True),
            ("set_font", font, True),
            ("set_fontsize", fontsize, True),
            ("set_text_indent", indent, True),
            ("set_italic", italic, True),
            ("set_leading", leading, True),
            ("set_letter_spacing", letter_spacing, True),
            ("set_lineheight", lineheight, True),
            ("set_margins", margins, True),
            ("set_pagebreak_after", pagebreak_after, False),
            ("set_pagebreak_before", pagebreak_before, False),
            ("set_word_spacing", word_spacing, True),
        )
        for setter, value, takes_value in plan:
            if value is None:
                continue
            method = getattr(temp, setter)
            if takes_value:
                method(value)
            else:
                method()
        if unqid is not None:
            self.set_id(unqid)
        if cls is not None:
            self.add_class(cls)

        styles = []
        top_style = temp.get_attribute_value("style")
        if top_style is not None:
            styles.append(top_style)
        child = temp.first_child
        while child:
            child_style = child.get_attribute_value("style")
            if child_style is not None:  # a None entry would break join()
                styles.append(child_style)
            child = child.first_child
        self.set_attribute("style", ";".join(styles))
    finally:
        # Always detach the scratch node, also when a setter raised,
        # so it never stays behind in the document tree.
        temp.remove()
    return self
def set_text_indent(self, indent):
    """Set text indentation via CSS style - block-level only.

    Applies "text-indent: <indent>" with add_style() and returns self.
    """
    self.add_style("text-indent: {}".format(indent))
    return self
def set_underline(self, val="underline"):
    """Set "text-decoration: <val>" via a styled span and return self."""
    declaration = "text-decoration: %s" % val
    self.append_styled_span(declaration)
    return self
def set_word_spacing(self, spacing):
    """Set inter-word spacing value via CSS style and return self."""
    self.append_styled_span("word-spacing: {}".format(spacing))
    return self
def span_bottom(self):
    """Find deepest level in stacked spans.

    Returns None when this node has no children, or when its last non-text
    child is not a "span". Otherwise descends from that span: "a", "sub",
    "sup", "body" and text nodes are skipped by moving to their next
    sibling, each "span" met becomes the current result and the walk
    continues with its first child. The walk ends at the first other tag or
    when no node is left.
    """
    node = self.last_child
    if node is None:
        return None
    # step back over trailing text nodes
    while node is not None and node.is_text:
        node = node.previous
    if node is None or node.tagname != "span":
        return None
    deepest = self
    while node is not None:
        if node.tagname in ("a", "sub", "sup", "body") or node.is_text:
            node = node.next
        elif node.tagname == "span":
            deepest = node
            node = node.first_child
        else:
            break
    return deepest
@property
def tagname(self):
    """Return mupdf.fz_xml_tag for this node."""
    tag = mupdf.fz_xml_tag(self.this)
    return tag
-
@property
def text(self):
    """Return mupdf.fz_xml_text for this node."""
    content = mupdf.fz_xml_text(self.this)
    return content
-
- add_var = add_code
- add_samp = add_code
- add_kbd = add_code
class Colorspace:
    """Wrapper around a mupdf.FzColorspace."""

    def __init__(self, type_):
        """Supported are GRAY, RGB and CMYK.

        Args:
            type_: an existing mupdf.FzColorspace (used as is), or one of
                CS_GRAY, CS_CMYK, CS_RGB. Any other value selects RGB.
        """
        if isinstance(type_, mupdf.FzColorspace):
            self.this = type_
        elif type_ == CS_GRAY:
            self.this = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_GRAY)
        elif type_ == CS_CMYK:
            self.this = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_CMYK)
        else:
            # CS_RGB and every unrecognised value map to RGB.
            self.this = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB)

    def __repr__(self):
        # Look the label up by component count; unknown counts give an
        # empty label instead of raising IndexError.
        label = {1: "GRAY", 3: "RGB", 4: "CMYK"}.get(self.n, "")
        return "Colorspace(CS_%s) - %s" % (label, self.name)

    def _name(self):
        """Return mupdf.fz_colorspace_name for the wrapped colorspace."""
        return mupdf.fz_colorspace_name(self.this)

    @property
    def n(self):
        """Size of one pixel."""
        return mupdf.fz_colorspace_n(self.this)

    @property
    def name(self):
        """Name of the Colorspace."""
        return self._name()
class DeviceWrapper:
    """Holds a MuPDF device in `self.this`.

    Accepted constructor arguments (checked with args_match):
        (mupdf.FzDevice)         - use the given device
        (Pixmap, clip)           - draw device, bounded by clip unless the
                                   clip converts to an infinite irect
        (mupdf.FzDisplayList)    - list device
        (mupdf.FzStextPage, flags) - structured-text device
    """

    def __init__(self, *args):
        if args_match(args, mupdf.FzDevice):
            (self.this,) = args
        elif args_match(args, Pixmap, None):
            pixmap, clip = args
            irect = JM_irect_from_py(clip)
            if mupdf.fz_is_infinite_irect(irect):
                self.this = mupdf.fz_new_draw_device(mupdf.FzMatrix(), pixmap)
            else:
                self.this = mupdf.fz_new_draw_device_with_bbox(mupdf.FzMatrix(), pixmap, irect)
        elif args_match(args, mupdf.FzDisplayList):
            (display_list,) = args
            self.this = mupdf.fz_new_list_device(display_list)
        elif args_match(args, mupdf.FzStextPage, None):
            textpage, flags = args
            options = mupdf.FzStextOptions(flags)
            self.this = mupdf.fz_new_stext_device(textpage, options)
        else:
            raise Exception(f'Unrecognised args for DeviceWrapper: {args!r}')
class DisplayList:
    """Wrapper around a mupdf.FzDisplayList (stored in `self.this`)."""

    def __del__(self):
        if not type(self) is DisplayList: return
        self.thisown = False

    def __init__(self, *args):
        """Create from a single mupdf.FzRect or wrap a mupdf.FzDisplayList."""
        if len(args) == 1 and isinstance(args[0], mupdf.FzRect):
            self.this = mupdf.FzDisplayList(args[0])
        elif len(args) == 1 and isinstance(args[0], mupdf.FzDisplayList):
            self.this = args[0]
        else:
            assert 0, f'Unrecognised {args=}'

    def get_pixmap(self, matrix=None, colorspace=None, alpha=0, clip=None):
        """Render via JM_pixmap_from_display_list.

        A Colorspace argument is unwrapped; any other value selects RGB.
        """
        if isinstance(colorspace, Colorspace):
            colorspace = colorspace.this
        else:
            colorspace = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB)
        val = JM_pixmap_from_display_list(self.this, matrix, colorspace, alpha, clip, None)
        val.thisown = True
        return val

    def get_textpage(self, flags=3):
        """Make a TextPage from a DisplayList."""
        stext_options = mupdf.FzStextOptions()
        stext_options.flags = flags
        val = mupdf.FzStextPage(self.this, stext_options)
        val.thisown = True
        return val

    @property
    def rect(self):
        """Bounds from mupdf.fz_bound_display_list, as a Rect."""
        return Rect(JM_py_from_rect(mupdf.fz_bound_display_list(self.this)))

    def run(self, dw, m, area):
        """Run the display list through the device held by `dw`.

        Args:
            dw: device wrapper; its `device` attribute is used when present,
                otherwise its `this` attribute (which is what DeviceWrapper
                sets).
            m: matrix, converted with JM_matrix_from_py.
            area: rectangle, converted with JM_rect_from_py.
        """
        device = dw.device if hasattr(dw, "device") else dw.this
        mupdf.fz_run_display_list(
            self.this,
            device,
            JM_matrix_from_py(m),
            JM_rect_from_py(area),
            mupdf.FzCookie(),
        )
- if g_use_extra:
- extra_FzDocument_insert_pdf = extra.FzDocument_insert_pdf
- class Document:
- def __contains__(self, loc) -> bool:
- if type(loc) is int:
- if loc < self.page_count:
- return True
- return False
- if type(loc) not in (tuple, list) or len(loc) != 2:
- return False
- chapter, pno = loc
- if (0
- or not isinstance(chapter, int)
- or chapter < 0
- or chapter >= self.chapter_count
- ):
- return False
- if (0
- or not isinstance(pno, int)
- or pno < 0
- or pno >= self.chapter_page_count(chapter)
- ):
- return False
- return True
- def __delitem__(self, i)->None:
- if not self.is_pdf:
- raise ValueError("is no PDF")
- if type(i) is int:
- return self.delete_page(i)
- if type(i) in (list, tuple, range):
- return self.delete_pages(i)
- if type(i) is not slice:
- raise ValueError("bad argument type")
- pc = self.page_count
- start = i.start if i.start else 0
- stop = i.stop if i.stop else pc
- step = i.step if i.step else 1
- while start < 0:
- start += pc
- if start >= pc:
- raise ValueError("bad page number(s)")
- while stop < 0:
- stop += pc
- if stop > pc:
- raise ValueError("bad page number(s)")
- return self.delete_pages(range(start, stop, step))
- def __enter__(self):
- return self
- def __exit__(self, *args):
- self.close()
- @typing.overload
- def __getitem__(self, i: int = 0) -> Page:
- ...
-
- if sys.version_info >= (3, 9):
- @typing.overload
- def __getitem__(self, i: slice) -> list[Page]:
- ...
-
- @typing.overload
- def __getitem__(self, i: tuple[int, int]) -> Page:
- ...
-
- def __getitem__(self, i=0):
- if isinstance(i, slice):
- return [self[j] for j in range(*i.indices(len(self)))]
- assert isinstance(i, int) or (isinstance(i, tuple) and len(i) == 2 and all(isinstance(x, int) for x in i)), \
- f'Invalid item number: {i=}.'
- if i not in self:
- raise IndexError(f"page {i} not in document")
- return self.load_page(i)
def __init__(self, filename=None, stream=None, filetype=None, rect=None, width=0, height=0, fontsize=11):
    """Creates a document. Use 'open' as a synonym.

    Notes:
        Basic usages:
        open() - new PDF document
        open(filename) - string or pathlib.Path, must have supported
                file extension.
        open(type, buffer) - type: valid extension, buffer: bytes object.
        open(stream=buffer, filetype=type) - keyword version of previous.
        open(filename, filetype=type) - filename with unrecognized extension.
        rect, width, height, fontsize: layout reflowable document
        on open (e.g. EPUB). Ignored if n/a.

    Raises:
        TypeError: unsupported `stream` or `filename` type.
        FileNotFoundError: `filename` does not exist.
        EmptyFileError: the stream or file is empty.
        FileDataError: `filename` is not a file, or the data cannot be
            opened as a document.
    """
    # We temporarily set JM_mupdf_show_errors=0 while we are constructing,
    # then restore its original value in a `finally:` block.
    #
    global JM_mupdf_show_errors
    JM_mupdf_show_errors_old = JM_mupdf_show_errors
    JM_mupdf_show_errors = 0

    try:
        self.is_closed = False
        self.is_encrypted = False
        self.metadata = None
        self.FontInfos = []
        self.Graftmaps = {}
        self.ShownPages = {}
        self.InsertedImages = {}
        self._page_refs = weakref.WeakValueDictionary()
        if isinstance(filename, mupdf.PdfDocument):
            # Wrap an already opened PDF document.
            pdf_document = filename
            self.this = pdf_document
            self.this_is_pdf = True
            return

        w = width
        h = height
        r = JM_rect_from_py(rect)
        if not mupdf.fz_is_infinite_rect(r):
            # A finite rect overrides width / height.
            w = r.x1 - r.x0
            h = r.y1 - r.y0
        self._name = filename
        self.stream = stream

        if stream is not None:
            if filename is not None and filetype is None:
                # 2025-05-06: Use <filename> as the filetype. This is
                # reversing precedence - we used to use <filename> if both
                # were set.
                filetype = filename
            if isinstance(stream, (bytes, memoryview)):
                pass
            elif isinstance(stream, bytearray):
                stream = bytes(stream)
            elif isinstance(stream, io.BytesIO):
                stream = stream.getvalue()
            else:
                raise TypeError(f"bad stream: {type(stream)=}.")
            self.stream = stream

            assert isinstance(stream, (bytes, memoryview))
            if len(stream) == 0:
                # MuPDF raise an exception for this but also generates
                # warnings, which is not very helpful for us. So instead we
                # raise a specific exception.
                raise EmptyFileError('Cannot open empty stream.')

            stream2 = mupdf.fz_open_memory(mupdf.python_buffer_data(stream), len(stream))
            try:
                doc = mupdf.fz_open_document_with_stream(filetype if filetype else '', stream2)
            except Exception as e:
                if g_exceptions_verbose > 1:
                    exception_info()
                raise FileDataError('Failed to open stream') from e

        elif filename:
            assert not stream
            if isinstance(filename, str):
                pass
            elif hasattr(filename, "absolute"):
                filename = str(filename)
            elif hasattr(filename, "name"):
                filename = filename.name
            else:
                raise TypeError(f"bad filename: {type(filename)=} {filename=}.")
            self._name = filename

            # Generate our own specific exceptions. This avoids MuPDF
            # generating warnings etc.
            if not os.path.exists(filename):
                raise FileNotFoundError(f"no such file: '{filename}'")
            elif not os.path.isfile(filename):
                raise FileDataError(f"'{filename}' is no file")
            elif os.path.getsize(filename) == 0:
                raise EmptyFileError(f'Cannot open empty file: {filename=}.')

            if filetype:
                # Override the type implied by <filename>. MuPDF does not
                # have a way to do this directly so we open via a stream.
                try:
                    fz_stream = mupdf.fz_open_file(filename)
                    doc = mupdf.fz_open_document_with_stream(filetype, fz_stream)
                except Exception as e:
                    if g_exceptions_verbose > 1:
                        exception_info()
                    raise FileDataError(f'Failed to open file {filename!r} as type {filetype!r}.') from e
            else:
                try:
                    doc = mupdf.fz_open_document(filename)
                except Exception as e:
                    if g_exceptions_verbose > 1:
                        exception_info()
                    raise FileDataError(f'Failed to open file {filename!r}.') from e
        else:
            # Neither stream nor filename: new empty PDF.
            pdf = mupdf.PdfDocument()
            doc = mupdf.FzDocument(pdf)

        if w > 0 and h > 0:
            mupdf.fz_layout_document(doc, w, h, fontsize)
        elif mupdf.fz_is_document_reflowable(doc):
            mupdf.fz_layout_document(doc, 400, 600, 11)
        self.this = doc
        # fixme: not sure where self.thisown gets initialised in PyMuPDF.
        #
        self.thisown = True
        if self.thisown:
            self._graft_id = TOOLS.gen_id()
            if self.needs_pass:
                self.is_encrypted = True
            else:  # we won't init until doc is decrypted
                self.init_doc()
            # the following hack detects invalid/empty SVG files, which else may lead
            # to interpreter crashes
            if (filename and filename.lower().endswith("svg")) or (filetype and "svg" in filetype.lower()):
                try:
                    _ = self.convert_to_pdf()  # this seems to always work
                except Exception as e:
                    if g_exceptions_verbose > 1:
                        exception_info()
                    raise FileDataError("cannot open broken document") from e
        if g_use_extra:
            self.this_is_pdf = isinstance(self.this, mupdf.PdfDocument)
            if self.this_is_pdf:
                self.page_count2 = extra.page_count_pdf
            else:
                self.page_count2 = extra.page_count_fz
    finally:
        JM_mupdf_show_errors = JM_mupdf_show_errors_old
-
- def __len__(self) -> int:
- return self.page_count
- def __repr__(self) -> str:
- m = "closed " if self.is_closed else ""
- if self.stream is None:
- if self.name == "":
- return m + "Document(<new PDF, doc# %i>)" % self._graft_id
- return m + "Document('%s')" % (self.name,)
- return m + "Document('%s', <memory, doc# %i>)" % (self.name, self._graft_id)
def _addFormFont(self, name, font):
    """Add new form font.

    Stores `font` (converted with JM_pdf_obj_from_str) under `name` in the
    /Root/AcroForm/DR/Font dictionary. Does nothing for non-PDF documents.

    Raises:
        ValueError: document closed or encrypted.
        RuntimeError: the form font dictionary does not exist.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return
    font_dict = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('AcroForm'),
        PDF_NAME('DR'),
        PDF_NAME('Font'),
    )
    if not (font_dict.m_internal and mupdf.pdf_is_dict(font_dict)):
        raise RuntimeError( "PDF has no form fonts yet")
    key = mupdf.pdf_new_name(name)
    value = JM_pdf_obj_from_str(pdf, font)
    mupdf.pdf_dict_put(font_dict, key, value)
def _delToC(self):
    """Delete the TOC.

    Returns the list of xrefs of the deleted outline items followed by the
    xref of the outline root; an empty list for non-PDF documents or when
    there is no /Outlines entry.

    Raises:
        ValueError: document closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    xrefs = []  # create Python list
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return xrefs  # not a pdf
    # get the main root
    catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    # get the outline root
    outlines = mupdf.pdf_dict_get(catalog, PDF_NAME('Outlines'))
    if not outlines.m_internal:
        return xrefs  # no outlines or some problem
    first_item = mupdf.pdf_dict_get(outlines, PDF_NAME('First'))  # first outline
    xrefs = JM_outline_xrefs(first_item, xrefs)
    item_count = len(xrefs)
    outlines_xref = mupdf.pdf_to_num(outlines)
    # delete the outline root object and its catalog entry
    mupdf.pdf_delete_object(pdf, outlines_xref)
    mupdf.pdf_dict_del(catalog, PDF_NAME('Outlines'))
    for index in range(item_count):
        _, item_xref = JM_INT_ITEM(xrefs, index)
        mupdf.pdf_delete_object(pdf, item_xref)  # delete outline item
    xrefs.append(outlines_xref)
    self.init_doc()
    return xrefs
def _delete_page(self, pno):
    """Delete page `pno` and drop the page tree if a reverse page map exists."""
    pdf = _as_pdf_document(self)
    mupdf.pdf_delete_page(pdf, pno)
    internal = pdf.m_internal
    if internal.rev_page_map:
        mupdf.ll_pdf_drop_page_tree(internal)
def _deleteObject(self, xref):
    """Delete object.

    Raises:
        ValueError: if `xref` is outside 1 .. pdf_xref_len - 1.
    """
    pdf = _as_pdf_document(self)
    highest = mupdf.pdf_xref_len(pdf) - 1
    if not _INRANGE(xref, 1, highest):
        raise ValueError( MSG_BAD_XREF)
    mupdf.pdf_delete_object(pdf, xref)
def _embeddedFileGet(self, idx):
    """Return the content of embedded file number `idx`.

    Reads array item 2*idx+1 of /Root/Names/EmbeddedFiles/Names, loads the
    stream at its /EF/F entry and converts it with JM_BinFromBuffer.
    """
    pdf = _as_pdf_document(self)
    names = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    entry = mupdf.pdf_array_get(names, 2*idx+1)
    filespec = mupdf.pdf_dict_getl(entry, PDF_NAME('EF'), PDF_NAME('F'))
    return JM_BinFromBuffer(mupdf.pdf_load_stream(filespec))
- def _embeddedFileIndex(self, item: typing.Union[int, str]) -> int:
- filenames = self.embfile_names()
- msg = "'%s' not in EmbeddedFiles array." % str(item)
- if item in filenames:
- idx = filenames.index(item)
- elif item in range(len(filenames)):
- idx = item
- else:
- raise ValueError(msg)
- return idx
def _embfile_add(self, name, buffer_, filename=None, ufilename=None, desc=None):
    """Add an embedded file and return the xref of its /EF/F object.

    The /Root/Names/EmbeddedFiles/Names array is created when missing;
    `name` and the new file entry are pushed onto it.

    Raises:
        TypeError: if `buffer_` cannot be converted by JM_BufferFromBytes.
    """
    pdf = _as_pdf_document(self)
    data = JM_BufferFromBytes(buffer_)
    if not data.m_internal:
        raise TypeError( MSG_BAD_BUFFER)
    trailer_path = (
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    names = mupdf.pdf_dict_getl(mupdf.pdf_trailer(pdf), PDF_NAME('Root'), *trailer_path)
    if not mupdf.pdf_is_array(names):
        catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
        names = mupdf.pdf_new_array(pdf, 6)  # an even number!
        mupdf.pdf_dict_putl(catalog, names, *trailer_path)
    fileentry = JM_embed_file(pdf, data, filename, ufilename, desc, 1)
    stream_obj = mupdf.pdf_dict_getl(fileentry, PDF_NAME('EF'), PDF_NAME('F'))
    xref = mupdf.pdf_to_num(stream_obj)
    mupdf.pdf_array_push(names, mupdf.pdf_new_text_string(name))
    mupdf.pdf_array_push(names, fileentry)
    return xref
def _embfile_del(self, idx):
    """Delete array items idx+1 and idx of /Root/Names/EmbeddedFiles/Names."""
    pdf = _as_pdf_document(self)
    names = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    # NOTE(review): sibling methods address entry number idx at array
    # position 2*idx+1; here idx is used as a raw array position - confirm
    # what the caller passes.
    for position in (idx + 1, idx):
        mupdf.pdf_array_delete(names, position)
def _embfile_info(self, idx, infodict):
    """Fill `infodict` with data of embedded file `idx`; return its stream xref.

    Written keys: "collection" (only if a /CI entry exists), the file name,
    unicode file name and description, then size and length. Size and
    length are -1 when not found. Size comes from /DL, falling back to
    /Params/Size.
    """
    pdf = _as_pdf_document(self)
    names = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    spec = mupdf.pdf_array_get(names, 2*idx+1)
    ci = mupdf.pdf_dict_get(spec, PDF_NAME('CI'))
    if ci.m_internal:
        infodict["collection"] = mupdf.pdf_to_num(ci)

    def text_of(key):
        return mupdf.pdf_to_text_string(mupdf.pdf_dict_get(spec, PDF_NAME(key)))

    infodict[dictkey_filename] = JM_EscapeStrFromStr(text_of('F'))
    infodict[dictkey_ufilename] = JM_EscapeStrFromStr(text_of('UF'))
    infodict[dictkey_descr] = JM_UnicodeFromStr(text_of('Desc'))

    fileentry = mupdf.pdf_dict_getl(spec, PDF_NAME('EF'), PDF_NAME('F'))
    xref = mupdf.pdf_to_num(fileentry)

    length = -1
    length_obj = mupdf.pdf_dict_get(fileentry, PDF_NAME('Length'))
    if length_obj.m_internal:
        length = mupdf.pdf_to_int(length_obj)

    size = -1
    size_obj = mupdf.pdf_dict_get(fileentry, PDF_NAME('DL'))
    if not size_obj.m_internal:
        size_obj = mupdf.pdf_dict_getl(fileentry, PDF_NAME('Params'), PDF_NAME('Size'))
    if size_obj.m_internal:
        size = mupdf.pdf_to_int(size_obj)

    infodict[dictkey_size] = size
    infodict[dictkey_length] = length
    return xref
def _embfile_names(self, namelist):
    """Get list of embedded file names.

    Appends every even-indexed item of /Root/Names/EmbeddedFiles/Names
    (converted with JM_EscapeStrFromStr) to `namelist`.
    """
    pdf = _as_pdf_document(self)
    names = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    if not mupdf.pdf_is_array(names):
        return
    for position in range(0, mupdf.pdf_array_len(names), 2):
        raw = mupdf.pdf_to_text_string(mupdf.pdf_array_get(names, position))
        namelist.append(JM_EscapeStrFromStr(raw))
def _embfile_upd(self, idx, buffer_=None, filename=None, ufilename=None, desc=None):
    """Update embedded file `idx`; return the xref of its /EF/F object.

    Args:
        idx: number of the entry (array item 2*idx+1 is used).
        buffer_: new content; the stream is only replaced when this is
            truthy.
        filename, ufilename, desc: new /F, /UF, /Desc texts, each only
            written when truthy.
    Raises:
        TypeError: if `buffer_` is given but cannot be converted by
            JM_BufferFromBytes.
    """
    pdf = _as_pdf_document(self)
    xref = 0
    names = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    entry = mupdf.pdf_array_get(names, 2*idx+1)
    filespec = mupdf.pdf_dict_getl(entry, PDF_NAME('EF'), PDF_NAME('F'))
    if not filespec.m_internal:
        RAISEPY( "bad PDF: no /EF object", JM_Exc_FileDataError)
    res = JM_BufferFromBytes(buffer_)
    # `buffer_` is normally a Python bytes-like object, which has no
    # `m_internal`; only objects that do carry the attribute are checked
    # through it.
    has_data = bool(buffer_) and bool(getattr(buffer_, "m_internal", True))
    if has_data and not res.m_internal:
        raise TypeError( MSG_BAD_BUFFER)
    if has_data and res.m_internal:
        JM_update_stream(pdf, filespec, res, 1)
        # adjust /DL and /Size parameters
        size, _ = mupdf.fz_buffer_storage(res)
        size_obj = mupdf.pdf_new_int(size)
        mupdf.pdf_dict_put(filespec, PDF_NAME('DL'), size_obj)
        mupdf.pdf_dict_putl(filespec, size_obj, PDF_NAME('Params'), PDF_NAME('Size'))
    xref = mupdf.pdf_to_num(filespec)
    if filename:
        mupdf.pdf_dict_put_text_string(entry, PDF_NAME('F'), filename)
    if ufilename:
        mupdf.pdf_dict_put_text_string(entry, PDF_NAME('UF'), ufilename)
    if desc:
        mupdf.pdf_dict_put_text_string(entry, PDF_NAME('Desc'), desc)
    return xref
def _extend_toc_items(self, items):
    """Add color info to all items of an extended TOC list.

    For every outline item the dict at item[3] receives the xref and, where
    present, "italic", "bold", "collapse", the color and "zoom". Delegates
    to extra.Document_extend_toc_items when g_use_extra is set.

    Raises:
        ValueError: document closed, or item[3] is not a dict.
        IndexError: number of outline xrefs differs from len(items).
    """
    if self.is_closed:
        raise ValueError("document closed")
    if g_use_extra:
        return extra.Document_extend_toc_items( self.this, items)
    pdf = _as_pdf_document(self)
    catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    if not catalog.m_internal:
        return
    outlines = mupdf.pdf_dict_get(catalog, PDF_NAME('Outlines'))
    if not outlines.m_internal:
        return
    first_item = mupdf.pdf_dict_get(outlines, PDF_NAME('First'))
    if not first_item.m_internal:
        return
    xrefs = JM_outline_xrefs(first_item, [])
    if not xrefs:
        return
    if len(xrefs) != len(items):
        raise IndexError( "internal error finding outline xrefs")
    # update all TOC item dictionaries
    for i, raw_xref in enumerate(xrefs):
        xref = int(raw_xref)
        item = items[i]
        itemdict = item[3]
        if not isinstance(itemdict, dict):
            raise ValueError( "need non-simple TOC format")
        itemdict[dictkey_xref] = raw_xref
        bm = mupdf.pdf_load_object(pdf, xref)

        flags = mupdf.pdf_to_int(mupdf.pdf_dict_get(bm, PDF_NAME('F')))
        if flags in (1, 3):
            itemdict["italic"] = True
        if flags in (2, 3):
            itemdict["bold"] = True

        count = mupdf.pdf_to_int(mupdf.pdf_dict_get(bm, PDF_NAME('Count')))
        if count < 0:
            itemdict["collapse"] = True
        elif count > 0:
            itemdict["collapse"] = False

        col = mupdf.pdf_dict_get(bm, PDF_NAME('C'))
        if mupdf.pdf_is_array(col) and mupdf.pdf_array_len(col) == 3:
            itemdict[dictkey_color] = (
                mupdf.pdf_to_real(mupdf.pdf_array_get(col, 0)),
                mupdf.pdf_to_real(mupdf.pdf_array_get(col, 1)),
                mupdf.pdf_to_real(mupdf.pdf_array_get(col, 2)),
            )

        zoom_value = 0
        dest = mupdf.pdf_dict_get(bm, PDF_NAME('Dest'))
        if not dest.m_internal or not mupdf.pdf_is_array(dest):
            dest = mupdf.pdf_dict_getl(bm, PDF_NAME('A'), PDF_NAME('D'))
        if mupdf.pdf_is_array(dest) and mupdf.pdf_array_len(dest) == 5:
            zoom_value = mupdf.pdf_to_real(mupdf.pdf_array_get(dest, 4))
        itemdict["zoom"] = float(zoom_value)

        item[3] = itemdict
        items[i] = item
- def _forget_page(self, page: Page):
- """Remove a page from document page dict."""
- pid = id(page)
- if pid in self._page_refs:
- #self._page_refs[pid] = None
- del self._page_refs[pid]
def _get_char_widths(self, xref: int, bfname: str, ext: str, ordering: int, limit: int, idx: int = 0):
    """Return a list of (glyph, advance) tuples for a font.

    The font is a CJK font when ordering >= 0, else the Base-14 font
    `bfname` if found, else the font buffer at `xref`. At least 256 code
    points are examined. For CJK fonts the code point itself is reported as
    glyph. Entries whose glyph is not positive get width 0.0.

    Raises:
        Exception: if no font buffer exists for `xref`.
    """
    pdf = _as_pdf_document(self)
    code_count = max(limit, 256)
    is_cjk = ordering >= 0
    if is_cjk:
        data, size, index = mupdf.fz_lookup_cjk_font(ordering)
        font = mupdf.fz_new_font_from_memory(None, data, size, index, 0)
    else:
        data, size = mupdf.fz_lookup_base14_font(bfname)
        if data:
            font = mupdf.fz_new_font_from_memory(bfname, data, size, 0, 0)
        else:
            buf = JM_get_fontbuffer(pdf, xref)
            if not buf.m_internal:
                raise Exception("font at xref %d is not supported" % xref)
            font = mupdf.fz_new_font_from_buffer(None, buf, idx, 0)
    widths = []
    for code in range(code_count):
        glyph = mupdf.fz_encode_character(font, code)
        advance = mupdf.fz_advance_glyph(font, glyph, 0)
        if is_cjk:
            glyph = code
        widths.append((glyph, advance if glyph > 0 else 0.0))
    return widths
def _get_page_labels(self):
    """Return the page label definitions collected by JM_get_page_labels.

    Looks at /Root/PageLabels: a direct /Nums, then /Kids/Nums, then every
    /Nums of a /Kids array. Returns an empty list if nothing is found.
    """
    pdf = _as_pdf_document(self)
    labels = []
    key = mupdf.pdf_new_name("PageLabels")
    tree = mupdf.pdf_dict_getl(mupdf.pdf_trailer(pdf), PDF_NAME('Root'), key)
    if not tree.m_internal:
        return labels
    # simple case: direct /Nums object
    nums = mupdf.pdf_resolve_indirect(mupdf.pdf_dict_get(tree, PDF_NAME('Nums')))
    if not nums.m_internal:
        # case: /Kids/Nums
        nums = mupdf.pdf_resolve_indirect(mupdf.pdf_dict_getl(tree, PDF_NAME('Kids'), PDF_NAME('Nums')))
    if nums.m_internal:
        JM_get_page_labels(labels, nums)
        return labels
    # case: /Kids is an array of multiple /Nums
    kids = mupdf.pdf_resolve_indirect(mupdf.pdf_dict_get(tree, PDF_NAME('Kids')))
    if not kids.m_internal or not mupdf.pdf_is_array(kids):
        return labels
    for position in range(mupdf.pdf_array_len(kids)):
        kid = mupdf.pdf_array_get(kids, position)
        nums = mupdf.pdf_resolve_indirect(mupdf.pdf_dict_get(kid, PDF_NAME('Nums')))
        JM_get_page_labels(labels, nums)
    return labels
def _getMetadata(self, key):
    """Get metadata.

    Returns mupdf.fz_lookup_metadata2 for `key`, or '' if that raises.
    """
    try:
        value = mupdf.fz_lookup_metadata2(self.this, key)
    except Exception:
        if g_exceptions_verbose > 2:
            exception_info()
        return ''
    return value
def _getOLRootNumber(self):
    """Get xref of Outline Root, create it if missing.

    Raises:
        ValueError: document closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    outlines_key = PDF_NAME('Outlines')
    # get main root
    catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    # get outline root
    outlines = mupdf.pdf_dict_get(catalog, outlines_key)
    if not outlines.m_internal:
        # create a new /Outlines dictionary and register it in the catalog
        outlines = mupdf.pdf_new_dict(pdf, 4)
        mupdf.pdf_dict_put(outlines, PDF_NAME('Type'), outlines_key)
        indirect = mupdf.pdf_add_object(pdf, outlines)
        mupdf.pdf_dict_put(catalog, outlines_key, indirect)
        outlines = mupdf.pdf_dict_get(catalog, outlines_key)
    return mupdf.pdf_to_num(outlines)
def _getPDFfileid(self):
    """Get the entries of the trailer's /ID array as hexadecimal values.

    Returns:
        None when no PDF document is available (``pdf.m_internal`` is
        falsy); otherwise a list with one ``bytes`` hex representation
        (as produced by ``binascii.hexlify``) per /ID entry. The list is
        empty if the trailer has no /ID.
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return None
    idlist = []
    identity = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('ID'))
    if identity.m_internal:
        for i in range(mupdf.pdf_array_len(identity)):
            text = mupdf.pdf_to_text_string(mupdf.pdf_array_get(identity, i))
            # binascii.hexlify() only accepts bytes-like objects, so a str
            # result must be encoded first (passing str raises TypeError).
            # NOTE(review): UTF-8 with surrogateescape is assumed to mirror
            # how the binding decoded the C string - confirm.
            if isinstance(text, str):
                text = text.encode("utf-8", "surrogateescape")
            idlist.append(binascii.hexlify(text))
    return idlist
def _getPageInfo(self, pno, what):
    """List fonts, images or XObjects referenced by a page's resources.

    Args:
        pno: page number; negative values are wrapped by adding the page
            count until non-negative.
        what: selector passed through to JM_scan_resources().

    Returns:
        The list filled by JM_scan_resources(); empty if the page has no
        (inherited) /Resources.

    Raises:
        ValueError: document closed/encrypted, or page number too large.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    doc = self.this
    if isinstance(doc, mupdf.PdfDocument):
        page_total = mupdf.pdf_count_pages(doc)
    else:
        page_total = mupdf.fz_count_pages(doc)

    index = pno  # pno < 0 is allowed
    while index < 0:
        index += page_total  # make it non-negative
    if index >= page_total:
        raise ValueError(MSG_BAD_PAGENO)

    pdf = _as_pdf_document(self)
    page_obj = mupdf.pdf_lookup_page_obj(pdf, index)
    resources = mupdf.pdf_dict_get_inheritable(page_obj, mupdf.PDF_ENUM_NAME_Resources)
    found = []
    tracer = []
    if resources.m_internal:
        JM_scan_resources(pdf, resources, found, what, 0, tracer)
    return found
def _insert_font(self, fontfile=None, fontbuffer=None):
    '''
    Utility: insert a font given as a file or as binary data.

    Raises ValueError(MSG_FILE_OR_BUFFER) if neither source is provided.
    Returns whatever JM_insert_font() returns.
    '''
    pdf = _as_pdf_document(self)
    if not (fontfile or fontbuffer):
        raise ValueError(MSG_FILE_OR_BUFFER)
    return JM_insert_font(pdf, None, fontfile, fontbuffer, 0, 0, 0, 0, 0, -1)
def _loadOutline(self):
    """Load the document outline.

    Returns:
        An Outline wrapping the result of mupdf.fz_load_outline(), or
        None if loading raised an exception.
    """
    doc = self.this
    assert isinstance(doc, mupdf.FzDocument)
    try:
        raw_outline = mupdf.fz_load_outline(doc)
    except Exception:
        # Best effort: a failing outline load is only reported when verbose.
        if g_exceptions_verbose > 1:
            exception_info()
        return None
    return Outline(raw_outline)
- def _make_page_map(self):
- """Make an array page number -> page object."""
- if self.is_closed:
- raise ValueError("document closed")
- assert 0, f'_make_page_map() is no-op'
def _move_copy_page(self, pno, nb, before, copy):
    """Move or copy a PDF page reference inside the page tree.

    Args:
        pno: number of the source page.
        nb: number of the page that marks the target position.
        before: truthy to insert in front of page ``nb``, else behind it.
        copy: truthy to add a second reference, falsy to move the page.
    """
    pdf = _as_pdf_document(self)

    def adjust_counts(node, delta):
        # Walk up the /Parent chain and add ``delta`` to every /Count.
        while node.m_internal:
            current = mupdf.pdf_dict_get_int(node, PDF_NAME('Count'))
            mupdf.pdf_dict_put_int(node, PDF_NAME('Count'), current + delta)
            node = mupdf.pdf_dict_get(node, PDF_NAME('Parent'))

    # Locate both pages: page object, parent node and index in its /Kids.
    src_page, src_parent, src_index = pdf_lookup_page_loc(pdf, pno)
    src_kids = mupdf.pdf_dict_get(src_parent, PDF_NAME('Kids'))
    _, dst_parent, dst_index = pdf_lookup_page_loc(pdf, nb)
    dst_kids = mupdf.pdf_dict_get(dst_parent, PDF_NAME('Kids'))

    # Index of the source page in the target /Kids array.
    insert_at = dst_index if before else dst_index + 1

    # Non-zero comparison result means the two /Kids arrays differ.
    different_kids = mupdf.pdf_objcmp(src_kids, dst_kids) != 0

    if different_kids and not copy:
        # A moved page changes its parent node.
        mupdf.pdf_dict_put(src_page, PDF_NAME('Parent'), dst_parent)
    mupdf.pdf_array_insert(dst_kids, src_page, insert_at)

    if different_kids:
        adjust_counts(dst_parent, 1)
        if not copy:
            # Moving: drop the original entry and shrink the source counts.
            mupdf.pdf_array_delete(src_kids, src_index)
            adjust_counts(src_parent, -1)
    elif copy:
        # Same /Kids array, one more reference.
        adjust_counts(dst_parent, 1)
    elif src_index < insert_at:
        mupdf.pdf_array_delete(src_kids, src_index)
    else:
        # The insertion above shifted the original entry by one.
        mupdf.pdf_array_delete(src_kids, src_index + 1)

    if pdf.m_internal.rev_page_map:  # page map no longer valid: drop it
        mupdf.ll_pdf_drop_page_tree(pdf.m_internal)
    self._reset_page_refs()
def _newPage(self, pno=-1, width=595, height=842):
    """Create a new PDF page and return it.

    Args:
        pno: position passed to mupdf.pdf_insert_page(); values below -1
            are rejected on the pure-Python path.
        width: value stored as x1 of the media box.
        height: value stored as y1 of the media box.

    Returns:
        ``self[pno]`` after all page references were reset.

    Raises:
        ValueError: document closed/encrypted, or ``pno < -1``.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if not g_use_extra:
        pdf = _as_pdf_document(self)
        # Start from the unit rectangle and stretch it to the page size.
        mediabox = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
        mediabox.x1 = width
        mediabox.y1 = height
        contents = mupdf.FzBuffer()
        if pno < -1:
            raise ValueError(MSG_BAD_PAGENO)
        # create /Resources and /Contents objects
        resources = mupdf.pdf_add_new_dict(pdf, 1)
        new_page = mupdf.pdf_add_page(pdf, mediabox, 0, resources, contents)
        mupdf.pdf_insert_page(pdf, pno, new_page)
    else:
        extra._newPage(self.this, pno, width, height)
    # fixme: pdf->dirty = 1;
    self._reset_page_refs()
    return self[pno]
def _remove_links_to(self, numbers):
    """Hand the page numbers in ``numbers`` to _remove_dest_range()."""
    _remove_dest_range(_as_pdf_document(self), numbers)
def _remove_toc_item(self, xref):
    """ "Remove" a bookmark by letting it point to nowhere.

    The outline item at ``xref`` loses its /Dest and /A entries and gets
    the color (0.8, 0.8, 0.8) stored under /C.
    """
    pdf = _as_pdf_document(self)
    entry = mupdf.pdf_new_indirect(pdf, xref, 0)
    mupdf.pdf_dict_del(entry, PDF_NAME('Dest'))
    mupdf.pdf_dict_del(entry, PDF_NAME('A'))
    gray = mupdf.pdf_new_array(pdf, 3)
    for _ in range(3):
        mupdf.pdf_array_push_real(gray, 0.8)
    mupdf.pdf_dict_put(entry, PDF_NAME('C'), gray)
- def _reset_page_refs(self):
- """Invalidate all pages in document dictionary."""
- if getattr(self, "is_closed", True):
- return
- pages = [p for p in self._page_refs.values()]
- for page in pages:
- if page:
- page._erase()
- page = None
- self._page_refs.clear()
def _set_page_labels(self, labels):
    """Replace the catalog's /PageLabels with the given definitions.

    Args:
        labels: text spliced between the brackets of the new /Nums array
            in the catalog's object source.
    """
    pdf = _as_pdf_document(self)
    name_pagelabels = mupdf.pdf_new_name("PageLabels")
    catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    # Drop any old entry, then create /PageLabels with an empty /Nums.
    mupdf.pdf_dict_del(catalog, name_pagelabels)
    mupdf.pdf_dict_putl(catalog, mupdf.pdf_new_array(pdf, 0), name_pagelabels, PDF_NAME('Nums'))
    # Fill the empty array by editing the catalog's object source text.
    catalog_xref = self.pdf_catalog()
    source = self.xref_object(catalog_xref, compressed=True)
    self.update_object(catalog_xref, source.replace("/Nums[]", "/Nums[%s]" % labels))
def _update_toc_item(self, xref, action=None, title=None, flags=0, collapse=None, color=None):
    '''
    Update properties of the bookmark (outline item) at xref.

    title: if truthy, stored as /Title.
    action: if truthy, /Dest is removed and the object parsed from this
        string is stored as /A.
    flags: always written as /F.
    color: if truthy, its first three items are stored as /C; a falsy
        value other than None removes /C.
    collapse: if not None and the item has /Count, the sign of /Count is
        flipped when it disagrees with the requested state (True needs a
        negative, False a positive count).
    '''
    pdf = _as_pdf_document(self)
    entry = mupdf.pdf_new_indirect(pdf, xref, 0)
    if title:
        mupdf.pdf_dict_put_text_string(entry, PDF_NAME('Title'), title)
    if action:
        mupdf.pdf_dict_del(entry, PDF_NAME('Dest'))
        action_obj = JM_pdf_obj_from_str(pdf, action)
        mupdf.pdf_dict_put(entry, PDF_NAME('A'), action_obj)
    mupdf.pdf_dict_put_int(entry, PDF_NAME('F'), flags)

    if color:
        components = mupdf.pdf_new_array(pdf, 3)
        for pos in range(3):
            mupdf.pdf_array_push_real(components, color[pos])
        mupdf.pdf_dict_put(entry, PDF_NAME('C'), components)
    elif color is not None:
        mupdf.pdf_dict_del(entry, PDF_NAME('C'))

    if collapse is None:
        return
    if not mupdf.pdf_dict_get(entry, PDF_NAME('Count')).m_internal:
        return
    count = mupdf.pdf_dict_get_int(entry, PDF_NAME('Count'))
    must_flip = (count < 0 and collapse is False) or (count > 0 and collapse is True)
    if must_flip:
        mupdf.pdf_dict_put_int(entry, PDF_NAME('Count'), count * (-1))
@property
def FormFonts(self):
    """Get list of field font resource names.

    Returns:
        None when no PDF document is available; otherwise the key names
        of the catalog's /AcroForm/DR/Font dictionary (empty list if that
        dictionary does not exist).
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return None
    font_dict = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('AcroForm'),
        PDF_NAME('DR'),
        PDF_NAME('Font'),
    )
    if not (font_dict.m_internal and mupdf.pdf_is_dict(font_dict)):
        return []
    return [
        JM_UnicodeFromStr(mupdf.pdf_to_name(mupdf.pdf_dict_get_key(font_dict, pos)))
        for pos in range(mupdf.pdf_dict_len(font_dict))
    ]
def add_layer(self, name, creator=None, on=None):
    """Add a new OC layer.

    The arguments are passed through to JM_add_layer_config(); afterwards
    the optional content data is re-read via mupdf.ll_pdf_read_ocg().
    """
    pdf = _as_pdf_document(self)
    JM_add_layer_config(pdf, name, creator, on)
    mupdf.ll_pdf_read_ocg(pdf.m_internal)
def add_ocg(self, name, config=-1, on=1, intent=None, usage=None):
    """Add new optional content group.

    Args:
        name: text stored as the /Name of the OCG.
        config: index into /OCProperties/Configs; values <= -1 select the
            default configuration /D.
        on: truthy to list the OCG under /ON, falsy to list it under /OFF.
        intent: a single intent name (str) or an iterable of names; falsy
            means 'View'. Items of an iterable that are not non-empty
            strings are skipped.
        usage: /Subtype of the CreatorInfo usage entry; falsy means
            "Artwork".

    Returns:
        The xref number of the new OCG.

    Raises:
        ValueError: if ``config`` does not designate an existing
            configuration.
        TypeError: if ``intent`` is truthy, not a str and not iterable.
    """
    pdf = _as_pdf_document(self)

    # make the OCG
    ocg = mupdf.pdf_add_new_dict(pdf, 3)
    mupdf.pdf_dict_put(ocg, PDF_NAME('Type'), PDF_NAME('OCG'))
    mupdf.pdf_dict_put_text_string(ocg, PDF_NAME('Name'), name)
    intents = mupdf.pdf_dict_put_array(ocg, PDF_NAME('Intent'), 2)
    if not intent:
        mupdf.pdf_array_push(intents, PDF_NAME('View'))
    elif isinstance(intent, str):
        mupdf.pdf_array_push(intents, mupdf.pdf_new_name(intent))
    else:
        # Several intents: push every usable name. This used to be an
        # unconditional assertion failure.
        for item in intent:
            if isinstance(item, str) and item:
                mupdf.pdf_array_push(intents, mupdf.pdf_new_name(item))

    use_for = mupdf.pdf_dict_put_dict(ocg, PDF_NAME('Usage'), 3)
    ci_name = mupdf.pdf_new_name("CreatorInfo")
    cre_info = mupdf.pdf_dict_put_dict(use_for, ci_name, 2)
    mupdf.pdf_dict_put_text_string(cre_info, PDF_NAME('Creator'), "PyMuPDF")
    mupdf.pdf_dict_put_name(cre_info, PDF_NAME('Subtype'), usage if usage else "Artwork")
    indocg = mupdf.pdf_add_object(pdf, ocg)

    # Insert OCG in the right config
    ocp = JM_ensure_ocproperties(pdf)
    obj = mupdf.pdf_dict_get(ocp, PDF_NAME('OCGs'))
    mupdf.pdf_array_push(obj, indocg)

    if config > -1:
        obj = mupdf.pdf_dict_get(ocp, PDF_NAME('Configs'))
        if not mupdf.pdf_is_array(obj):
            raise ValueError(MSG_BAD_OC_CONFIG)
        cfg = mupdf.pdf_array_get(obj, config)
        if not cfg.m_internal:
            raise ValueError(MSG_BAD_OC_CONFIG)
    else:
        cfg = mupdf.pdf_dict_get(ocp, PDF_NAME('D'))

    # Append to /Order, creating the array on first use.
    obj = mupdf.pdf_dict_get(cfg, PDF_NAME('Order'))
    if not obj.m_internal:
        obj = mupdf.pdf_dict_put_array(cfg, PDF_NAME('Order'), 1)
    mupdf.pdf_array_push(obj, indocg)

    # Append to /ON or /OFF, creating the array on first use.
    state_key = 'ON' if on else 'OFF'
    obj = mupdf.pdf_dict_get(cfg, PDF_NAME(state_key))
    if not obj.m_internal:
        obj = mupdf.pdf_dict_put_array(cfg, PDF_NAME(state_key), 1)
    mupdf.pdf_array_push(obj, indocg)

    # let MuPDF take note: re-read OCProperties
    mupdf.ll_pdf_read_ocg(pdf.m_internal)

    return mupdf.pdf_to_num(indocg)
def authenticate(self, password):
    """Decrypt document.

    Returns:
        The result of mupdf.fz_authenticate_password(); a truthy value
        means success, in which case the document is marked as not
        encrypted and init_doc() is run.

    Raises:
        ValueError: if the document is closed.
    """
    if self.is_closed:
        raise ValueError("document closed")
    result = mupdf.fz_authenticate_password(self.this, password)
    if not result:
        return result
    # the doc is decrypted successfully and we init the outline
    self.is_encrypted = False
    self.init_doc()
    self.thisown = True
    return result
def can_save_incrementally(self):
    """Check whether incremental saves are possible.

    Returns False when no PDF document is available, otherwise the answer
    of mupdf.pdf_can_be_saved_incrementally().
    """
    pdf = _as_pdf_document(self, required=0)
    if pdf.m_internal:
        return mupdf.pdf_can_be_saved_incrementally(pdf)
    return False
def bake(self, *, annots: bool = True, widgets: bool = True) -> None:
    """Convert annotations or fields to permanent content.

    Notes:
        Converts annotations or widgets to permanent page content, like
        text and vector graphics, as appropriate.
        After execution, pages will still look the same, but no longer
        have annotations, respectively no fields.
        If widgets are selected the PDF will no longer be a Form PDF.

    Args:
        annots: convert annotations
        widgets: convert form fields
    """
    # Both switches are handed to MuPDF as integers.
    mupdf.pdf_bake_document(_as_pdf_document(self), int(annots), int(widgets))
@property
def chapter_count(self):
    """Number of chapters, as reported by mupdf.fz_count_chapters().

    Raises ValueError if the document is closed.
    """
    if not self.is_closed:
        return mupdf.fz_count_chapters(self.this)
    raise ValueError("document closed")
def chapter_page_count(self, chapter):
    """Page count of chapter.

    Raises:
        ValueError: if the document is closed or ``chapter`` is outside
            0 .. chapter count - 1.
    """
    if self.is_closed:
        raise ValueError("document closed")
    total_chapters = mupdf.fz_count_chapters(self.this)
    if chapter < 0 or chapter >= total_chapters:
        raise ValueError("bad chapter number")
    return mupdf.fz_count_chapter_pages(self.this, chapter)
def close(self):
    """Close document.

    Drops the outline, invalidates all page references, marks the
    document as closed, empties ``Graftmaps`` and releases ``this``.

    Raises:
        ValueError: if the document is already closed (or has no
            ``is_closed`` attribute).
    """
    if getattr(self, "is_closed", True):
        raise ValueError("document closed")
    if getattr(self, "_outline", None):
        self._outline = None
    # Must run before is_closed is set: it is a no-op on closed documents.
    self._reset_page_refs()
    self.is_closed = True
    self.Graftmaps = {}  # Fixes test_3140().
    self.this = None
def convert_to_pdf(self, from_page=0, to_page=-1, rotate=0):
    """Convert document to a PDF, selecting page range and optional rotation. Output bytes object.

    Args:
        from_page: first page; clamped into 0 .. last page.
        to_page: last page; negative means last page, larger values are
            clamped to the last page.
        rotate: passed through to JM_convert_to_pdf().

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    fz_doc = self.this
    last = mupdf.fz_count_pages(fz_doc) - 1

    first_page = 0 if from_page < 0 else from_page
    if first_page > last:
        first_page = last
    final_page = last if to_page < 0 else to_page
    if final_page > last:
        final_page = last

    # Echo every MuPDF warning that the conversion adds to the store.
    seen = len(JM_mupdf_warnings_store)
    result = JM_convert_to_pdf(fz_doc, first_page, final_page, rotate)
    for pos in range(seen, len(JM_mupdf_warnings_store)):
        message(f'{JM_mupdf_warnings_store[pos]}')
    return result
def copy_page(self, pno: int, to: int =-1):
    """Copy a page within a PDF document.

    This will only create another reference of the same page object.

    Args:
        pno: source page number
        to: put before this page, '-1' means after last page.

    Raises:
        ValueError: if the document is closed or a page number is out of
            range.
    """
    if self.is_closed:
        raise ValueError("document closed")
    total = len(self)
    if pno not in range(total) or to not in range(-1, total):
        raise ValueError("bad page number(s)")
    if to == -1:
        # Append: insert *behind* the current last page.
        return self._move_copy_page(pno, total - 1, 0, 1)
    return self._move_copy_page(pno, to, 1, 1)
def del_xml_metadata(self):
    """Delete XML metadata.

    Removes the /Metadata entry from the catalog, if a catalog exists.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    if not catalog.m_internal:
        return
    mupdf.pdf_dict_del(catalog, PDF_NAME('Metadata'))
def delete_page(self, pno: int =-1):
    """Delete one page from a PDF.

    Thin wrapper: forwards ``pno`` as the only argument of delete_pages().
    """
    outcome = self.delete_pages(pno)
    return outcome
def delete_pages(self, *args, **kw):
    """Delete pages from a PDF.

    Args:
        Either keywords 'from_page'/'to_page', or two integers to
        specify the first/last page to delete.
        Or a list/tuple/range object, which can contain arbitrary
        page numbers.
        Or a single integer page number.

    Raises:
        ValueError: not a PDF, document closed, keyword and positional
            arguments mixed, wrong number of positional arguments, or a
            page number outside the document.
    """
    if not self.is_pdf:
        raise ValueError("is no PDF")
    if self.is_closed:
        raise ValueError("document closed")

    page_count = self.page_count  # page count of document

    def wrap(number):
        # Map a negative page number into the range of existing pages.
        if number < 0 and page_count < 1:
            # Adding a non-positive page count would never terminate.
            raise ValueError("bad page number(s)")
        while number < 0:
            number += page_count
        return number

    if kw:  # check if keywords were used
        if args:  # then no positional args are allowed
            raise ValueError("cannot mix keyword and positional argument")
        f = wrap(kw.get("from_page", -1))  # first page to delete
        t = wrap(kw.get("to_page", -1))  # last page to delete
        if not f <= t < page_count:
            raise ValueError("bad page number(s)")
        numbers = tuple(range(f, t + 1))
    elif not args or len(args) > 2:
        # ``args`` is a tuple, so emptiness must be tested by truthiness;
        # comparing with [] never matched and led to an IndexError below.
        raise ValueError("need 1 or 2 positional arguments")
    elif len(args) == 2:
        f, t = args
        if not (type(f) is int and type(t) is int):
            raise ValueError("both arguments must be int")
        if f > t:
            f, t = t, f
        if not f <= t < page_count:
            raise ValueError("bad page number(s)")
        numbers = tuple(range(f, t + 1))
    elif isinstance(args[0], int):
        numbers = (wrap(args[0]),)
    else:
        numbers = tuple(args[0])

    numbers = sorted(map(int, set(numbers)))  # ensure unique integers
    if not numbers:
        message("nothing to delete")
        return
    if numbers[0] < 0 or numbers[-1] >= page_count:
        raise ValueError("bad page number(s)")

    frozen_numbers = frozenset(numbers)
    toc = self.get_toc()
    for i, xref in enumerate(self.get_outline_xrefs()):
        # toc[i][2] is compared 1-based against the 0-based page numbers
        if toc[i][2] - 1 in frozen_numbers:
            self._remove_toc_item(xref)  # remove target in PDF object

    self._remove_links_to(frozen_numbers)

    for i in reversed(numbers):  # delete pages, last to first
        self._delete_page(i)

    self._reset_page_refs()
def embfile_add(self,
        name: str,
        buffer_: ByteString,
        filename: OptStr =None,
        ufilename: OptStr =None,
        desc: OptStr =None,
        ) -> None:
    """Add an item to the EmbeddedFiles array.

    Args:
        name: name of the new item, must not already exist.
        buffer_: (binary data) the file content.
        filename: (str) the file name, default: the name
        ufilename: (unicode) the file name, default: filename
        desc: (str) the description, default: the name

    Returns:
        The xref returned by _embfile_add() (despite the annotation).

    Raises:
        ValueError: if an item called ``name`` already exists.
    """
    if name in self.embfile_names():
        raise ValueError("Name '%s' already exists." % str(name))

    # Fill in the cascading defaults.
    filename = name if filename is None else filename
    ufilename = filename if ufilename is None else ufilename
    desc = name if desc is None else desc

    xref = self._embfile_add(
        name,
        buffer_=buffer_,
        filename=filename,
        ufilename=ufilename,
        desc=desc,
    )
    # Type the new object and stamp creation and modification date.
    now = get_pdf_now()
    self.xref_set_key(xref, "Type", "/EmbeddedFile")
    self.xref_set_key(xref, "Params/CreationDate", get_pdf_str(now))
    self.xref_set_key(xref, "Params/ModDate", get_pdf_str(now))
    return xref
def embfile_count(self) -> int:
    """Get number of EmbeddedFiles (the length of embfile_names())."""
    names = self.embfile_names()
    return len(names)
def embfile_del(self, item: typing.Union[int, str]):
    """Delete an entry from EmbeddedFiles.

    Notes:
        The argument must be name or index of an EmbeddedFiles item.
        Physical deletion of data will happen on save to a new
        file with appropriate garbage option.

    Args:
        item: name or number of item.

    Returns:
        The result of _embfile_del() for the resolved index.
    """
    return self._embfile_del(self._embeddedFileIndex(item))
def embfile_get(self, item: typing.Union[int, str]) -> bytes:
    """Get the content of an item in the EmbeddedFiles array.

    Args:
        item: number or name of item.

    Returns:
        (bytes) The file content, as delivered by _embeddedFileGet().
    """
    return self._embeddedFileGet(self._embeddedFileIndex(item))
def embfile_info(self, item: typing.Union[int, str]) -> dict:
    """Get information of an item in the EmbeddedFiles array.

    Args:
        item: number or name of item.

    Returns:
        Information dictionary: the item's "name", whatever
        _embfile_info() adds, plus "creationDate", "modDate" and
        "checksum" for those /Params entries that are not null.
    """
    idx = self._embeddedFileIndex(item)
    info = {"name": self.embfile_names()[idx]}
    xref = self._embfile_info(idx, info)

    # Copy the two date entries verbatim when present.
    for pdf_key, info_key in (
        ("Params/CreationDate", "creationDate"),
        ("Params/ModDate", "modDate"),
    ):
        kind, value = self.xref_get_key(xref, pdf_key)
        if kind != "null":
            info[info_key] = value

    # The checksum is reported as the hex form of its encoded text.
    kind, md5 = self.xref_get_key(xref, "Params/CheckSum")
    if kind != "null":
        info["checksum"] = binascii.hexlify(md5.encode()).decode()
    return info
def embfile_names(self) -> list:
    """Get list of names of EmbeddedFiles.

    A fresh list is handed to _embfile_names(), which fills it in place.
    """
    collected = []
    self._embfile_names(collected)
    return collected
def embfile_upd(self,
        item: typing.Union[int, str],
        buffer_: OptBytes =None,
        filename: OptStr =None,
        ufilename: OptStr =None,
        desc: OptStr =None,
        ) -> None:
    """Change an item of the EmbeddedFiles array.

    Notes:
        All parameters are passed on to _embfile_upd(); the modification
        date of the item is refreshed on every call.

    Args:
        item: number or name of item.
        buffer_: (binary data) the new file content.
        filename: (str) the new file name.
        ufilename: (unicode) the new file name.
        desc: (str) the new description.

    Returns:
        The xref returned by _embfile_upd() (despite the annotation).
    """
    changes = dict(
        buffer_=buffer_,
        filename=filename,
        ufilename=ufilename,
        desc=desc,
    )
    xref = self._embfile_upd(self._embeddedFileIndex(item), **changes)
    # Stamp the modification date with the current time.
    self.xref_set_key(xref, "Params/ModDate", get_pdf_str(get_pdf_now()))
    return xref
def extract_font(self, xref=0, info_only=0, named=None):
    '''
    Get a font by xref. Returns a tuple or dictionary.

    The result holds name, extension, subtype and binary content, either
    as a 4-tuple (named is falsy) or as a dictionary keyed by
    dictkey_name / dictkey_ext / dictkey_type / dictkey_content.
    All items are empty if the object is not of /Type /Font or if its
    subtype name starts with 'CIDFontType'. The content is only read if
    info_only is falsy and the font extension is not 'n/a'.
    '''
    pdf = _as_pdf_document(self)
    font_obj = mupdf.pdf_load_object(pdf, xref)
    type_ = mupdf.pdf_dict_get(font_obj, PDF_NAME('Type'))
    subtype = mupdf.pdf_dict_get(font_obj, PDF_NAME('Subtype'))

    name = ext = kind = ''
    content = b''
    if (mupdf.pdf_name_eq(type_, PDF_NAME('Font'))
            and not mupdf.pdf_to_name(subtype).startswith('CIDFontType')
            ):
        # Prefer /BaseFont; fall back to /Name if it is missing or null.
        name_obj = mupdf.pdf_dict_get(font_obj, PDF_NAME('BaseFont'))
        if not name_obj.m_internal or mupdf.pdf_is_null(name_obj):
            name_obj = mupdf.pdf_dict_get(font_obj, PDF_NAME('Name'))
        raw_ext = JM_get_fontextension(pdf, xref)
        if raw_ext != 'n/a' and not info_only:
            content = JM_BinFromBuffer(JM_get_fontbuffer(pdf, xref))
        name = JM_EscapeStrFromStr(mupdf.pdf_to_name(name_obj))
        ext = JM_UnicodeFromStr(raw_ext)
        kind = JM_UnicodeFromStr(mupdf.pdf_to_name(subtype))

    if not named:
        return name, ext, kind, content
    return {
        dictkey_name: name,
        dictkey_ext: ext,
        dictkey_type: kind,
        dictkey_content: content,
    }
def extract_image(self, xref):
    """Get image by xref. Returns a dictionary.

    The dictionary is filled by _make_image_dict() and extended by the
    xref of the /SMask (or /Mask) entry - 0 if there is none - and the
    name of the image's colorspace.

    Raises:
        ValueError: document closed/encrypted, xref out of range, or the
            object is not of /Subtype /Image.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    if not _INRANGE(xref, 1, mupdf.pdf_xref_len(pdf)-1):
        raise ValueError(MSG_BAD_XREF)

    image_obj = mupdf.pdf_new_indirect(pdf, xref, 0)
    subtype = mupdf.pdf_dict_get(image_obj, PDF_NAME('Subtype'))
    if not mupdf.pdf_name_eq(subtype, PDF_NAME('Image')):
        raise ValueError("not an image")

    mask = mupdf.pdf_dict_geta(image_obj, PDF_NAME('SMask'), PDF_NAME('Mask'))
    smask = mupdf.pdf_to_num(mask) if mask.m_internal else 0

    # load the image
    image = mupdf.pdf_load_image(pdf, image_obj)
    result = {}
    _make_image_dict(image, result)
    result[dictkey_smask] = smask
    result[dictkey_cs_name] = mupdf.fz_colorspace_name(image.colorspace())
    return result
def ez_save(
        self,
        filename,
        garbage=3,
        clean=False,
        deflate=True,
        deflate_images=True,
        deflate_fonts=True,
        incremental=False,
        ascii=False,
        expand=False,
        linear=False,
        pretty=False,
        encryption=1,
        permissions=4095,
        owner_pw=None,
        user_pw=None,
        no_new_id=True,
        preserve_metadata=1,
        use_objstms=1,
        compression_effort=0,
        ):
    '''
    Save PDF using some different defaults.

    Every option is forwarded by keyword to save(); only the default
    values are specific to this method.
    '''
    options = dict(
        garbage=garbage,
        clean=clean,
        deflate=deflate,
        deflate_images=deflate_images,
        deflate_fonts=deflate_fonts,
        incremental=incremental,
        ascii=ascii,
        expand=expand,
        linear=linear,
        pretty=pretty,
        encryption=encryption,
        permissions=permissions,
        owner_pw=owner_pw,
        user_pw=user_pw,
        no_new_id=no_new_id,
        preserve_metadata=preserve_metadata,
        use_objstms=use_objstms,
        compression_effort=compression_effort,
    )
    return self.save(filename, **options)
def find_bookmark(self, bm):
    """Find new location after layouting a document.

    Returns:
        Tuple (chapter, page) of the location looked up for ``bm``.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    where = mupdf.fz_lookup_bookmark2(self.this, bm)
    return (where.chapter, where.page)
def fullcopy_page(self, pno, to=-1):
    """Make a full page duplicate.

    Args:
        pno: number of the page to duplicate.
        to: position passed to mupdf.pdf_insert_page(); must lie in
            -1 .. page count - 1.

    Raises:
        ValueError: if one of the page numbers is out of range.
    """
    pdf = _as_pdf_document(self)
    page_total = mupdf.pdf_count_pages(pdf)
    try:
        numbers_ok = (
            _INRANGE(pno, 0, page_total - 1)
            and _INRANGE(to, -1, page_total - 1)
        )
        if not numbers_ok:
            raise ValueError(MSG_BAD_PAGENO)

        source = mupdf.pdf_resolve_indirect(mupdf.pdf_lookup_page_obj(pdf, pno))
        duplicate = mupdf.pdf_deep_copy_obj(source)

        # copy annotations, but remove Popup and IRT types
        old_annots = mupdf.pdf_dict_get(duplicate, PDF_NAME('Annots'))
        if old_annots.m_internal:
            annot_total = mupdf.pdf_array_len(old_annots)
            new_annots = mupdf.pdf_new_array(pdf, annot_total)
            for pos in range(annot_total):
                annot = mupdf.pdf_array_get(old_annots, pos)
                subtype = mupdf.pdf_dict_get(annot, PDF_NAME('Subtype'))
                if (mupdf.pdf_name_eq(subtype, PDF_NAME('Popup'))
                        or mupdf.pdf_dict_gets(annot, "IRT").m_internal
                        ):
                    continue
                # Store a deep copy as a new object and continue with an
                # indirect reference to it.
                clone = mupdf.pdf_deep_copy_obj(mupdf.pdf_resolve_indirect(annot))
                clone_xref = mupdf.pdf_create_object(pdf)
                mupdf.pdf_update_object(pdf, clone_xref, clone)
                clone = mupdf.pdf_new_indirect(pdf, clone_xref, 0)
                mupdf.pdf_dict_del(clone, PDF_NAME('Popup'))
                mupdf.pdf_dict_del(clone, PDF_NAME('P'))
                mupdf.pdf_array_push(new_annots, clone)
            mupdf.pdf_dict_put(duplicate, PDF_NAME('Annots'), new_annots)

        # copy the old contents stream(s)
        old_contents = JM_read_contents(source)

        # create new /Contents object for the duplicate
        if old_contents and old_contents.m_internal:
            placeholder = mupdf.fz_new_buffer_from_copied_data(b" ")
            contents = mupdf.pdf_add_stream(pdf, placeholder, mupdf.PdfObj(), 0)
            JM_update_stream(pdf, contents, old_contents, 1)
            mupdf.pdf_dict_put(duplicate, PDF_NAME('Contents'), contents)

        # now insert target page, making sure it is an indirect object
        page_xref = mupdf.pdf_create_object(pdf)  # get new xref
        mupdf.pdf_update_object(pdf, page_xref, duplicate)  # store new page
        duplicate = mupdf.pdf_new_indirect(pdf, page_xref, 0)  # reread object
        mupdf.pdf_insert_page(pdf, to, duplicate)  # and store the page
    finally:
        # Runs on success and on failure alike.
        mupdf.ll_pdf_drop_page_tree(pdf.m_internal)
    self._reset_page_refs()
def get_layer(self, config=-1):
    """Content of ON, OFF, RBGroups of an OC layer.

    Args:
        config: -1 selects the default configuration /D, any other value
            is used as index into /OCProperties/Configs.

    Returns:
        None if the catalog has no /OCProperties, else the result of
        JM_get_ocg_arrays() for the selected configuration.

    Raises:
        ValueError: if the selected configuration does not exist.
    """
    pdf = _as_pdf_document(self)
    ocp = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('OCProperties'),
    )
    if not ocp.m_internal:
        return None
    if config != -1:
        configs = mupdf.pdf_dict_get(ocp, PDF_NAME('Configs'))
        selected = mupdf.pdf_array_get(configs, config)
    else:
        selected = mupdf.pdf_dict_get(ocp, PDF_NAME('D'))
    if not selected.m_internal:
        raise ValueError(MSG_BAD_OC_CONFIG)
    return JM_get_ocg_arrays(selected)
def get_layers(self):
    """Show optional OC layers.

    Returns:
        A list with one dictionary (keys "number", "name", "creator") per
        layer configuration.
    """
    pdf = _as_pdf_document(self)
    total = mupdf.pdf_count_layer_configs(pdf)
    if total == 1:
        # A count of one is only accepted if /Configs really is an array.
        configs = mupdf.pdf_dict_getl(
            mupdf.pdf_trailer(pdf),
            PDF_NAME('Root'),
            PDF_NAME('OCProperties'),
            PDF_NAME('Configs'),
        )
        if not mupdf.pdf_is_array(configs):
            total = 0

    layers = []
    info = mupdf.PdfLayerConfig()  # refilled for every configuration
    for number in range(total):
        mupdf.pdf_layer_config_info(pdf, number, info)
        layers.append({
            "number": number,
            "name": info.name,
            "creator": info.creator,
        })
    return layers
def get_new_xref(self):
    """Make new xref.

    Returns:
        The number delivered by mupdf.pdf_create_object().

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    ENSURE_OPERATION(pdf)
    return mupdf.pdf_create_object(pdf)
def get_ocgs(self):
    """Show existing optional content groups.

    Returns:
        Dictionary mapping the xref of every entry in
        /OCProperties/OCGs to a dictionary with the keys "name",
        "intent" (list of names), "on" and "usage". Empty if /OCGs is
        not an array.
    """
    creator_info = mupdf.pdf_new_name("CreatorInfo")
    pdf = _as_pdf_document(self)
    ocgs = mupdf.pdf_dict_getl(
        mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root')),
        PDF_NAME('OCProperties'),
        PDF_NAME('OCGs'),
    )
    groups = {}
    if not mupdf.pdf_is_array(ocgs):
        return groups

    for pos in range(mupdf.pdf_array_len(ocgs)):
        ocg = mupdf.pdf_array_get(ocgs, pos)
        xref = mupdf.pdf_to_num(ocg)
        name = mupdf.pdf_to_text_string(mupdf.pdf_dict_get(ocg, PDF_NAME('Name')))

        # Usage is the /Subtype found under /Usage/CreatorInfo, if any.
        usage_obj = mupdf.pdf_dict_getl(ocg, PDF_NAME('Usage'), creator_info, PDF_NAME('Subtype'))
        usage = mupdf.pdf_to_name(usage_obj) if usage_obj.m_internal else None

        # /Intent is either one name or an array; only names are kept.
        intents = []
        intent = mupdf.pdf_dict_get(ocg, PDF_NAME('Intent'))
        if intent.m_internal:
            if mupdf.pdf_is_name(intent):
                intents.append(mupdf.pdf_to_name(intent))
            elif mupdf.pdf_is_array(intent):
                for sub in range(mupdf.pdf_array_len(intent)):
                    member = mupdf.pdf_array_get(intent, sub)
                    if mupdf.pdf_is_name(member):
                        intents.append(mupdf.pdf_to_name(member))

        hidden = mupdf.pdf_is_ocg_hidden(pdf, mupdf.PdfObj(), usage, ocg)
        groups[xref] = {
            "name": name,
            "intent": intents,
            "on": not hidden,
            "usage": usage,
        }
    return groups
def get_outline_xrefs(self):
    """Get list of outline xref numbers.

    Returns:
        The result of JM_outline_xrefs() started at the /First entry of
        the outline root; an empty list if no PDF document is available
        or any object on the path Root/Outlines/First is missing.
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return []
    # Follow Root -> Outlines -> First, giving up at the first gap.
    node = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    if not node.m_internal:
        return []
    node = mupdf.pdf_dict_get(node, PDF_NAME('Outlines'))
    if not node.m_internal:
        return []
    node = mupdf.pdf_dict_get(node, PDF_NAME('First'))
    if not node.m_internal:
        return []
    return JM_outline_xrefs(node, [])
def get_page_fonts(self, pno: int, full: bool =False) -> list:
    """Retrieve a list of fonts used on a page.

    Args:
        pno: page number, or an object whose ``number`` attribute holds it.
        full: if falsy, the last item of every entry is cut off.

    Returns:
        The entries delivered by ``_getPageInfo(pno, 1)``; an empty tuple
        if the document is not a PDF.

    Raises:
        ValueError: document closed/encrypted, or ``pno`` is neither an
            int nor has a ``number`` attribute.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if not self.is_pdf:
        return ()
    if type(pno) is not int:
        # Anything but a plain int must provide the number itself.
        try:
            pno = pno.number
        except Exception:
            exception_info()
            raise ValueError("need a Page or page number")
    entries = self._getPageInfo(pno, 1)
    if full:
        return entries
    return [entry[:-1] for entry in entries]
def get_page_images(self, pno: int, full: bool =False) -> list:
    """Retrieve a list of images used on a page.

    Args:
        pno: page number.
        full: if falsy, the last item of every entry is cut off.

    Returns:
        The entries delivered by ``_getPageInfo(pno, 2)``; an empty tuple
        if the document is not a PDF.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if not self.is_pdf:
        return ()
    entries = self._getPageInfo(pno, 2)
    if full:
        return entries
    return [entry[:-1] for entry in entries]
def get_page_xobjects(self, pno: int) -> list:
    """Retrieve a list of XObjects used on a page.

    Returns:
        The result of ``_getPageInfo(pno, 3)``; an empty tuple if the
        document is not a PDF.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if self.is_pdf:
        return self._getPageInfo(pno, 3)
    return ()
def get_sigflags(self):
    """Get the /SigFlags value.

    Returns:
        The integer stored under Root/AcroForm/SigFlags, or -1 if that
        entry is missing or no PDF document is available.
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return -1  # not a PDF
    flags_obj = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('AcroForm'),
        PDF_NAME('SigFlags'),
    )
    if not flags_obj.m_internal:
        return -1
    return mupdf.pdf_to_int(flags_obj)
def get_xml_metadata(self):
    """Get document XML metadata.

    Returns:
        The stream of the catalog's /Metadata object converted by
        JM_UnicodeFromBuffer(), or '' if there is no such object or no
        PDF document is available.
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return ''
    metadata_obj = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('Metadata'),
    )
    if metadata_obj is None or not metadata_obj.m_internal:
        return ''
    return JM_UnicodeFromBuffer(mupdf.pdf_load_stream(metadata_obj))
def init_doc(self):
    """Load the outline and fill the ``metadata`` dictionary.

    ``metadata`` receives the keys 'format', 'title', 'author',
    'subject', 'keywords', 'creator', 'producer', 'creationDate',
    'modDate', 'trapped' and 'encryption'. An 'encryption' value equal
    to the string 'None' is stored as None.

    Raises:
        ValueError: if the document is still encrypted.
    """
    if self.is_encrypted:
        raise ValueError("cannot initialize - document still encrypted")
    self._outline = self._loadOutline()

    # (dictionary key, metadata lookup key) in output order
    lookups = (
        ('format', 'format'),
        ('title', 'info:Title'),
        ('author', 'info:Author'),
        ('subject', 'info:Subject'),
        ('keywords', 'info:Keywords'),
        ('creator', 'info:Creator'),
        ('producer', 'info:Producer'),
        ('creationDate', 'info:CreationDate'),
        ('modDate', 'info:ModDate'),
        ('trapped', 'info:Trapped'),
    )
    self.metadata = {name: self._getMetadata(key) for name, key in lookups}

    # Look the value up once instead of once for the test and once more
    # for the assignment.
    encryption = self._getMetadata('encryption')
    self.metadata['encryption'] = None if encryption == 'None' else encryption
def insert_file(self,
        infile,
        from_page=-1,
        to_page=-1,
        start_at=-1,
        rotate=-1,
        links=True,
        annots=True,
        show_progress=0,
        final=1,
        ):
    '''
    Insert an arbitrary supported document to an existing PDF.

    The infile may be given as a filename, a Document or a Pixmap. Other
    parameters - where applicable - equal those of insert_pdf().

    A Pixmap is turned into a Document from its tobytes() output, after
    conversion to csRGB if its colorspace has more than 3 components.
    A source that is not a PDF is converted with convert_to_pdf() first.
    Raises ValueError("bad infile parameter") if the source is falsy.
    '''
    if isinstance(infile, Pixmap):
        pixmap = infile
        if pixmap.colorspace.n > 3:
            pixmap = Pixmap(csRGB, pixmap)
        src = Document("png", pixmap.tobytes())
    elif isinstance(infile, Document):
        src = infile
    else:
        src = Document(infile)
    if not src:
        raise ValueError("bad infile parameter")
    if not src.is_pdf:
        # Convert to PDF in memory and continue with that document.
        src = Document("pdf", src.convert_to_pdf())

    options = dict(
        from_page=from_page,
        to_page=to_page,
        start_at=start_at,
        rotate=rotate,
        links=links,
        annots=annots,
        show_progress=show_progress,
        final=final,
    )
    return self.insert_pdf(src, **options)
def insert_pdf(
        self,
        docsrc,
        *,
        from_page=-1,
        to_page=-1,
        start_at=-1,
        rotate=-1,
        links=1,
        annots=1,
        widgets=1,
        join_duplicates=0,
        show_progress=0,
        final=1,
        _gmap=None,
        ):
    """Insert a page range from another PDF.

    Args:
        docsrc: PDF to copy from. Must be different object, but may be same file.
        from_page: (int) first source page to copy, 0-based, default 0.
        to_page: (int) last source page to copy, 0-based, default last page.
        start_at: (int) from_page will become this page number in target.
        rotate: (int) rotate copied pages, default -1 is no change.
        links: (int/bool) whether to also copy links.
        annots: (int/bool) whether to also copy annotations.
        widgets: (int/bool) whether to also copy form fields.
        join_duplicates: (int/bool) join or rename duplicate widget names.
        show_progress: (int) progress message interval, 0 is no messages.
        final: (bool) indicates last insertion from this source PDF.
        _gmap: internal use only (the value passed in is replaced by the
            Graftmap stored for the source document).

    Raises:
        ValueError: document closed or encrypted, or source and target are
            the same object.
        TypeError: source or target is not a PDF (non-'extra' code path).

    Copy sequence reversed if from_page > to_page."""
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if self._graft_id == docsrc._graft_id:
        raise ValueError("source and target cannot be same object")

    outCount = self.page_count
    srcCount = docsrc.page_count

    # Normalized local copies of the page numbers. They are needed for
    # reconstructing links / widgets even if start_at was given as -1.
    fp = min(max(from_page, 0), srcCount - 1)  # -1 = first page, capped at last page
    tp = srcCount - 1 if to_page < 0 else min(to_page, srcCount - 1)  # -1 = last page
    sa = outCount if start_at < 0 else min(start_at, outCount)  # -1 = behind last page

    if len(docsrc) > show_progress > 0:
        inname = os.path.basename(docsrc.name)
        if not inname:
            inname = "memory PDF"
        outname = os.path.basename(self.name)
        if not outname:
            outname = "memory PDF"
        message("Inserting '%s' at '%s'" % (inname, outname))

    # retrieve / make a Graftmap to avoid duplicate objects
    isrt = docsrc._graft_id
    _gmap = self.Graftmaps.get(isrt, None)
    if _gmap is None:
        _gmap = Graftmap(self)
        self.Graftmaps[isrt] = _gmap

    if g_use_extra:
        # This path receives the caller's raw page numbers.
        extra_FzDocument_insert_pdf(
                self.this,
                docsrc.this,
                from_page,
                to_page,
                start_at,
                rotate,
                links,
                annots,
                show_progress,
                final,
                _gmap,
                )
    else:
        pdfout = _as_pdf_document(self)
        pdfsrc = _as_pdf_document(docsrc)
        if not pdfout.m_internal or not pdfsrc.m_internal:
            raise TypeError( "source or target not a PDF")
        ENSURE_OPERATION(pdfout)
        JM_merge_range(pdfout, pdfsrc, fp, tp, sa, rotate, links, annots, show_progress, _gmap)

    self._reset_page_refs()
    if links:
        self._do_links(docsrc, from_page=fp, to_page=tp, start_at=sa)
    if widgets:
        self._do_widgets(docsrc, _gmap, from_page=fp, to_page=tp, start_at=sa, join_duplicates=join_duplicates)
    if final == 1:
        # last insertion from this source: drop the stored Graftmap
        self.Graftmaps[isrt] = None
@property
def is_dirty(self):
    """True if the PDF has unsaved changes, else False.

    Also False when no underlying PDF document is available.
    """
    pdf = _as_pdf_document(self, required=0)
    if pdf.m_internal:
        return bool(mupdf.pdf_has_unsaved_changes(pdf))
    return False
@property
def is_fast_webaccess(self):
    '''
    Check whether we have a linearized PDF.

    Returns False for anything that is not a PDF.
    '''
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return False    # gracefully handle non-PDF
    return mupdf.pdf_doc_was_linearized(pdf)
@property
def is_form_pdf(self):
    """Either False or PDF field count.

    The count is the length of the /Root/AcroForm/Fields array. False is
    returned if there is no PDF, no such array, or the lookup raised.
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return False
    try:
        fields = mupdf.pdf_dict_getl(
                mupdf.pdf_trailer(pdf),
                mupdf.PDF_ENUM_NAME_Root,
                mupdf.PDF_ENUM_NAME_AcroForm,
                mupdf.PDF_ENUM_NAME_Fields,
                )
        n_fields = mupdf.pdf_array_len(fields) if mupdf.pdf_is_array(fields) else -1
    except Exception:
        if g_exceptions_verbose:    exception_info()
        return False
    return n_fields if n_fields >= 0 else False
@property
def is_pdf(self):
    """Check for PDF."""
    if isinstance(self.this, mupdf.PdfDocument):
        return True
    # Use the low-level ll_pdf_specifics() on the raw pointer: the
    # higher-level wrapper would create a new PdfDocument object, which is
    # harmless but unnecessary.
    return bool(mupdf.ll_pdf_specifics(self.this.m_internal))
@property
def is_reflowable(self):
    """Check if document is layoutable.

    Raises ValueError if the document is closed.
    """
    if not self.is_closed:
        return bool(mupdf.fz_is_document_reflowable(self))
    raise ValueError("document closed")
@property
def is_repaired(self):
    """Check whether PDF was repaired.

    False is also returned when no underlying PDF document is available.
    """
    pdf = _as_pdf_document(self, required=0)
    if pdf.m_internal:
        return bool(mupdf.pdf_was_repaired(pdf))
    return False
def journal_can_do(self):
    """Show if undo and / or redo are possible.

    Returns a dict with boolean items 'undo' and 'redo'.
    Raises ValueError if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    can_undo = bool(mupdf.pdf_can_undo(pdf))
    can_redo = bool(mupdf.pdf_can_redo(pdf))
    return {'undo': can_undo, 'redo': can_redo}
def journal_enable(self):
    """Activate document journalling.

    Raises ValueError if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    mupdf.pdf_enable_journal(_as_pdf_document(self))
def journal_is_enabled(self):
    """Check if journalling is enabled.

    The result is the (truthy or falsy) journal member of the underlying
    PDF document, not necessarily a bool.
    Raises ValueError if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    internal = pdf.m_internal
    return internal and internal.journal
def journal_load(self, filename):
    """Load a journal from a file.

    'filename' is either a str (file path) or data that is turned into a
    buffer and deserialised as a stream.
    Raises ValueError if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    if not isinstance(filename, str):
        stream = mupdf.fz_open_buffer(JM_BufferFromBytes(filename))
        mupdf.pdf_deserialise_journal(pdf, stream)
    else:
        mupdf.pdf_load_journal(pdf, filename)
    # after loading, the document must have a journal
    if not pdf.m_internal.journal:
        RAISEPY( "Journal and document do not match", JM_Exc_FileDataError)
def journal_op_name(self, step):
    """Show operation name for given step.

    Raises ValueError if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    return mupdf.pdf_undoredo_step(_as_pdf_document(self), step)
def journal_position(self):
    """Show journalling state.

    Returns the two values delivered by mupdf.pdf_undoredo_state() as a
    tuple.
    Raises ValueError if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    position, step_count = mupdf.pdf_undoredo_state(pdf)
    return position, step_count
def journal_redo(self):
    """Move forward in the journal.

    Always returns True.
    Raises ValueError if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    mupdf.pdf_redo(_as_pdf_document(self))
    return True
def journal_save(self, filename):
    """Save journal to a file.

    'filename' is either a str (file path) or an object that is wrapped
    into an output via JM_new_output_fileptr().
    Raises ValueError if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    if not isinstance(filename, str):
        output = JM_new_output_fileptr(filename)
        mupdf.pdf_write_journal(pdf, output)
        output.fz_close_output()
    else:
        mupdf.pdf_save_journal(pdf, filename)
def journal_start_op(self, name=None):
    """Begin a journalling operation.

    With a truthy 'name' a named operation is started, otherwise an
    implicit one.
    Raises ValueError if the document is closed or encrypted, and
    RuntimeError if journalling is not enabled.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    if not pdf.m_internal.journal:
        raise RuntimeError( "Journalling not enabled")
    if not name:
        mupdf.pdf_begin_implicit_operation(pdf)
    else:
        mupdf.pdf_begin_operation(pdf, name)
def journal_stop_op(self):
    """End a journalling operation.

    Raises ValueError if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    mupdf.pdf_end_operation(_as_pdf_document(self))
def journal_undo(self):
    """Move backwards in the journal.

    Always returns True.
    Raises ValueError if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    mupdf.pdf_undo(_as_pdf_document(self))
    return True
@property
def language(self):
    """Document language.

    None if there is no underlying PDF document or the language is unset.
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return None
    lang = mupdf.pdf_document_language(pdf)
    if lang != mupdf.FZ_LANG_UNSET:
        return mupdf.fz_string_from_text_language2(lang)
    return None
@property
def last_location(self):
    """Id (chapter, page) of last page.

    Raises ValueError if the document is closed.
    """
    if self.is_closed:
        raise ValueError("document closed")
    loc = mupdf.fz_last_page(self.this)
    return (loc.chapter, loc.page)
def layer_ui_configs(self):
    """Show OC visibility status modifiable by user.

    Returns a list with one dict per layer config UI entry, having the
    keys "number", "text", "depth", "type", "on" and "locked".
    """
    pdf = _as_pdf_document(self)
    info = mupdf.PdfLayerConfigUi()
    # numeric type -> name; every other value is reported as "label"
    type_names = {1: "checkbox", 2: "radiobox"}
    configs = []
    for number in range(mupdf.pdf_count_layer_config_ui( pdf)):
        mupdf.pdf_layer_config_ui_info( pdf, number, info)
        configs.append({
                "number": number,
                "text": info.text,
                "depth": info.depth,
                "type": type_names.get(info.type, "label"),
                "on": info.selected,
                "locked": info.locked,
                })
    return configs
def layout(self, rect=None, width=0, height=0, fontsize=11):
    """Re-layout a reflowable document.

    Page size is taken from 'rect' unless it converts to an infinite
    rectangle, in which case 'width' and 'height' are used. Nothing
    happens for documents that are not reflowable.
    Raises ValueError if the document is closed or encrypted, or if the
    resulting width or height is not positive.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    doc = self.this
    if not mupdf.fz_is_document_reflowable( doc):
        return
    r = JM_rect_from_py(rect)
    if mupdf.fz_is_infinite_rect(r):
        page_w, page_h = width, height
    else:
        page_w, page_h = r.x1 - r.x0, r.y1 - r.y0
    if page_w <= 0.0 or page_h <= 0.0:
        raise ValueError( "bad page size")
    mupdf.fz_layout_document( doc, page_w, page_h, fontsize)
    # page objects and outline / metadata must be refreshed afterwards
    self._reset_page_refs()
    self.init_doc()
def load_page(self, page_id):
    """Load a page.

    'page_id' is either a 0-based page number or a tuple (chapter, pno),
    with chapter number and page number within that chapter. None is
    treated as page 0; a negative int is counted from the end.
    Raises ValueError if the document is closed or encrypted, or if
    'page_id' is not in the document.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if page_id is None:
        page_id = 0
    if page_id not in self:
        raise ValueError("page not in document")
    if type(page_id) is int and page_id < 0:
        total = self.page_count
        while page_id < 0:
            page_id += total
    if isinstance(page_id, int):
        raw_page = mupdf.fz_load_page(self.this, page_id)
    else:
        chapter, pagenum = page_id
        raw_page = mupdf.fz_load_chapter_page(self.this, chapter, pagenum)
    page = Page(raw_page, self)
    page.thisown = True
    page.parent = self
    # register the new page object with the document
    self._page_refs[id(page)] = page
    page._annot_refs = weakref.WeakValueDictionary()
    page.number = page_id
    return page
def location_from_page_number(self, pno):
    """Convert pno to (chapter, page).

    Args:
        pno: page number; negative values count from the end.

    Returns:
        Tuple (chapter, page).

    Raises:
        ValueError: if the document is closed, or 'pno' is outside the
            document (which includes any 'pno' for a zero-page document).
    """
    if self.is_closed:
        raise ValueError("document closed")
    this_doc = self.this
    page_count = mupdf.fz_count_pages(this_doc)
    # Wrap negative numbers with modulo instead of a loop: a loop adding
    # page_count would never end if page_count is zero.
    if pno < 0 and page_count > 0:
        pno %= page_count
    if not 0 <= pno < page_count:
        raise ValueError( MSG_BAD_PAGENO)
    loc = mupdf.fz_location_from_page_number(this_doc, pno)
    return loc.chapter, loc.page
def make_bookmark(self, loc):
    """Make a page pointer before layouting document.

    'loc' is unpacked into the arguments of mupdf.FzLocation().
    Raises ValueError if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    location = mupdf.FzLocation(*loc)
    return mupdf.ll_fz_make_bookmark2( self.this.m_internal, location.internal())
@property
def markinfo(self) -> dict:
    """Return the PDF MarkInfo value.

    None if there is no PDF catalog. An empty dict if the catalog has no
    usable /MarkInfo dictionary. Otherwise a dict with the keys "Marked",
    "UserProperties" and "Suspects" (default False); a key is set to True
    when its value in the PDF is "true".
    """
    catalog = self.pdf_catalog()
    if catalog == 0:
        return None
    rc = self.xref_get_key(catalog, "MarkInfo")
    kind = rc[0]
    if kind == "null":
        return {}
    if kind == "xref":
        # indirect reference: fetch the referenced object source
        text = self.xref_object(int(rc[1].split()[0]), compressed=True)
    elif kind == "dict":
        text = rc[1]
    else:
        return {}
    if text is None or text[:2] != "<<" or text[-2:] != ">>":
        return {}
    flags = {"Marked": False, "UserProperties": False, "Suspects": False}
    # strip the dictionary brackets, then look at each "/key value" item
    for item in text[2:-2].split("/")[1:]:
        try:
            key, value = item.split()
        except Exception:
            if g_exceptions_verbose > 1:    exception_info()
            return flags
        if value == "true":
            flags[key] = True
    return flags
def move_page(self, pno: int, to: int =-1):
    """Move a page within a PDF document.

    Args:
        pno: source page number.
        to: put before this page, '-1' means after last page.

    Raises ValueError if the document is closed or a page number is out
    of range.
    """
    if self.is_closed:
        raise ValueError("document closed")
    page_count = len(self)
    valid_source = pno in range(page_count)
    valid_target = to in range(-1, page_count)
    if not (valid_source and valid_target):
        raise ValueError("bad page number(s)")
    if to == -1:
        # "after the last page": target is the last page, insert behind it
        target, before = page_count - 1, 0
    else:
        target, before = to, 1
    return self._move_copy_page(pno, target, before, 0)
@property
def name(self):
    """Return the stored document name (attribute '_name')."""
    return self._name
-
def need_appearances(self, value=None):
    """Get/set the NeedAppearances value.

    Args:
        value: None (default) only inquires the current value. Any other
            value sets /Root/AcroForm/NeedAppearances to true or false
            according to its truthiness.

    Returns:
        None if the document is no Form PDF. When inquiring: the boolean
        value stored in the PDF, or False if the key is missing or not a
        boolean. When setting: 'value' itself.
    """
    if not self.is_form_pdf:
        return None

    pdf = _as_pdf_document(self)
    appkey = "NeedAppearances"
    form = mupdf.pdf_dict_getp(
            mupdf.pdf_trailer(pdf),
            "Root/AcroForm",
            )
    if value is None:
        # Pure inquiry: must not modify the document.
        app = mupdf.pdf_dict_gets(form, appkey)
        if mupdf.pdf_is_bool(app):
            return bool(mupdf.pdf_to_bool(app))
        return False

    if value:
        mupdf.pdf_dict_puts(form, appkey, mupdf.PDF_TRUE)
    else:
        mupdf.pdf_dict_puts(form, appkey, mupdf.PDF_FALSE)
    return value
@property
def needs_pass(self):
    """Indicate password required.

    Raises ValueError if the document is closed.
    """
    if self.is_closed:
        raise ValueError("document closed")
    if isinstance(self.this, mupdf.FzDocument):
        document = self.this
    else:
        document = self.this.super()
    return mupdf.fz_needs_password( document)
def next_location(self, page_id):
    """Get (chapter, page) of next page.

    'page_id' is a page number (taken as page of chapter 0) or a
    (chapter, page) sequence. An empty tuple is returned for the last
    page of the document.
    Raises ValueError if the document is closed or encrypted, or if
    'page_id' is not in the document.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if type(page_id) is int:
        page_id = (0, page_id)
    if page_id not in self:
        raise ValueError("page id not in document")
    if tuple(page_id) == self.last_location:
        return ()
    this_doc = _as_fz_document(self)
    chapter = page_id[ 0]
    if not isinstance(chapter, int):
        RAISEPY(MSG_BAD_PAGEID, PyExc_ValueError)
    pno = page_id[ 1]
    current = mupdf.fz_make_location(chapter, pno)
    following = mupdf.fz_next_page( this_doc, current)
    return following.chapter, following.page
def page_annot_xrefs(self, n):
    """Return the annotation xref list of page number 'n'.

    Args:
        n: 0-based page number; negative values count from the end.

    Returns:
        The result of JM_get_annot_xref_list() for the page object (or of
        extra.page_annot_xrefs() when g_use_extra is set).

    Raises:
        ValueError: if 'n' is outside the document (non-'extra' path).
    """
    if g_use_extra:
        return extra.page_annot_xrefs( self.this, n)

    if isinstance(self.this, mupdf.PdfDocument):
        page_count = mupdf.pdf_count_pages(self.this)
        pdf_document = self.this
    else:
        page_count = mupdf.fz_count_pages(self.this)
        pdf_document = _as_pdf_document(self)
    # Wrap negative numbers with modulo instead of a loop: a loop adding
    # page_count would never end if page_count is zero.
    if n < 0 and page_count > 0:
        n %= page_count
    # valid page numbers are 0 .. page_count - 1
    if not 0 <= n < page_count:
        raise ValueError( MSG_BAD_PAGENO)
    page_obj = mupdf.pdf_lookup_page_obj(pdf_document, n)
    annots = JM_get_annot_xref_list(page_obj)
    return annots
@property
def page_count(self):
    """Number of pages.

    Raises ValueError if the document is closed.
    """
    if self.is_closed:
        raise ValueError('document closed')
    if g_use_extra:
        return self.page_count2(self)
    # pick the counting function matching the wrapped document type
    if isinstance( self.this, mupdf.FzDocument):
        counter = mupdf.fz_count_pages
    else:
        counter = mupdf.pdf_count_pages
    return counter( self.this)
def page_cropbox(self, pno):
    """Get CropBox of page number (without loading page).

    Args:
        pno: 0-based page number; negative values count from the end.

    Returns:
        Rect built from the page object's cropbox.

    Raises:
        ValueError: if the document is closed, or 'pno' is outside the
            document (which includes any 'pno' for a zero-page document).
    """
    if self.is_closed:
        raise ValueError("document closed")
    this_doc = self.this
    page_count = mupdf.fz_count_pages( this_doc)
    n = pno
    # Wrap negative numbers with modulo instead of a loop: a loop adding
    # page_count would never end if page_count is zero.
    if n < 0 and page_count > 0:
        n %= page_count
    pdf = _as_pdf_document(self)
    if not 0 <= n < page_count:
        raise ValueError( MSG_BAD_PAGENO)
    pageref = mupdf.pdf_lookup_page_obj( pdf, n)
    cropbox = JM_cropbox(pageref)
    val = JM_py_from_rect(cropbox)
    val = Rect(val)
    return val
def page_number_from_location(self, page_id):
    """Convert (chapter, pno) to page number.

    Args:
        page_id: a (chapter, pno) pair, or an int which is taken as a page
            of chapter 0 (negative ints count from the end).

    Returns:
        The result of mupdf.fz_page_number_from_location().

    Raises:
        ValueError: if 'page_id' is not in the document.
    """
    if type(page_id) is int:
        np = self.page_count
        # Wrap negative numbers with modulo instead of a loop: a loop
        # adding np would never end if np is zero.
        if page_id < 0 and np > 0:
            page_id %= np
        page_id = (0, page_id)
    if page_id not in self:
        raise ValueError("page id not in document")
    chapter, pno = page_id
    loc = mupdf.fz_make_location( chapter, pno)
    page_n = mupdf.fz_page_number_from_location( self.this, loc)
    return page_n
def page_xref(self, pno):
    """Get xref of page number.

    Args:
        pno: 0-based page number; negative values count from the end.

    Returns:
        The xref number of the page object (or the result of
        extra.page_xref() when g_use_extra is set).

    Raises:
        ValueError: if the document is closed, or 'pno' is outside the
            document (non-'extra' path).
    """
    if g_use_extra:
        return extra.page_xref( self.this, pno)
    if self.is_closed:
        raise ValueError("document closed")
    page_count = mupdf.fz_count_pages(self.this)
    n = pno
    # Wrap negative numbers with modulo instead of a loop: a loop adding
    # page_count would never end if page_count is zero.
    if n < 0 and page_count > 0:
        n %= page_count
    pdf = _as_pdf_document(self)
    if not 0 <= n < page_count:
        raise ValueError( MSG_BAD_PAGENO)
    return mupdf.pdf_to_num(mupdf.pdf_lookup_page_obj(pdf, n))
@property
def pagelayout(self) -> str:
    """Return the PDF PageLayout value.

    None if there is no PDF catalog. "SinglePage" unless the catalog's
    /PageLayout entry is a name, in which case that name is returned
    without its leading character.
    """
    catalog = self.pdf_catalog()
    if catalog == 0:
        return None
    kind_and_value = self.xref_get_key(catalog, "PageLayout")
    if kind_and_value[0] == "name":
        return kind_and_value[1][1:]
    return "SinglePage"
@property
def pagemode(self) -> str:
    """Return the PDF PageMode value.

    None if there is no PDF catalog. "UseNone" unless the catalog's
    /PageMode entry is a name, in which case that name is returned
    without its leading character.
    """
    catalog = self.pdf_catalog()
    if catalog == 0:
        return None
    kind_and_value = self.xref_get_key(catalog, "PageMode")
    if kind_and_value[0] == "name":
        return kind_and_value[1][1:]
    return "UseNone"
# Return annotation used by pages(). Whether collections.abc.Iterable can be
# subscripted depends on the Python *language* version, so check
# sys.version_info (sys.implementation.version is the version of the
# interpreter implementation, which differs on non-CPython interpreters).
if sys.version_info < (3, 9):
    # Appending `[Page]` causes `TypeError: 'ABCMeta' object is not subscriptable`.
    _pages_ret = collections.abc.Iterable
else:
    _pages_ret = collections.abc.Iterable[Page]
def pages(self, start: OptInt =None, stop: OptInt =None, step: OptInt =None) -> _pages_ret:
    """Return a generator iterator over a page range.

    Arguments have the same meaning as for the range() built-in.
    Nothing is generated for a document without pages. A negative
    'start' counts from the end; a missing or too large 'stop' is
    replaced by the page count.
    Raises ValueError for a bad 'start' or a zero 'step'.
    """
    total = self.page_count
    if not total:
        return
    # set the start value
    start = start or 0
    while start < 0:
        start += total
    if start not in range(total):
        raise ValueError("bad start page number")
    # set the stop value
    if stop is None or not (stop <= total):
        stop = total
    # set the step value
    if step == 0:
        raise ValueError("arg 3 must not be zero")
    if step is None:
        step = -1 if start > stop else 1
    for pno in range(start, stop, step):
        yield (self.load_page(pno))
def pdf_catalog(self):
    """Get xref of PDF catalog.

    Returns 0 when no underlying PDF document is available.
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return 0
    root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    return mupdf.pdf_to_num(root)
def pdf_trailer(self, compressed=0, ascii=0):
    """Get PDF trailer as a string.

    Delegates to xref_object() using -1 as the xref number.
    """
    trailer_xref = -1
    return self.xref_object(trailer_xref, compressed=compressed, ascii=ascii)
@property
def permissions(self):
    """Document permissions.

    0 for an encrypted document. For a PDF the result of
    mupdf.pdf_document_permissions(); otherwise an equivalent value is
    built from the individual fz permission checks.
    """
    if self.is_encrypted:
        return 0
    doc = self.this
    pdf = mupdf.pdf_document_from_fz_document(doc)
    # for PDF return result of standard function
    if pdf.m_internal:
        return mupdf.pdf_document_permissions(pdf)
    # otherwise simulate the PDF return value: start with everything
    # granted, then switch off each permission the document lacks
    perm = 0xFFFFFFFC
    checks = (
            (mupdf.FZ_PERMISSION_PRINT, mupdf.PDF_PERM_PRINT),
            (mupdf.FZ_PERMISSION_EDIT, mupdf.PDF_PERM_MODIFY),
            (mupdf.FZ_PERMISSION_COPY, mupdf.PDF_PERM_COPY),
            (mupdf.FZ_PERMISSION_ANNOTATE, mupdf.PDF_PERM_ANNOTATE),
            )
    for fz_perm, pdf_perm in checks:
        if not mupdf.fz_has_permission(doc, fz_perm):
            perm ^= pdf_perm
    return perm
def prev_location(self, page_id):
    """Get (chapter, page) of previous page.

    'page_id' is a page number (taken as page of chapter 0) or a
    (chapter, page) pair. An empty tuple is returned for (0, 0).
    Raises ValueError if the document is closed or encrypted, or if
    'page_id' is not in the document.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if type(page_id) is int:
        page_id = (0, page_id)
    if page_id not in self:
        raise ValueError("page id not in document")
    if page_id == (0, 0):
        return ()
    chapter, pno = page_id
    current = mupdf.fz_make_location(chapter, pno)
    previous = mupdf.fz_previous_page(self.this, current)
    return previous.chapter, previous.page
def reload_page(self, page: Page) -> Page:
    """Make a fresh copy of a page.

    The given page object is erased and the page with the same number is
    loaded again. The annotation references of the old page object are
    copied over to the new one, which is returned.
    """
    old_annots = {}  # copy annot references to here
    pno = page.number  # save the page number
    for k, v in page._annot_refs.items():  # save the annot dictionary
        old_annots[k] = v

    # When we call `self.load_page()` below, it will end up in
    # fz_load_chapter_page(), which will return any matching page in the
    # document's list of non-ref-counted loaded pages, instead of actually
    # reloading the page.
    #
    # We want to assert that we have actually reloaded the fz_page, and not
    # simply returned the same `fz_page*` pointer from the document's list
    # of non-ref-counted loaded pages.
    #
    # So we first remove our reference to the `fz_page*`. This will
    # decrement .refs, and if .refs was 1, this is guaranteed to free the
    # `fz_page*` and remove it from the document's list if it was there. So
    # we are guaranteed that our returned `fz_page*` is from a genuine
    # reload, even if it happens to reuse the original block of memory.
    #
    # However if the original .refs is greater than one, there must be
    # other references to the `fz_page` somewhere, and we require that
    # these other references are not keeping the page in the document's
    # list. We check that we are returning a newly loaded page by
    # asserting that our returned `fz_page*` is different from the original
    # `fz_page*` - the original was not freed, so a new `fz_page` cannot
    # reuse the same block of memory.
    #

    # Remember ref count and pointer value of the old fz_page for the
    # check at the end.
    refs_old = page.this.m_internal.refs
    m_internal_old = page.this.m_internal_value()

    page.this = None
    page._erase()  # remove the page
    page = None
    TOOLS.store_shrink(100)
    page = self.load_page(pno)  # reload the page

    # copy annot refs over to the new dictionary
    #page_proxy = weakref.proxy(page)
    for k, v in old_annots.items():
        annot = old_annots[k]
        #annot.parent = page_proxy  # refresh parent to new page
        page._annot_refs[k] = annot
    if refs_old == 1:
        # We know that `page.this = None` will have decremented the ref
        # count to zero so we are guaranteed that the new `fz_page` is a
        # new page even if it happens to have reused the same block of
        # memory.
        pass
    else:
        # Check that the new `fz_page*` is different from the original.
        m_internal_new = page.this.m_internal_value()
        assert m_internal_new != m_internal_old, \
                f'{refs_old=} {m_internal_old=:#x} {m_internal_new=:#x}'
    return page
def resolve_link(self, uri=None, chapters=0):
    """Calculate internal link destination.

    Args:
        uri: (str) some Link.uri
        chapters: (bool) whether to use (chapter, page) format

    Returns:
        (page_id, x, y) where x, y are point coordinates on the page.
        page_id is either page number (if chapters=0), or (chapter, pno).
        If 'uri' is empty or cannot be resolved, page_id is -1
        respectively (-1, -1), and x, y are 0.
    """
    if chapters:
        unresolved = ((-1, -1), 0, 0)
    else:
        unresolved = (-1, 0, 0)
    if not uri:
        return unresolved
    try:
        loc, xp, yp = mupdf.fz_resolve_link(self.this, uri)
    except Exception:
        if g_exceptions_verbose:    exception_info()
        return unresolved
    if chapters:
        return (loc.chapter, loc.page), xp, yp
    return mupdf.fz_page_number_from_location(self.this, loc), xp, yp
def rewrite_images(
    self,
    dpi_threshold=None,
    dpi_target=0,
    quality=0,
    lossy=True,
    lossless=True,
    bitonal=True,
    color=True,
    gray=True,
    set_to_gray=False,
    options=None,
):
    """Rewrite images in a PDF document.

    The typical use case is to reduce the size of the PDF by recompressing
    images. Default parameters will convert all images to JPEG where
    possible, using the specified resolutions and quality. Exclude
    undesired images by setting parameters to False.

    Args:
        dpi_threshold: look at images with a larger DPI only.
        dpi_target: change eligible images to this DPI.
        quality: Quality of the recompressed images (0-100).
        lossy: process lossy image types (e.g. JPEG).
        lossless: process lossless image types (e.g. PNG).
        bitonal: process black-and-white images (e.g. FAX)
        color: process colored images.
        gray: process gray images.
        set_to_gray: whether to change the PDF to gray at process start.
        options: (PdfImageRewriterOptions) Custom options for image
            rewriting (optional). Expert use only. If provided, other
            parameters are ignored, except set_to_gray.

    Raises:
        ValueError: if dpi_target is not smaller than dpi_threshold, or if
            the options object has attributes that a fresh
            PdfImageRewriterOptions does not have.
    """
    quality_str = str(quality)
    if not dpi_threshold:
        dpi_threshold = dpi_target = 0
    if dpi_target > 0 and dpi_target >= dpi_threshold:
        # f-string, so that the offending values appear in the message
        raise ValueError(f"{dpi_target=} must be less than {dpi_threshold=}")
    template_opts = mupdf.PdfImageRewriterOptions()
    dir1 = set(dir(template_opts))  # for checking that only existing options are set
    if not options:
        opts = mupdf.PdfImageRewriterOptions()
        if bitonal:
            opts.bitonal_image_recompress_method = mupdf.FZ_RECOMPRESS_FAX
            opts.bitonal_image_subsample_method = mupdf.FZ_SUBSAMPLE_AVERAGE
            opts.bitonal_image_subsample_to = dpi_target
            opts.bitonal_image_recompress_quality = quality_str
            opts.bitonal_image_subsample_threshold = dpi_threshold
        # color / gray images, each in a lossless and a lossy flavor, all
        # get the same five settings under a common attribute prefix
        for family_wanted, family in ((color, "color"), (gray, "gray")):
            if not family_wanted:
                continue
            for mode_wanted, mode in ((lossless, "lossless"), (lossy, "lossy")):
                if not mode_wanted:
                    continue
                prefix = f"{family}_{mode}_image_"
                setattr(opts, prefix + "recompress_method", mupdf.FZ_RECOMPRESS_JPEG)
                setattr(opts, prefix + "subsample_method", mupdf.FZ_SUBSAMPLE_AVERAGE)
                setattr(opts, prefix + "subsample_to", dpi_target)
                setattr(opts, prefix + "subsample_threshold", dpi_threshold)
                setattr(opts, prefix + "recompress_quality", quality_str)
    else:
        opts = options

    dir2 = set(dir(opts))  # checking that only possible options were used
    invalid_options = dir2 - dir1
    if invalid_options:
        raise ValueError(f"Invalid options: {invalid_options}")

    if set_to_gray:
        self.recolor(1)
    pdf = _as_pdf_document(self)
    mupdf.pdf_rewrite_images(pdf, opts)
def recolor(self, components=1):
    """Change the color component count on all pages.

    Args:
        components: (int) desired color component count, one of 1, 3, 4.

    Invokes the same-named method for all pages.
    Raises ValueError if the document is no PDF.
    """
    if not self.is_pdf:
        raise ValueError("is no PDF")
    for pno in range(self.page_count):
        page = self.load_page(pno)
        page.recolor(components)
def resolve_names(self):
    """Convert the PDF's destination names into a Python dict.

    The only parameter is the pymupdf.Document.
    All names found in the catalog under keys "/Dests" and "/Names/Dests" are
    being included. The result is stored in the document object and reused
    by subsequent calls.

    Returns:
        A dictionary with the following layout:
        - key: (str) the name
        - value: (dict) with the following layout:
            * "page":  target page number (0-based). If no page number found -1.
            * "to": (x, y) target point on page - currently in PDF coordinates,
                    i.e. point (0,0) is the bottom-left of the page.
            * "zoom": (float) the zoom factor
            * "dest": (str) only occurs if the target location on the page has
                    not been provided as "/XYZ" or if no page number was found.
    Examples:
        {'__bookmark_1': {'page': 0, 'to': (0.0, 541.0), 'zoom': 0.0},
        '__bookmark_2': {'page': 0, 'to': (0.0, 481.45), 'zoom': 0.0}}

        or

        '21154a7c20684ceb91f9c9adc3b677c40': {'page': -1, 'dest': '/XYZ 15.75 1486 0'}, ...
    """
    if hasattr(self, "_resolved_names"):  # do not execute multiple times!
        return self._resolved_names
    # this is a backward listing of page xref to page number
    page_xrefs = {self.page_xref(i): i for i in range(self.page_count)}

    def obj_string(obj):
        """Return string version of a PDF object definition."""
        buffer = mupdf.fz_new_buffer(512)
        output = mupdf.FzOutput(buffer)
        mupdf.pdf_print_obj(output, obj, 1, 0)
        output.fz_close_output()
        return JM_UnicodeFromBuffer(buffer)

    def get_array(val):
        """Generate value of one item of the names dictionary."""
        templ_dict = {"page": -1, "dest": ""}  # value template
        if val.pdf_is_indirect():
            val = mupdf.pdf_resolve_indirect(val)
        if val.pdf_is_array():
            array = obj_string(val)
        elif val.pdf_is_dict():
            # for a dictionary, the destination is taken from its "D" entry
            array = obj_string(mupdf.pdf_dict_gets(val, "D"))
        else:  # if all fails return the empty template
            return templ_dict

        # replace PDF "null" by zero, omit the square brackets
        array = array.replace("null", "0")[1:-1]

        # find stuff before first "/"
        idx = array.find("/")
        if idx < 1:  # this has no target page spec
            templ_dict["dest"] = array  # return the orig. string
            return templ_dict

        subval = array[:idx].strip()  # stuff before "/"
        array = array[idx:]  # stuff from "/" onwards
        templ_dict["dest"] = array

        # if we start with /XYZ: extract x, y, zoom
        # 1, 2 or 3 of these values may actually be supplied
        if array.startswith("/XYZ"):
            del templ_dict["dest"]  # don't return orig string in this case

            # make a list of the 3 tokens following "/XYZ"
            array_list = array.split()[1:4]  # omit "/XYZ"

            # fill up missing tokens with "0" strings
            while len(array_list) < 3:  # fill up if too short
                array_list.append("0")  # add missing values

            # make list of 3 floats: x, y and zoom
            t = list(map(float, array_list))  # the resulting x, y, z values
            templ_dict["to"] = (t[0], t[1])
            templ_dict["zoom"] = t[2]

        # extract page number
        if subval.endswith("0 R"):  # page xref given?
            templ_dict["page"] = page_xrefs.get(int(subval.split()[0]),-1)
        else:  # naked page number given
            templ_dict["page"] = int(subval)
        return templ_dict

    def fill_dict(dest_dict, pdf_dict):
        """Generate name resolution items for pdf_dict.

        This may be either "/Names/Dests" or just "/Dests"
        """
        # length of the PDF dictionary
        name_count = mupdf.pdf_dict_len(pdf_dict)

        # extract key-val of each dict item
        for i in range(name_count):
            key = mupdf.pdf_dict_get_key(pdf_dict, i)
            val = mupdf.pdf_dict_get_val(pdf_dict, i)
            if key.pdf_is_name():  # this should always be true!
                dict_key = key.pdf_to_name()
            else:
                message(f"key {i} is no /Name")
                dict_key = None

            if dict_key:
                dest_dict[dict_key] = get_array(val)  # store key/value in dict

    # access underlying PDF document of fz Document
    pdf = mupdf.pdf_document_from_fz_document(self)

    # access PDF catalog
    catalog = mupdf.pdf_dict_gets(mupdf.pdf_trailer(pdf), "Root")

    dest_dict = {}

    # make PDF_NAME(Dests)
    dests = mupdf.pdf_new_name("Dests")

    # extract destinations old style (PDF 1.1)
    old_dests = mupdf.pdf_dict_get(catalog, dests)
    if old_dests.pdf_is_dict():
        fill_dict(dest_dict, old_dests)

    # extract destinations new style (PDF 1.2+)
    # entries of the name tree are processed last, so they replace
    # same-named entries of the old-style dictionary
    tree = mupdf.pdf_load_name_tree(pdf, dests)
    if tree.pdf_is_dict():
        fill_dict(dest_dict, tree)

    self._resolved_names = dest_dict  # store result or reuse
    return dest_dict
def save(
    self,
    filename,
    garbage=0,
    clean=0,
    deflate=0,
    deflate_images=0,
    deflate_fonts=0,
    incremental=0,
    ascii=0,
    expand=0,
    linear=0,
    no_new_id=0,
    appearance=0,
    pretty=0,
    encryption=1,
    permissions=4095,
    owner_pw=None,
    user_pw=None,
    preserve_metadata=1,
    use_objstms=0,
    compression_effort=0,
):
    """Save PDF to file, pathlib.Path or file pointer.

    Args:
        filename: a str, an object with an 'open' attribute (treated as a
            pathlib.Path), an object with a 'name' attribute (its name is
            used as the path), or an object with a 'seek' attribute (written
            to directly).
        garbage .. compression_effort: copied one-to-one into the fields of
            a mupdf.PdfWriteOptions instance; 'clean' drives both do_clean
            and do_sanitize.
        owner_pw, user_pw: passwords; when only user_pw is given it is also
            used as the owner password.

    Raises:
        ValueError: document closed or encrypted, unsupported 'filename',
            non-incremental save onto the original file, 'linear' combined
            with 'use_objstms', zero pages, incremental save not targeting
            the original file, or a password longer than 40 characters.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")

    # Normalize the target: anything path-like becomes a str, anything
    # else must at least be seekable.
    if type(filename) is not str:
        if hasattr(filename, "open"):  # assume: pathlib.Path
            filename = str(filename)
        elif hasattr(filename, "name"):  # assume: file object
            filename = filename.name
        elif not hasattr(filename, "seek"):  # assume file object
            raise ValueError("filename must be str, Path or file object")

    if filename == self.name and not incremental:
        raise ValueError("save to original must be incremental")
    if linear and use_objstms:
        raise ValueError("'linear' and 'use_objstms' cannot both be requested")
    if self.page_count < 1:
        raise ValueError("cannot save with zero pages")
    if incremental and (self.name != filename or self.stream):
        raise ValueError("incremental needs original file")
    if (user_pw and len(user_pw) > 40) or (owner_pw and len(owner_pw) > 40):
        raise ValueError("password length must not exceed 40")

    pdf = _as_pdf_document(self)
    opts = mupdf.PdfWriteOptions()
    for field, setting in (
        ("do_incremental", incremental),
        ("do_ascii", ascii),
        ("do_compress", deflate),
        ("do_compress_images", deflate_images),
        ("do_compress_fonts", deflate_fonts),
        ("do_decompress", expand),
        ("do_garbage", garbage),
        ("do_pretty", pretty),
        ("do_linear", linear),
        ("do_clean", clean),
        ("do_sanitize", clean),
        ("dont_regenerate_id", no_new_id),
        ("do_appearance", appearance),
        ("do_encrypt", encryption),
        ("permissions", permissions),
    ):
        setattr(opts, field, setting)

    # The owner password falls back to the user password when not given.
    effective_owner_pw = owner_pw if owner_pw is not None else user_pw
    if effective_owner_pw is not None:
        opts.opwd_utf8_set_value(effective_owner_pw)
    if user_pw is not None:
        opts.upwd_utf8_set_value(user_pw)

    for field, setting in (
        ("do_preserve_metadata", preserve_metadata),
        ("do_use_objstms", use_objstms),
        ("compression_effort", compression_effort),
    ):
        setattr(opts, field, setting)

    pdf.m_internal.resynth_required = 0
    JM_embedded_clean(pdf)
    if no_new_id == 0:
        JM_ensure_identity(pdf)

    if isinstance(filename, str):
        mupdf.pdf_save_document(pdf, filename, opts)
        return
    # Not a path: write through an output wrapper around the file object.
    out = JM_new_output_fileptr(filename)
    mupdf.pdf_write_document(pdf, out, opts)
    out.fz_close_output()
def save_snapshot(self, filename):
    """Save a file snapshot suitable for journalling.

    Args:
        filename: a str, an object with an 'open' attribute (treated as a
            pathlib.Path) or an object with a 'name' attribute (its name is
            used as the path).

    Raises:
        ValueError: document closed, unsupported 'filename' type, or the
            target equals the document's own name.
    """
    if self.is_closed:
        raise ValueError("doc is closed")
    if type(filename) is not str:
        if hasattr(filename, "open"):  # assume: pathlib.Path
            filename = str(filename)
        elif hasattr(filename, "name"):  # assume: file object
            filename = filename.name
        else:
            raise ValueError("filename must be str, Path or file object")
    if filename == self.name:
        raise ValueError("cannot snapshot to original")
    mupdf.pdf_save_snapshot(_as_pdf_document(self), filename)
def saveIncr(self):
    """Save PDF incrementally to its own file, keeping its encryption."""
    keep_encryption = mupdf.PDF_ENCRYPT_KEEP
    return self.save(self.name, incremental=True, encryption=keep_encryption)
def select(self, pyliste):
    """Build sub-pdf with page numbers in the list.

    Args:
        pyliste: non-empty sequence of page numbers, each within
            range(len(self)).

    Raises:
        ValueError: document closed or encrypted, not a PDF, 'pyliste' has
            no __getitem__, is empty, or its min / max is out of range.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if not self.is_pdf:
        raise ValueError("is no PDF")
    if not hasattr(pyliste, "__getitem__"):
        raise ValueError("sequence required")

    allowed = range(len(self))
    if len(pyliste) == 0:
        raise ValueError("bad page number(s)")
    # Checking only the extremes is enough to bound every entry.
    if min(pyliste) not in allowed or max(pyliste) not in allowed:
        raise ValueError("bad page number(s)")

    # get underlying pdf document and rearrange its pages
    pdf = _as_pdf_document(self)
    if mupdf_version_tuple >= (1, 25, 3):
        # We use PDF_CLEAN_STRUCTURE_KEEP otherwise we lose structure tree
        # which, for example, breaks test_3705.
        mupdf.pdf_rearrange_pages2(pdf, pyliste, mupdf.PDF_CLEAN_STRUCTURE_KEEP)
    else:
        mupdf.pdf_rearrange_pages2(pdf, pyliste)

    # remove any existing pages with their kids
    self._reset_page_refs()
def set_language(self, language=None):
    """Set the document language.

    A falsy 'language' selects mupdf.FZ_LANG_UNSET; anything else is
    converted with mupdf.fz_text_language_from_string(). Always returns True.
    """
    pdf = _as_pdf_document(self)
    lang = (
        mupdf.fz_text_language_from_string(language)
        if language
        else mupdf.FZ_LANG_UNSET
    )
    mupdf.pdf_set_document_language(pdf, lang)
    return True
def set_layer(self, config, basestate=None, on=None, off=None, rbgroups=None, locked=None):
    """Set the PDF keys /ON, /OFF, /RBGroups of an OC layer.

    Args:
        config: -1 selects the /D entry of /OCProperties, any other value
            is used as index into its /Configs array.
        basestate: optional; upper-cased and must end up as "ON", "OFF" or
            "Unchanged".
        on, off, locked: optional list / tuple of OCG identifiers, all of
            which must be keys of self.get_ocgs().
        rbgroups: optional list / tuple of lists / tuples of such identifiers.

    Raises:
        ValueError: document closed, no optional content, wrong argument
            types, unknown OCGs, bad 'basestate' or bad 'config'.
    """
    if self.is_closed:
        raise ValueError("document closed")
    known = set(self.get_ocgs().keys())
    if not known:
        raise ValueError("document has no optional content")

    # The three flat lists are validated identically.
    flat_checks = (
        (on, "bad type: 'on'", "bad OCGs in 'on': %s"),
        (off, "bad type: 'off'", "bad OCGs in 'off': %s"),
        (locked, "bad type: 'locked'", "bad OCGs in 'locked': %s"),
    )
    for xrefs, type_msg, member_msg in flat_checks:
        if not xrefs:
            continue
        if type(xrefs) not in (list, tuple):
            raise ValueError(type_msg)
        unknown = set(xrefs).difference(known)
        if unknown:
            raise ValueError(member_msg % unknown)

    if rbgroups:
        if type(rbgroups) not in (list, tuple):
            raise ValueError("bad type: 'rbgroups'")
        for group in rbgroups:
            if type(group) not in (list, tuple):
                raise ValueError("bad RBGroup '%s'" % group)
            unknown = set(group).difference(known)
            if unknown:
                raise ValueError("bad OCGs in RBGroup: %s" % unknown)

    if basestate:
        basestate = str(basestate).upper()
        if basestate == "UNCHANGED":
            basestate = "Unchanged"
        if basestate not in ("ON", "OFF", "Unchanged"):
            raise ValueError("bad 'basestate'")

    pdf = _as_pdf_document(self)
    ocp = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('OCProperties'),
    )
    if not ocp.m_internal:
        return  # no /OCProperties in the catalog: nothing to update

    if config == -1:
        obj = mupdf.pdf_dict_get(ocp, PDF_NAME('D'))
    else:
        configs = mupdf.pdf_dict_get(ocp, PDF_NAME('Configs'))
        obj = mupdf.pdf_array_get(configs, config)
    if not obj.m_internal:
        raise ValueError(MSG_BAD_OC_CONFIG)

    JM_set_ocg_arrays(obj, basestate, on, off, rbgroups, locked)
    mupdf.ll_pdf_read_ocg(pdf.m_internal)
def set_layer_ui_config(self, number, action=0):
    """Set / unset OC intent configuration.

    Args:
        number: sequence number of the UI configuration, or its "text"
            name as reported by self.layer_ui_configs().
        action: 1 toggles, 2 deselects, any other value selects.

    Raises:
        ValueError: 'number' is a string matching no configuration.
    """
    # The user might have given the name instead of sequence number,
    # so select by that name and continue with corresp. number
    if isinstance(number, str):
        matches = [
            entry["number"]
            for entry in self.layer_ui_configs()
            if entry["text"] == number
        ]
        if not matches:
            raise ValueError(f"bad OCG '{number}'.")
        number = matches[0]  # this is the number for the name

    pdf = _as_pdf_document(self)
    if action == 1:
        mupdf.pdf_toggle_layer_config_ui(pdf, number)
    elif action == 2:
        mupdf.pdf_deselect_layer_config_ui(pdf, number)
    else:
        mupdf.pdf_select_layer_config_ui(pdf, number)
def set_markinfo(self, markinfo: dict) -> bool:
    """Set the PDF MarkInfo values.

    Args:
        markinfo: dict whose keys are a subset of "Marked",
            "UserProperties" and "Suspects"; missing keys default to False.

    Returns:
        False if 'markinfo' is empty or not a dict, True after the catalog
        key /MarkInfo was written.

    Raises:
        ValueError: not a PDF, unknown key, or a value whose lower-cased
            str() is neither "true" nor "false".
    """
    catalog_xref = self.pdf_catalog()
    if catalog_xref == 0:
        raise ValueError("not a PDF")
    if not (markinfo and isinstance(markinfo, dict)):
        return False

    settings = {"Marked": False, "UserProperties": False, "Suspects": False}
    unknown = set(markinfo.keys()).difference(settings.keys())
    if unknown:
        raise ValueError(f"bad MarkInfo key(s): {unknown}")
    settings.update(markinfo)

    entries = []
    for key, value in settings.items():
        value = str(value).lower()  # Python bool -> PDF boolean literal
        if value not in ("true", "false"):
            raise ValueError(f"bad key value '{key}': '{value}'")
        entries.append(f"/{key} {value}")

    self.xref_set_key(catalog_xref, "MarkInfo", "<<" + "".join(entries) + ">>")
    return True
def set_pagelayout(self, pagelayout: str):
    """Set the PDF PageLayout value.

    'pagelayout' is matched case-insensitively (an optional leading "/" is
    ignored) against the accepted names and stored in its canonical
    spelling under the catalog key /PageLayout. Returns True on success.

    Raises:
        ValueError: not a PDF, or empty / unknown 'pagelayout'.
    """
    valid = ("SinglePage", "OneColumn", "TwoColumnLeft", "TwoColumnRight", "TwoPageLeft", "TwoPageRight")
    catalog_xref = self.pdf_catalog()
    if catalog_xref == 0:
        raise ValueError("not a PDF")
    if not pagelayout:
        raise ValueError("bad PageLayout value")
    if pagelayout[0] == "/":
        pagelayout = pagelayout[1:]

    canonical = {name.lower(): name for name in valid}.get(pagelayout.lower())
    if canonical is None:
        raise ValueError("bad PageLayout value")
    self.xref_set_key(catalog_xref, "PageLayout", f"/{canonical}")
    return True
def set_pagemode(self, pagemode: str):
    """Set the PDF PageMode value.

    'pagemode' is matched case-insensitively (an optional leading "/" is
    ignored) against the accepted names and stored in its canonical
    spelling under the catalog key /PageMode. Returns True on success.

    Raises:
        ValueError: not a PDF, or empty / unknown 'pagemode'.
    """
    valid = ("UseNone", "UseOutlines", "UseThumbs", "FullScreen", "UseOC", "UseAttachments")
    catalog_xref = self.pdf_catalog()
    if catalog_xref == 0:
        raise ValueError("not a PDF")
    if not pagemode:
        raise ValueError("bad PageMode value")
    if pagemode[0] == "/":
        pagemode = pagemode[1:]

    canonical = {name.lower(): name for name in valid}.get(pagemode.lower())
    if canonical is None:
        raise ValueError("bad PageMode value")
    self.xref_set_key(catalog_xref, "PageMode", f"/{canonical}")
    return True
def set_xml_metadata(self, metadata):
    """Store XML document level metadata.

    'metadata' is UTF-8 encoded and either replaces the content of the
    existing catalog /Metadata stream or is added as a new stream with
    /Type /Metadata and /Subtype /XML.

    Raises:
        ValueError: document closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    if not root.m_internal:
        RAISEPY(MSG_BAD_PDFROOT, JM_Exc_FileDataError)

    payload = mupdf.fz_new_buffer_from_copied_data(metadata.encode('utf-8'))
    current = mupdf.pdf_dict_get(root, PDF_NAME('Metadata'))
    if current.m_internal:
        # a metadata stream already exists: overwrite its content
        JM_update_stream(pdf, current, payload, 0)
        return

    created = mupdf.pdf_add_stream(pdf, payload, mupdf.PdfObj(), 0)
    mupdf.pdf_dict_put(created, PDF_NAME('Type'), PDF_NAME('Metadata'))
    mupdf.pdf_dict_put(created, PDF_NAME('Subtype'), PDF_NAME('XML'))
    mupdf.pdf_dict_put(root, PDF_NAME('Metadata'), created)
def switch_layer(self, config, as_default=0):
    """Activate an OC layer.

    Args:
        config: index of the layer configuration; negative values are a
            no-op.
        as_default: if truthy, also make the selected configuration the
            default one.

    Raises:
        ValueError: 'config' >= 1 although /OCProperties has no non-empty
            /Configs array.
    """
    pdf = _as_pdf_document(self)
    cfgs = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('OCProperties'),
        PDF_NAME('Configs'),
    )
    has_configs = mupdf.pdf_is_array(cfgs) and mupdf.pdf_array_len(cfgs)
    if not has_configs:
        # without any configs only values below 1 are tolerated
        if config < 1:
            return
        raise ValueError(MSG_BAD_OC_LAYER)
    if config < 0:
        return

    mupdf.pdf_select_layer_config(pdf, config)
    if as_default:
        mupdf.pdf_set_layer_config_as_default(pdf)
        mupdf.ll_pdf_read_ocg(pdf.m_internal)
def update_object(self, xref, text, page=None):
    """Replace object definition source.

    Args:
        xref: number of the object to replace; must be in 1 .. xreflen-1.
        text: PDF source of the new object definition.
        page: optional page whose links are refreshed after the update.

    Raises:
        ValueError: document closed or encrypted, or 'xref' out of range.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    if not _INRANGE(xref, 1, xreflen - 1):
        # Raise the same way the other xref-taking methods do; the former
        # RAISEPY("bad xref", MSG_BAD_XREF) call passed a message constant
        # in the position where an exception class is expected.
        raise ValueError(MSG_BAD_XREF)
    ENSURE_OPERATION(pdf)
    # create new object with passed-in string
    new_obj = JM_pdf_obj_from_str(pdf, text)
    mupdf.pdf_update_object(pdf, xref, new_obj)
    if page:
        JM_refresh_links(_as_pdf_page(page))
def update_stream(self, xref=0, stream=None, new=1, compress=1):
    """Replace xref stream part.

    Args:
        xref: number of the object whose stream is replaced; must be in
            1 .. xreflen-1 and refer to a PDF dictionary.
        stream: new stream content, converted via JM_BufferFromBytes().
        new: not used by this implementation (kept for compatibility).
        compress: passed through to JM_update_stream().

    Raises:
        ValueError: document closed or encrypted, bad 'xref', or the object
            is no dictionary.
        TypeError: 'stream' cannot be converted to a buffer.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    # Valid object numbers end at xreflen - 1, matching the range check
    # used by the other xref-taking methods (the previous upper bound
    # let xref == xreflen slip through).
    if xref < 1 or xref > xreflen - 1:
        raise ValueError(MSG_BAD_XREF)
    # get the object
    obj = mupdf.pdf_new_indirect(pdf, xref, 0)
    if not mupdf.pdf_is_dict(obj):
        raise ValueError(MSG_IS_NO_DICT)
    res = JM_BufferFromBytes(stream)
    if not res.m_internal:
        raise TypeError(MSG_BAD_BUFFER)
    JM_update_stream(pdf, obj, res, compress)
    # NOTE(review): this sets 'dirty' on the Python wrapper object; confirm
    # that it reaches the underlying pdf_document.
    pdf.dirty = 1
@property
def version_count(self):
    '''
    Count versions of PDF document.

    Returns 0 when the document has no underlying PDF object.
    '''
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return 0
    return mupdf.pdf_count_versions(pdf)
def write(
    self,
    garbage=False,
    clean=False,
    deflate=False,
    deflate_images=False,
    deflate_fonts=False,
    incremental=False,
    ascii=False,
    expand=False,
    linear=False,
    no_new_id=False,
    appearance=False,
    pretty=False,
    encryption=1,
    permissions=4095,
    owner_pw=None,
    user_pw=None,
    preserve_metadata=1,
    use_objstms=0,
    compression_effort=0,
):
    """Save the document to memory and return the result as bytes.

    All arguments are forwarded unchanged as keyword arguments to
    self.save(), which receives an in-memory io.BytesIO as its target.
    """
    import io

    save_options = dict(
        garbage=garbage,
        clean=clean,
        no_new_id=no_new_id,
        appearance=appearance,
        deflate=deflate,
        deflate_images=deflate_images,
        deflate_fonts=deflate_fonts,
        incremental=incremental,
        ascii=ascii,
        expand=expand,
        linear=linear,
        pretty=pretty,
        encryption=encryption,
        permissions=permissions,
        owner_pw=owner_pw,
        user_pw=user_pw,
        preserve_metadata=preserve_metadata,
        use_objstms=use_objstms,
        compression_effort=compression_effort,
    )
    sink = io.BytesIO()
    self.save(sink, **save_options)
    return sink.getvalue()
@property
def xref(self):
    """PDF xref number of page."""
    # NOTE(review): reads self.parent and self.number, which looks like
    # page-level state - confirm this property belongs on this class.
    CheckParent(self)
    owner = self.parent
    return owner.page_xref(self.number)
def xref_get_key(self, xref, key):
    """Get PDF dict key value of object at 'xref'.

    Args:
        xref: object number in 1 .. xreflen-1, or -1 for the PDF trailer.
        key: key path, resolved with mupdf.pdf_dict_getp().

    Returns:
        A pair (type name, text). ("null", "null") if the object or the key
        does not exist.

    Raises:
        ValueError: bad 'xref'.
    """
    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    if not _INRANGE(xref, 1, xreflen - 1) and xref != -1:
        raise ValueError(MSG_BAD_XREF)

    obj = mupdf.pdf_load_object(pdf, xref) if xref > 0 else mupdf.pdf_trailer(pdf)
    if not obj.m_internal:
        return ("null", "null")
    subobj = mupdf.pdf_dict_getp(obj, key)
    if not subobj.m_internal:
        return ("null", "null")

    # 'text' stays None for kinds that are rendered from the object source.
    text = None
    if mupdf.pdf_is_indirect(subobj):
        kind = "xref"
        text = "%i 0 R" % mupdf.pdf_to_num(subobj)
    elif mupdf.pdf_is_array(subobj):
        kind = "array"
    elif mupdf.pdf_is_dict(subobj):
        kind = "dict"
    elif mupdf.pdf_is_int(subobj):
        kind = "int"
        text = "%i" % mupdf.pdf_to_int(subobj)
    elif mupdf.pdf_is_real(subobj):
        kind = "float"
    elif mupdf.pdf_is_null(subobj):
        kind = "null"
        text = "null"
    elif mupdf.pdf_is_bool(subobj):
        kind = "bool"
        text = "true" if mupdf.pdf_to_bool(subobj) else "false"
    elif mupdf.pdf_is_name(subobj):
        kind = "name"
        text = "/%s" % mupdf.pdf_to_name(subobj)
    elif mupdf.pdf_is_string(subobj):
        kind = "string"
        text = JM_UnicodeFromStr(mupdf.pdf_to_text_string(subobj))
    else:
        kind = "unknown"

    if text is None:
        text = JM_UnicodeFromBuffer(JM_object_to_buffer(subobj, 1, 0))
    return (kind, text)
def xref_get_keys(self, xref):
    """Get the keys of PDF dict object at 'xref'. Use -1 for the PDF trailer.

    Returns a list of key names; it is empty when the object reports a
    dictionary length of 0.

    Raises:
        ValueError: bad 'xref'.
    """
    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    if not _INRANGE(xref, 1, xreflen - 1) and xref != -1:
        raise ValueError(MSG_BAD_XREF)
    obj = mupdf.pdf_load_object(pdf, xref) if xref > 0 else mupdf.pdf_trailer(pdf)
    return [
        mupdf.pdf_to_name(mupdf.pdf_dict_get_key(obj, index))
        for index in range(mupdf.pdf_dict_len(obj))
    ]
def xref_is_font(self, xref):
    """Check if xref is a font object, i.e. its /Type is /Font.

    Raises:
        ValueError: document closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    _, type_text = self.xref_get_key(xref, "Type")
    return type_text == "/Font"
def xref_is_image(self, xref):
    """Check if xref is an image object, i.e. its /Subtype is /Image.

    Raises:
        ValueError: document closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    _, subtype_text = self.xref_get_key(xref, "Subtype")
    return subtype_text == "/Image"
def xref_is_stream(self, xref=0):
    """Check if xref is a stream object.

    Returns False when the document has no underlying PDF object.
    """
    pdf = _as_pdf_document(self, required=0)
    if pdf.m_internal:
        return bool(mupdf.pdf_obj_num_is_stream(pdf, xref))
    return False  # not a PDF
def xref_is_xobject(self, xref):
    """Check if xref is a form xobject, i.e. its /Subtype is /Form.

    Raises:
        ValueError: document closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    _, subtype_text = self.xref_get_key(xref, "Subtype")
    return subtype_text == "/Form"
def xref_length(self):
    """Get length of xref table (0 if there is no underlying PDF object)."""
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return 0
    return mupdf.pdf_xref_len(pdf)
def xref_object(self, xref, compressed=0, ascii=0):
    """Get xref object source as a string.

    Args:
        xref: object number in 1 .. xreflen-1, or -1 for the PDF trailer.
        compressed, ascii: passed through to the object-to-buffer
            conversion.

    Raises:
        ValueError: document closed, or bad 'xref'.
    """
    if self.is_closed:
        raise ValueError("document closed")
    if g_use_extra:
        # optimized implementation from the 'extra' module
        return extra.xref_object(self.this, xref, compressed, ascii)

    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    if not _INRANGE(xref, 1, xreflen - 1) and xref != -1:
        raise ValueError(MSG_BAD_XREF)
    obj = mupdf.pdf_load_object(pdf, xref) if xref > 0 else mupdf.pdf_trailer(pdf)
    resolved = mupdf.pdf_resolve_indirect(obj)
    return JM_EscapeStrFromBuffer(JM_object_to_buffer(resolved, compressed, ascii))
def xref_set_key(self, xref, key, value):
    """Set the value of a PDF dictionary key.

    Args:
        xref: object number in 1 .. xreflen-1, or -1 for the PDF trailer.
        key: non-empty str; of the characters in INVALID_NAME_CHARS only
            "/" may occur.
        value: non-empty str; if it starts with "/", the remainder must not
            contain any of INVALID_NAME_CHARS.

    Raises:
        ValueError: document closed, bad 'key', bad 'value' or bad 'xref'.
    """
    if self.is_closed:
        raise ValueError("document closed")

    key_ok = (
        key
        and isinstance(key, str)
        and INVALID_NAME_CHARS.intersection(key) in (set(), {"/"})
    )
    if not key_ok:
        raise ValueError("bad 'key'")

    value_ok = (
        isinstance(value, str)
        and value
        and not (value[0] == "/" and INVALID_NAME_CHARS.intersection(value[1:]) != set())
    )
    if not value_ok:
        raise ValueError("bad 'value'")

    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    if not _INRANGE(xref, 1, xreflen - 1) and xref != -1:
        raise ValueError(MSG_BAD_XREF)

    is_trailer = xref == -1
    obj = mupdf.pdf_trailer(pdf) if is_trailer else mupdf.pdf_load_object(pdf, xref)
    new_obj = JM_set_object_value(obj, key, value)
    if not new_obj.m_internal:
        return  # did not work: skip update

    if not is_trailer:
        mupdf.pdf_update_object(pdf, xref, new_obj)
        return
    # The trailer is updated by copying every entry of the new object
    # into it.
    for index in range(mupdf.pdf_dict_len(new_obj)):
        mupdf.pdf_dict_put(
            obj,
            mupdf.pdf_dict_get_key(new_obj, index),
            mupdf.pdf_dict_get_val(new_obj, index),
        )
def xref_stream(self, xref):
    """Get decompressed xref stream.

    Returns the stream content, or None if the object is no stream.

    Raises:
        ValueError: document closed or encrypted, or bad 'xref'.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    if not _INRANGE(xref, 1, xreflen - 1) and xref != -1:
        raise ValueError(MSG_BAD_XREF)
    obj = mupdf.pdf_new_indirect(pdf, xref, 0) if xref >= 0 else mupdf.pdf_trailer(pdf)
    if not mupdf.pdf_is_stream(obj):
        return None
    return JM_BinFromBuffer(mupdf.pdf_load_stream_number(pdf, xref))
def xref_stream_raw(self, xref):
    """Get xref stream without decompression.

    Returns the raw stream content, or None if the object is no stream.

    Raises:
        ValueError: document closed or encrypted, or bad 'xref'.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    if not _INRANGE(xref, 1, xreflen - 1) and xref != -1:
        raise ValueError(MSG_BAD_XREF)
    obj = mupdf.pdf_new_indirect(pdf, xref, 0) if xref >= 0 else mupdf.pdf_trailer(pdf)
    if not mupdf.pdf_is_stream(obj):
        return None
    return JM_BinFromBuffer(mupdf.pdf_load_raw_stream_number(pdf, xref))
def xref_xml_metadata(self):
    """Get xref of document XML metadata (0 if the catalog has none)."""
    pdf = _as_pdf_document(self)
    root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    if not root.m_internal:
        RAISEPY(MSG_BAD_PDFROOT, JM_Exc_FileDataError)
    xml = mupdf.pdf_dict_get(root, PDF_NAME('Metadata'))
    return mupdf.pdf_to_num(xml) if xml.m_internal else 0
-
- __slots__ = ('this', 'page_count2', 'this_is_pdf', '__dict__')
-
- outline = property(lambda self: self._outline)
- tobytes = write
- is_stream = xref_is_stream
- open = Document
class DocumentWriter:
    """Context-manager wrapper around mupdf.FzDocumentWriter.

    Leaving the 'with' block calls close().
    """

    def __init__(self, path, options=''):
        """Create a writer for 'path'.

        'path' may be a str, an object with an 'absolute' attribute
        (converted with str()), an object with a 'name' attribute (its name
        is used), or any other object, which is wrapped via
        JM_new_output_fileptr().
        """
        if not isinstance(path, str):
            if hasattr(path, 'absolute'):
                path = str(path)
            elif hasattr(path, 'name'):
                path = path.name

        if isinstance(path, str):
            self.this = mupdf.FzDocumentWriter(path, options, mupdf.FzDocumentWriter.PathType_PDF)
            return

        # Need to keep the Python JM_new_output_fileptr_Output instance
        # alive for the lifetime of this DocumentWriter, otherwise calls
        # to virtual methods implemented in Python fail.
        # NOTE(review): the code below only asserts that the wrapper kept
        # it as '_out'; nothing is stored on this DocumentWriter itself.
        #
        # Unrelated to this, mupdf.FzDocumentWriter will set
        # self._out.m_internal to null because ownership is passed in.
        #
        out = JM_new_output_fileptr(path)
        self.this = mupdf.FzDocumentWriter(out, options, mupdf.FzDocumentWriter.OutputType_PDF)
        assert out.m_internal_value() == 0
        assert hasattr(self.this, '_out')

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    def begin_page(self, mediabox):
        """Start a new page of size 'mediabox'; return its wrapped device."""
        page_rect = JM_rect_from_py(mediabox)
        device = mupdf.fz_begin_page(self.this, page_rect)
        return DeviceWrapper(device)

    def end_page(self):
        """Finish the page started by begin_page()."""
        mupdf.fz_end_page(self.this)

    def close(self):
        """Close the underlying document writer."""
        mupdf.fz_close_document_writer(self.this)
class Font:
    """Wrapper around a MuPDF font object, stored in 'self.this'."""

    def __del__(self):
        if type(self) is not Font:
            return None

    def __init__(
        self,
        fontname=None,
        fontfile=None,
        fontbuffer=None,
        script=0,
        language=None,
        ordering=-1,
        is_bold=0,
        is_italic=0,
        is_serif=0,
        embed=1,
    ):
        """Create a font from a name, a file or a memory buffer.

        'fontbuffer' may be bytes, a bytearray or an object with a
        getvalue() method. Certain 'fontname' values select a CJK ordering
        or a font of the optional pymupdf_fonts package; all arguments are
        finally handed to JM_get_font().

        Raises:
            ValueError: 'fontbuffer' cannot be converted to bytes.
        """
        if fontbuffer:
            if hasattr(fontbuffer, "getvalue"):
                fontbuffer = fontbuffer.getvalue()
            elif isinstance(fontbuffer, bytearray):
                fontbuffer = bytes(fontbuffer)
            if not isinstance(fontbuffer, bytes):
                raise ValueError("bad type: 'fontbuffer'")

        if isinstance(fontname, str):
            fname_lower = fontname.lower()
            if "/" in fname_lower or "\\" in fname_lower or "." in fname_lower:
                message("Warning: did you mean a fontfile?")

            if fname_lower in ("cjk", "china-t", "china-ts"):
                ordering = 0
            elif fname_lower.startswith("china-s"):
                ordering = 1
            elif fname_lower.startswith("korea"):
                ordering = 3
            elif fname_lower.startswith("japan"):
                ordering = 2
            elif fname_lower in fitz_fontdescriptors.keys():
                import pymupdf_fonts  # optional fonts
                fontbuffer = pymupdf_fonts.myfont(fname_lower)  # make a copy
                fontname = None  # ensure using fontbuffer only
                del pymupdf_fonts  # remove package again
            elif ordering < 0:
                fontname = Base14_fontdict.get(fontname, fontname)

        lang = mupdf.fz_text_language_from_string(language)
        font = JM_get_font(fontname, fontfile,
                fontbuffer, script, lang, ordering,
                is_bold, is_italic, is_serif, embed)
        self.this = font

    def __repr__(self):
        return "Font('%s')" % self.name

    def _encode(self, codepoint, script, lang, small_caps):
        """Return (glyph id, font) used to render 'codepoint'.

        With 'small_caps' the glyph is always looked up in this font;
        otherwise MuPDF's fallback lookup may return a different font.
        """
        if small_caps:
            # Always bind the font here: previously it was only set when
            # gid >= 0, leaving the name unbound otherwise.
            return mupdf.fz_encode_character_sc(self.this, codepoint), self.this
        return mupdf.fz_encode_character_with_fallback(self.this, codepoint, script, lang)

    @property
    def ascender(self):
        """Return the glyph ascender value."""
        return mupdf.fz_font_ascender(self.this)

    @property
    def bbox(self):
        """Return the font bbox as reported by MuPDF."""
        return self.this.fz_font_bbox()

    @property
    def buffer(self):
        """Return a copy of the font's buffer content."""
        buffer_ = mupdf.FzBuffer(mupdf.ll_fz_keep_buffer(self.this.m_internal.buffer))
        return mupdf.fz_buffer_extract_copy(buffer_)

    def char_lengths(self, text, fontsize=11, language=None, script=0, wmode=0, small_caps=0):
        """Return list of char lengths of unicode 'text' under a fontsize."""
        lang = mupdf.fz_text_language_from_string(language)
        rc = []
        for ch in text:
            gid, font = self._encode(ord(ch), script, lang, small_caps)
            rc.append(fontsize * mupdf.fz_advance_glyph(font, gid, wmode))
        return rc

    @property
    def descender(self):
        """Return the glyph descender value."""
        return mupdf.fz_font_descender(self.this)

    @property
    def flags(self):
        """Return the font flags as a dict, or None if unavailable."""
        f = mupdf.ll_fz_font_flags(self.this.m_internal)
        if not f:
            return
        assert isinstance(f, mupdf.fz_font_flags_t)
        if mupdf_cppyy:
            # cppyy includes remaining higher bits, so the individual
            # flags are peeled off 'is_mono' one bit at a time.
            v = [f.is_mono]
            def b(bits):
                ret = v[0] & ((1 << bits) - 1)
                v[0] = v[0] >> bits
                return ret
            is_mono = b(1)
            is_serif = b(1)
            is_bold = b(1)
            is_italic = b(1)
            ft_substitute = b(1)
            ft_stretch = b(1)
            fake_bold = b(1)
            fake_italic = b(1)
            has_opentype = b(1)
            invalid_bbox = b(1)
            cjk_lang = b(1)
            embed = b(1)
            never_embed = b(1)
            # NOTE(review): 'cjk' and 'cjk-lang' both report the same
            # extracted bit on this path - confirm against the bit layout
            # of fz_font_flags_t.
            return {
                "mono": is_mono,
                "serif": is_serif,
                "bold": is_bold,
                "italic": is_italic,
                "substitute": ft_substitute,
                "stretch": ft_stretch,
                "fake-bold": fake_bold,
                "fake-italic": fake_italic,
                "opentype": has_opentype,
                "invalid-bbox": invalid_bbox,
                'cjk': cjk_lang,
                'cjk-lang': cjk_lang,
                'embed': embed,
                'never-embed': never_embed,
            }
        return {
            "mono": f.is_mono,
            "serif": f.is_serif,
            "bold": f.is_bold,
            "italic": f.is_italic,
            "substitute": f.ft_substitute,
            "stretch": f.ft_stretch,
            "fake-bold": f.fake_bold,
            "fake-italic": f.fake_italic,
            "opentype": f.has_opentype,
            "invalid-bbox": f.invalid_bbox,
            'cjk': f.cjk,
            'cjk-lang': f.cjk_lang,
            'embed': f.embed,
            'never-embed': f.never_embed,
        }

    def glyph_advance(self, chr_, language=None, script=0, wmode=0, small_caps=0):
        """Return the glyph width of a unicode (font size 1)."""
        lang = mupdf.fz_text_language_from_string(language)
        gid, font = self._encode(chr_, script, lang, small_caps)
        return mupdf.fz_advance_glyph(font, gid, wmode)

    def glyph_bbox(self, char, language=None, script=0, small_caps=0):
        """Return the glyph bbox of a unicode (font size 1)."""
        lang = mupdf.fz_text_language_from_string(language)
        gid, font = self._encode(char, script, lang, small_caps)
        return Rect(mupdf.fz_bound_glyph(font, gid, mupdf.FzMatrix()))

    @property
    def glyph_count(self):
        """Return the glyph count stored in the underlying font."""
        return self.this.m_internal.glyph_count

    def glyph_name_to_unicode(self, name):
        """Return the unicode for a glyph name."""
        return glyph_name_to_unicode(name)

    def has_glyph(self, chr, language=None, script=0, fallback=0, small_caps=0):
        """Check whether font has a glyph for this unicode.

        Returns the glyph id delivered by the respective MuPDF lookup.
        """
        if fallback:
            lang = mupdf.fz_text_language_from_string(language)
            gid, font = mupdf.fz_encode_character_with_fallback(self.this, chr, script, lang)
        elif small_caps:
            gid = mupdf.fz_encode_character_sc(self.this, chr)
        else:
            gid = mupdf.fz_encode_character(self.this, chr)
        return gid

    @property
    def is_bold(self):
        return mupdf.fz_font_is_bold(self.this)

    @property
    def is_italic(self):
        return mupdf.fz_font_is_italic(self.this)

    @property
    def is_monospaced(self):
        return mupdf.fz_font_is_monospaced(self.this)

    @property
    def is_serif(self):
        return mupdf.fz_font_is_serif(self.this)

    @property
    def is_writable(self):
        """Always True; see pymupdf commit ef4056ee4da2."""
        # The former flag-based checks were unreachable after the
        # unconditional return and have been removed.
        return True

    @property
    def name(self):
        """Return the font name as reported by MuPDF."""
        return mupdf.fz_font_name(self.this)

    def text_length(self, text, fontsize=11, language=None, script=0, wmode=0, small_caps=0):
        """Return length of unicode 'text' under a fontsize.

        Raises:
            TypeError: 'text' is not a str.
        """
        lang = mupdf.fz_text_language_from_string(language)
        if not isinstance(text, str):
            raise TypeError(MSG_BAD_TEXT)
        rc = 0
        for ch in text:
            gid, font = self._encode(ord(ch), script, lang, small_caps)
            rc += mupdf.fz_advance_glyph(font, gid, wmode)
        rc *= fontsize
        return rc

    def unicode_to_glyph_name(self, ch):
        """Return the glyph name for a unicode."""
        return unicode_to_glyph_name(ch)

    def valid_codepoints(self):
        '''
        Returns sorted list of valid unicodes of a fz_font.
        '''
        ucs_gids = mupdf.fz_enumerate_font_cmap2(self.this)
        return sorted({entry.ucs for entry in ucs_gids})
class Graftmap:
    """Holds a MuPDF graft map created for a target PDF document."""

    def __init__(self, doc):
        target_pdf = _as_pdf_document(doc)
        self.this = mupdf.pdf_new_graft_map(target_pdf)
        self.thisown = True

    def __del__(self):
        # Only exact Graftmap instances drop their ownership flag.
        if type(self) is Graftmap:
            self.thisown = False
class Link:
    """Wrapper around a mupdf.FzLink, stored in 'self.this'.

    The methods below read the attributes 'parent' (the owning page; its
    own 'parent' is used as the document), 'xref' and 'id'. Only 'next'
    assigns them here, for the link object it returns.
    """

    def __del__(self):
        self._erase()

    def __init__(self, this):
        assert isinstance(this, mupdf.FzLink)
        self.this = this

    def __repr__(self):
        CheckParent(self)
        return "link on " + str(self.parent)

    def __str__(self):
        CheckParent(self)
        return "link on " + str(self.parent)

    def _border(self, doc, xref):
        """Return border info of link object 'xref', or None if unavailable."""
        pdf = _as_pdf_document(doc, required=0)
        if not pdf.m_internal:
            return
        link_obj = mupdf.pdf_new_indirect(pdf, xref, 0)
        if not link_obj.m_internal:
            return
        return JM_annot_border(link_obj)

    def _colors(self, doc, xref):
        """Return color info of link object 'xref' (None if 'doc' is no PDF).

        Raises:
            ValueError: no object could be created for 'xref'.
        """
        pdf = _as_pdf_document(doc, required=0)
        if not pdf.m_internal:
            return
        link_obj = mupdf.pdf_new_indirect(pdf, xref, 0)
        if not link_obj.m_internal:
            raise ValueError(MSG_BAD_XREF)
        return JM_annot_colors(link_obj)

    def _erase(self):
        """Detach this link from its page and drop the ownership flag."""
        self.parent = None
        self.thisown = False

    def _setBorder(self, border, doc, xref):
        """Apply 'border' to link object 'xref'; None if unavailable."""
        pdf = _as_pdf_document(doc, required=0)
        if not pdf.m_internal:
            return
        link_obj = mupdf.pdf_new_indirect(pdf, xref, 0)
        if not link_obj.m_internal:
            return
        return JM_annot_set_border(border, pdf, link_obj)

    @property
    def border(self):
        return self._border(self.parent.parent.this, self.xref)

    @property
    def colors(self):
        return self._colors(self.parent.parent.this, self.xref)

    @property
    def dest(self):
        """Create link destination details."""
        if hasattr(self, "parent") and self.parent is None:
            raise ValueError("orphaned object: parent is None")
        if self.parent.parent.is_closed or self.parent.parent.is_encrypted:
            raise ValueError("document closed or encrypted")
        doc = self.parent.parent
        # External links and "#..." URIs are passed on unresolved.
        if self.is_external or self.uri.startswith("#"):
            uri = None
        else:
            uri = doc.resolve_link(self.uri)
        return linkDest(self, uri, doc)

    @property
    def flags(self) -> int:
        """Value of the link's /F key; 0 for non-PDF documents or if absent."""
        CheckParent(self)
        doc = self.parent.parent
        if not doc.is_pdf:
            return 0
        f = doc.xref_get_key(self.xref, "F")
        if f[1] != "null":
            return int(f[1])
        return 0

    @property
    def is_external(self):
        """Flag the link as external."""
        CheckParent(self)
        if g_use_extra:
            return extra.Link_is_external(self.this)
        this_link = self.this
        if not this_link.m_internal or not this_link.m_internal.uri:
            return False
        return bool(mupdf.fz_is_external_link(this_link.m_internal.uri))

    @property
    def next(self):
        """Next link.

        Returns None at the end of the chain. Otherwise the new Link is
        registered in the owning page's '_annot_refs' and receives the xref
        and id that follow this link's xref in the page's link annotations.
        """
        if not self.this.m_internal:
            return None
        CheckParent(self)
        val = self.this.next()
        if not val.m_internal:
            return None
        val = Link(val)
        val.thisown = True
        val.parent = self.parent  # copy owning page from prev link
        val.parent._annot_refs[id(val)] = val
        if self.xref > 0:  # prev link has an xref
            # Query the page once and derive both parallel lists from the
            # same result.
            link_items = [x for x in self.parent.annot_xrefs() if x[1] == mupdf.PDF_ANNOT_LINK]
            link_xrefs = [x[0] for x in link_items]
            link_ids = [x[2] for x in link_items]
            idx = link_xrefs.index(self.xref)
            val.xref = link_xrefs[idx + 1]
            val.id = link_ids[idx + 1]
        else:
            val.xref = 0
            val.id = ""
        return val

    @property
    def rect(self):
        """Rectangle ('hot area')."""
        CheckParent(self)
        # utils.py:getLinkDict() appears to expect exceptions from us, so we
        # ensure that we raise on error.
        if self.this is None or not self.this.m_internal:
            raise Exception('self.this.m_internal not available')
        val = JM_py_from_rect(self.this.rect())
        return Rect(val)

    def set_border(self, border=None, width=0, dashes=None, style=None):
        """Set the link border from a dict or from the individual values."""
        if type(border) is not dict:
            border = {"width": width, "style": style, "dashes": dashes}
        return self._setBorder(border, self.parent.parent.this, self.xref)

    def set_colors(self, colors=None, stroke=None, fill=None):
        """Set border colors.

        Only the stroke color is written (key /C); a fill color merely
        triggers a warning. An empty list / tuple writes an empty array.
        """
        CheckParent(self)
        doc = self.parent.parent
        if type(colors) is not dict:
            colors = {"fill": fill, "stroke": stroke}
        fill = colors.get("fill")
        stroke = colors.get("stroke")
        if fill is not None:
            message("warning: links have no fill color")
        if stroke in ([], ()):
            doc.xref_set_key(self.xref, "C", "[]")
            return
        if hasattr(stroke, "__float__"):
            stroke = [float(stroke)]
        CheckColor(stroke)
        assert len(stroke) in (1, 3, 4)
        s = f"[{_format_g(stroke)}]"
        doc.xref_set_key(self.xref, "C", s)

    def set_flags(self, flags):
        """Write integer 'flags' to the link's /F key.

        Raises:
            ValueError: document is no PDF, or 'flags' is not an int.
        """
        CheckParent(self)
        doc = self.parent.parent
        if not doc.is_pdf:
            raise ValueError("is no PDF")
        if type(flags) is not int:
            raise ValueError("bad 'flags' value")
        doc.xref_set_key(self.xref, "F", str(flags))
        return None

    @property
    def uri(self):
        """Uri string."""
        #CheckParent(self)
        if g_use_extra:
            return extra.link_uri(self.this)
        this_link = self.this
        return this_link.m_internal.uri if this_link.m_internal else ''

    page = -1
class Matrix:
    """Matrix with the six components a, b, c, d, e, f.

    Instances behave like a sequence of length 6 and support arithmetic with
    numbers and with other 6-item sequences.
    """

    def __abs__(self):
        """Square root of the sum of the squared components."""
        return math.sqrt(sum(v * v for v in self))

    def __add__(self, m):
        """Add a number to every component, or add a 6-sequence item-wise."""
        if hasattr(m, "__float__"):
            return Matrix(*[v + m for v in self])
        if len(m) != 6:
            raise ValueError("Matrix: bad seq len")
        return Matrix(*[v + m[i] for i, v in enumerate(self)])

    def __bool__(self):
        """False only if largest and smallest component both equal 0."""
        largest = max(self)
        smallest = min(self)
        return largest != smallest or smallest != 0

    def __eq__(self, mat):
        """Equal if 'mat' has length 6 and the difference is all zeros."""
        if not hasattr(mat, "__len__"):
            return False
        return len(mat) == 6 and not (self - mat)

    def __getitem__(self, i):
        components = (self.a, self.b, self.c, self.d, self.e, self.f)
        return components[i]

    def __init__(self, *args, a=None, b=None, c=None, d=None, e=None, f=None):
        """
        Matrix() - all zeros
        Matrix(a, b, c, d, e, f)
        Matrix(zoom-x, zoom-y) - zoom
        Matrix(shear-x, shear-y, 1) - shear
        Matrix(degree) - rotate
        Matrix(Matrix) - new copy
        Matrix(sequence) - from 'sequence'
        Matrix(mupdf.FzMatrix) - from MuPDF class wrapper for fz_matrix.

        Explicit keyword args a, b, c, d, e, f override any earlier settings if
        not None.
        """
        count = len(args)
        if count == 0:
            self.a = self.b = self.c = self.d = self.e = self.f = 0.0
        elif count > 6:
            raise ValueError("Matrix: bad seq len")
        elif count == 6:  # 6 numbers
            self.a, self.b, self.c, self.d, self.e, self.f = map(float, args)
        elif count == 1:  # either an angle or a sequence
            src = args[0]
            if isinstance(src, mupdf.FzMatrix):
                self.a = src.a
                self.b = src.b
                self.c = src.c
                self.d = src.d
                self.e = src.e
                self.f = src.f
            elif hasattr(src, "__float__"):
                # rotation by 'src' degrees
                angle = math.radians(src)
                cos_ = round(math.cos(angle), 8)
                sin_ = round(math.sin(angle), 8)
                self.a = self.d = cos_
                self.b = sin_
                self.c = -sin_
                self.e = self.f = 0.0
            else:
                self.a, self.b, self.c, self.d, self.e, self.f = map(float, src)
        elif count == 2 or count == 3 and args[2] == 0:
            # zoom
            self.a, self.b, self.c, self.d, self.e, self.f = (
                float(args[0]), 0.0, 0.0, float(args[1]), 0.0, 0.0
            )
        elif count == 3 and args[2] == 1:
            # shear
            self.a, self.b, self.c, self.d, self.e, self.f = (
                1.0, float(args[1]), float(args[0]), 1.0, 0.0, 0.0
            )
        else:
            raise ValueError("Matrix: bad args")

        # Override with explicit args if specified.
        overrides = (("a", a), ("b", b), ("c", c), ("d", d), ("e", e), ("f", f))
        for name, value in overrides:
            if value is not None:
                setattr(self, name, value)

    def __invert__(self):
        """Calculate inverted matrix."""
        inverse = Matrix()
        inverse.invert(self)
        return inverse

    def __len__(self):
        return 6

    def __mul__(self, m):
        """Scale all components by a number, or concatenate with a matrix."""
        if hasattr(m, "__float__"):
            return Matrix(*[v * m for v in self])
        product = Matrix(1, 1)
        return product.concat(self, m)

    def __neg__(self):
        return Matrix(*[-v for v in self])

    def __nonzero__(self):
        largest = max(self)
        smallest = min(self)
        return largest != smallest or smallest != 0

    def __pos__(self):
        return Matrix(self)

    def __repr__(self):
        return "Matrix" + str(tuple(self))

    def __setitem__(self, i, v):
        """Set component number 'i' (0..5) to float(v)."""
        v = float(v)
        for position, name in enumerate("abcdef"):
            if i == position:
                setattr(self, name, v)
                return
        raise IndexError("index out of range")

    def __sub__(self, m):
        """Subtract a number from every component, or a 6-sequence item-wise."""
        if hasattr(m, "__float__"):
            return Matrix(*[v - m for v in self])
        if len(m) != 6:
            raise ValueError("Matrix: bad seq len")
        return Matrix(*[v - m[i] for i, v in enumerate(self)])

    def __truediv__(self, m):
        """Divide all components by a number, or multiply with the inverse of 'm'."""
        if hasattr(m, "__float__"):
            return Matrix(*[v * 1./m for v in self])
        inverse = util_invert_matrix(m)[1]
        if not inverse:
            raise ZeroDivisionError("matrix not invertible")
        quotient = Matrix(1, 1)
        return quotient.concat(self, inverse)

    def concat(self, one, two):
        """Multiply two matrices and replace current one."""
        if not len(one) == len(two) == 6:
            raise ValueError("Matrix: bad seq len")
        product = util_concat_matrix(one, two)
        self.a, self.b, self.c, self.d, self.e, self.f = product
        return self

    def invert(self, src=None):
        """Calculate the inverted matrix. Return 0 if successful and replace
        current one. Else return 1 and do nothing.
        """
        source = self if src is None else src
        dst = util_invert_matrix(source)
        if dst[0] == 1:
            return 1
        self.a, self.b, self.c, self.d, self.e, self.f = dst[1]
        return 0

    @property
    def is_rectilinear(self):
        """True if rectangles are mapped to rectangles."""
        off_diagonal_zero = abs(self.b) < EPSILON and abs(self.c) < EPSILON
        diagonal_zero = abs(self.a) < EPSILON and abs(self.d) < EPSILON
        return off_diagonal_zero or diagonal_zero

    def prerotate(self, theta):
        """Calculate pre rotation and replace current matrix."""
        theta = float(theta)
        # bring the angle into the range 0 <= theta < 360
        while theta < 0:
            theta += 360
        while theta >= 360:
            theta -= 360
        if abs(0 - theta) < EPSILON:
            pass
        elif abs(90.0 - theta) < EPSILON:
            old_a = self.a
            old_b = self.b
            self.a = self.c
            self.b = self.d
            self.c = -old_a
            self.d = -old_b
        elif abs(180.0 - theta) < EPSILON:
            self.a = -self.a
            self.b = -self.b
            self.c = -self.c
            self.d = -self.d
        elif abs(270.0 - theta) < EPSILON:
            old_a = self.a
            old_b = self.b
            self.a = -self.c
            self.b = -self.d
            self.c = old_a
            self.d = old_b
        else:
            # arbitrary angle
            rad = math.radians(theta)
            sin_ = math.sin(rad)
            cos_ = math.cos(rad)
            old_a = self.a
            old_b = self.b
            self.a = cos_ * old_a + sin_ * self.c
            self.b = cos_ * old_b + sin_ * self.d
            self.c = -sin_ * old_a + cos_ * self.c
            self.d = -sin_ * old_b + cos_ * self.d
        return self

    def prescale(self, sx, sy):
        """Calculate pre scaling and replace current matrix."""
        factor_x = float(sx)
        factor_y = float(sy)
        self.a *= factor_x
        self.b *= factor_x
        self.c *= factor_y
        self.d *= factor_y
        return self

    def preshear(self, h, v):
        """Calculate pre shearing and replace current matrix."""
        h = float(h)
        v = float(v)
        old_a = self.a
        old_b = self.b
        self.a += v * self.c
        self.b += v * self.d
        self.c += h * old_a
        self.d += h * old_b
        return self

    def pretranslate(self, tx, ty):
        """Calculate pre translation and replace current matrix."""
        shift_x = float(tx)
        shift_y = float(ty)
        self.e += shift_x * self.a + shift_y * self.c
        self.f += shift_x * self.b + shift_y * self.d
        return self

    __inv__ = __invert__
    __div__ = __truediv__
    norm = __abs__
class IdentityMatrix(Matrix):
    """Identity matrix [1, 0, 0, 1, 0, 0]

    Assignments to the six matrix components are accepted but ignored: 'a'
    and 'd' always stay 1.0, while 'b', 'c', 'e' and 'f' always stay 0.0.
    """

    def __hash__(self):
        return hash((1,0,0,1,0,0))

    def __init__(self):
        Matrix.__init__(self, 1.0, 1.0)

    def __repr__(self):
        return "IdentityMatrix(1.0, 0.0, 0.0, 1.0, 0.0, 0.0)"

    def __setattr__(self, name, value):
        # Compare against the exact component names. A substring test such as
        # `name in "bcef"` would also match "", "ce", "ef", ... and silently
        # replace the value of unrelated attributes.
        if name in ("a", "d"):
            self.__dict__[name] = 1.0
        elif name in ("b", "c", "e", "f"):
            self.__dict__[name] = 0.0
        else:
            self.__dict__[name] = value

    def checkargs(*args):
        """Unconditionally raise NotImplementedError."""
        raise NotImplementedError("Identity is readonly")


Identity = IdentityMatrix()
class linkDest:
    """link or outline destination details"""

    def __init__(self, obj, rlink, document=None):
        """Derive destination details from a link or outline object.

        Args:
            obj: object providing the attributes 'is_external', 'page' and
                'uri'.
            rlink: if true and the uri does not start with "#", a sequence
                (page number, x, y) from which a "#page=..." uri is built.
            document: used to resolve "#nameddest=..." destinations via its
                `resolve_names()` method.
        """
        isExt = obj.is_external
        isInt = not isExt
        self.dest = ""
        self.file_spec = ""
        self.flags = 0
        self.is_map = False
        self.is_uri = False
        self.kind = LINK_NONE
        self.lt = Point(0, 0)
        self.named = dict()
        self.new_window = ""
        self.page = obj.page
        self.rb = Point(0, 0)
        self.uri = obj.uri

        def uri_to_dict(uri):
            """Split 'key=value' items separated by '&' into a dict.

            Items without '=' are stored with value None. The argument is
            parsed as given: stripping a leading '#' is up to the caller.
            (This used to ignore 'uri' and always parse self.uri[1:], which
            cut off the first character of uris not starting with '#'.)
            """
            ret = dict()
            for item in uri.split('&'):
                eq = item.find('=')
                if eq >= 0:
                    ret[item[:eq]] = item[eq+1:]
                else:
                    ret[item] = None
            return ret

        def unescape(name):
            """Unescape '%AB' substrings to chr(0xAB)."""
            split = name.replace("%%", "%25")  # take care of escaped '%'
            split = split.split("%")
            newname = split[0]
            for item in split[1:]:
                piece = item[:2]
                newname += chr(int(piece, base=16))
                newname += item[2:]
            return newname

        if rlink and not self.uri.startswith("#"):
            self.uri = f"#page={rlink[0] + 1}&zoom=0,{_format_g(rlink[1])},{_format_g(rlink[2])}"
        if isExt:
            self.page = -1
            self.kind = LINK_URI
        if not self.uri:
            self.page = -1
            self.kind = LINK_NONE
        if isInt and self.uri:
            self.uri = self.uri.replace("&zoom=nan", "&zoom=0")
            if self.uri.startswith("#"):
                self.kind = LINK_GOTO
                m = re.match('^#page=([0-9]+)&zoom=([0-9.]+),(-?[0-9.]+),(-?[0-9.]+)$', self.uri)
                if m:
                    # page number plus target point
                    self.page = int(m.group(1)) - 1
                    self.lt = Point(float((m.group(3))), float(m.group(4)))
                    self.flags = self.flags | LINK_FLAG_L_VALID | LINK_FLAG_T_VALID
                else:
                    m = re.match('^#page=([0-9]+)$', self.uri)
                    if m:
                        # page number only
                        self.page = int(m.group(1)) - 1
                    else:
                        self.kind = LINK_NAMED
                        m = re.match('^#nameddest=(.*)', self.uri)
                        assert document
                        if document and m:
                            named = unescape(m.group(1))
                            self.named = document.resolve_names().get(named)
                            if self.named is None:
                                # document.resolve_names() does not contain an
                                # entry for `named` so use an empty dict.
                                self.named = dict()
                            self.named['nameddest'] = named
                        else:
                            self.named = uri_to_dict(self.uri[1:])
            else:
                self.kind = LINK_NAMED
                self.named = uri_to_dict(self.uri)
        if isExt:
            if not self.uri:
                pass
            elif self.uri.startswith("file:"):
                self.file_spec = self.uri[5:]
                if self.file_spec.startswith("//"):
                    self.file_spec = self.file_spec[2:]
                self.is_uri = False
                self.uri = ""
                self.kind = LINK_LAUNCH
                ftab = self.file_spec.split("#")
                if len(ftab) == 2:
                    if ftab[1].startswith("page="):
                        self.kind = LINK_GOTOR
                        self.file_spec = ftab[0]
                        self.page = int(ftab[1].split("&")[0][5:]) - 1
            elif ":" in self.uri:
                self.is_uri = True
                self.kind = LINK_URI
            else:
                self.is_uri = True
                self.kind = LINK_LAUNCH
        assert isinstance(self.named, dict)
class Widget:
    '''
    Class describing a PDF form field ("widget")
    '''

    def __init__(self):
        self.border_color = None
        self.border_style = "S"
        self.border_width = 0
        self.border_dashes = None
        self.choice_values = None  # choice fields only
        self.rb_parent = None  # radio buttons only: xref of owning parent

        self.field_name = None  # field name
        self.field_label = None  # field label
        self.field_value = None
        self.field_flags = 0
        self.field_display = 0
        self.field_type = 0  # valid range 1 through 7
        self.field_type_string = None  # field type as string

        self.fill_color = None
        self.button_caption = None  # button caption
        self.is_signed = None  # True / False if signature
        self.text_color = (0, 0, 0)
        self.text_font = "Helv"
        self.text_fontsize = 0
        self.text_maxlen = 0  # text fields only
        self.text_format = 0  # text fields only
        self._text_da = ""  # /DA = default appearance

        self.script = None  # JavaScript (/A)
        self.script_stroke = None  # JavaScript (/AA/K)
        self.script_format = None  # JavaScript (/AA/F)
        self.script_change = None  # JavaScript (/AA/V)
        self.script_calc = None  # JavaScript (/AA/C)
        self.script_blur = None  # JavaScript (/AA/Bl)
        self.script_focus = None  # JavaScript (/AA/Fo) codespell:ignore

        self.rect = None  # annot value
        self.xref = 0  # annot value

    def __repr__(self):
        # There is no self.parent here, so only type and script are shown.
        return f'Widget:(field_type={self.field_type_string} script={self.script})'

    def _adjust_font(self):
        """Ensure text_font is from our list and correctly spelled.

        Unknown or empty font names are replaced by "Helv".
        """
        if not self.text_font:
            self.text_font = "Helv"
            return
        valid_fonts = ("Cour", "TiRo", "Helv", "ZaDb")
        for f in valid_fonts:
            if self.text_font.lower() == f.lower():
                self.text_font = f
                return
        self.text_font = "Helv"
        return

    def _checker(self):
        """Any widget type checks.
        """
        if self.field_type not in range(1, 8):
            raise ValueError("bad field type")

        # if setting a radio button to ON, first set Off all buttons
        # in the group - this is not done by MuPDF:
        if self.field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON and self.field_value not in (False, "Off") and hasattr(self, "parent"):
            # so we are about setting this button to ON/True
            # check other buttons in same group and set them to 'Off'
            doc = self.parent.parent
            kids_type, kids_value = doc.xref_get_key(self.xref, "Parent/Kids")
            if kids_type == "array":
                xrefs = tuple(map(int, kids_value[1:-1].replace("0 R","").split()))
                for xref in xrefs:
                    if xref != self.xref:
                        doc.xref_set_key(xref, "AS", "/Off")
        # the calling method will now set the intended button to on and
        # will find everything prepared for correct functioning.

    def _parse_da(self):
        """Extract font name, size and color from default appearance string (/DA object).

        Equivalent to 'pdf_parse_default_appearance' function in MuPDF's 'pdf-annot.c'.
        Gray ("g"), RGB ("rg") and CMYK ("k") colors are recognized; the
        latter matches what update() writes for 4-component colors.
        """
        if not self._text_da:
            return
        font = "Helv"
        fsize = 0
        col = (0, 0, 0)
        dat = self._text_da.split()  # split on any whitespace
        for i, item in enumerate(dat):
            if item == "Tf":
                font = dat[i - 2][1:]
                fsize = float(dat[i - 1])
                dat[i] = dat[i-1] = dat[i-2] = ""
                continue
            if item == "g":  # unicolor text
                col = [(float(dat[i - 1]))]
                dat[i] = dat[i-1] = ""
                continue
            if item == "rg":  # RGB colored text
                col = [float(f) for f in dat[i - 3:i]]
                dat[i] = dat[i-1] = dat[i-2] = dat[i-3] = ""
                continue
            if item == "k":  # CMYK colored text
                col = [float(f) for f in dat[i - 4:i]]
                dat[i] = dat[i-1] = dat[i-2] = dat[i-3] = dat[i-4] = ""
                continue
        self.text_font = font
        self.text_fontsize = fsize
        self.text_color = col
        self._text_da = ""
        return

    def _validate(self):
        """Validate the class entries.

        Raises ValueError for a bad rectangle, a missing field name or
        script entries that are not strings; normalizes empty values.
        """
        if (self.rect.is_infinite
            or self.rect.is_empty
           ):
            raise ValueError("bad rect")

        if not self.field_name:
            raise ValueError("field name missing")

        if self.field_label == "Unnamed":
            self.field_label = None
        CheckColor(self.border_color)
        CheckColor(self.fill_color)
        if not self.text_color:
            self.text_color = (0, 0, 0)
        CheckColor(self.text_color)

        if not self.border_width:
            self.border_width = 0

        if not self.text_fontsize:
            self.text_fontsize = 0

        self.border_style = self.border_style.upper()[0:1]

        # standardize content of JavaScript entries
        btn_type = self.field_type in (
            mupdf.PDF_WIDGET_TYPE_BUTTON,
            mupdf.PDF_WIDGET_TYPE_CHECKBOX,
            mupdf.PDF_WIDGET_TYPE_RADIOBUTTON,
        )
        if not self.script:
            self.script = None
        elif type(self.script) is not str:
            raise ValueError("script content must be a string")

        # buttons cannot have the following script actions
        for attr in (
            "script_calc",
            "script_change",
            "script_format",
            "script_stroke",
            "script_blur",
            "script_focus",
        ):
            content = getattr(self, attr)
            if btn_type or not content:
                setattr(self, attr, None)
            elif type(content) is not str:
                raise ValueError(f"{attr} content must be a string")

        self._checker()  # any field_type specific checks

    def _sync_flags(self):
        """Propagate the field flags.

        If this widget has a "/Parent", set its field flags and that of all
        its /Kids widgets to the value of the current widget.
        Only possible for widgets existing in the PDF.

        Returns True or False.
        """
        if not self.xref:
            return False  # no xref: widget not in the PDF
        doc = self.parent.parent  # the owning document
        assert doc
        pdf = _as_pdf_document(doc)
        # load underlying PDF object
        pdf_widget = mupdf.pdf_load_object(pdf, self.xref)
        Parent = mupdf.pdf_dict_get(pdf_widget, PDF_NAME("Parent"))
        if not Parent.pdf_is_dict():
            return False  # no /Parent: nothing to do

        # put the field flags value into the parent field flags:
        Parent.pdf_dict_put_int(PDF_NAME("Ff"), self.field_flags)

        # also put that value into all kids of the Parent
        kids = Parent.pdf_dict_get(PDF_NAME("Kids"))
        if not kids.pdf_is_array():
            message("warning: malformed PDF, Parent has no Kids array")
            return False  # no /Kids: should never happen!

        for i in range(kids.pdf_array_len()):  # walk through all kids
            # access kid widget, and do some precautionary checks
            kid = kids.pdf_array_get(i)
            if not kid.pdf_is_dict():
                continue
            xref = kid.pdf_to_num()  # get xref of the kid
            if xref == self.xref:  # skip self widget
                continue
            subtype = kid.pdf_dict_get(PDF_NAME("Subtype"))
            if not subtype.pdf_to_name() == "Widget":
                continue
            # put the field flags value into the kid field flags:
            kid.pdf_dict_put_int(PDF_NAME("Ff"), self.field_flags)

        return True  # all done

    def button_states(self):
        """Return the on/off state names for button widgets.

        A button may have 'normal' or 'pressed down' appearances. While the 'Off'
        state is usually called like this, the 'On' state is often given a name
        relating to the functional context.

        Returns None for field types other than 2 and 5 and for widgets
        without a parent page. Otherwise a dict with keys "normal" and
        "down": each value is the list of names found under "AP/N" resp.
        "AP/D", or None if that entry is neither a dict nor an xref.
        """
        if self.field_type not in (2, 5):
            return None  # no button type
        if not hasattr(self, "parent"):
            return  # field does not yet exist on a page
        doc = self.parent.parent
        xref = self.xref

        def state_names(key):
            """List the names contained in the appearance entry 'key'."""
            entry = doc.xref_get_key(xref, key)
            if entry[0] == "dict":
                text = entry[1][2:-2]  # strip the enclosing "<<" and ">>"
            elif entry[0] == "xref":
                # indirect reference: read the referenced object's source
                text = doc.xref_object(int(entry[1].split(" ")[0]))
            else:
                return None
            # An empty dict yields an empty list. (The previous code
            # re-indexed the extracted string and raised an IndexError.)
            return [part.split()[0] for part in text.split("/")[1:]]

        return {"normal": state_names("AP/N"), "down": state_names("AP/D")}

    @property
    def next(self):
        return self._annot.next

    def on_state(self):
        """Return the "On" value for button widgets.

        This is useful for radio buttons mainly. The first state name unequal
        to "Off" as returned by method button_states() is returned.
        If no such name exists (e.g. the button is new / being created), a
        warning is shown and True is returned.
        """
        if self.field_type not in (2, 5):
            return None  # no checkbox or radio button
        bstate = self.button_states()
        if bstate is None:
            bstate = dict()
        for names in bstate.values():
            # an appearance entry may be missing (None): treat as empty
            for v in names or ():
                if v != "Off":
                    return v
        message("warning: radio button has no 'On' value.")
        return True

    def reset(self):
        """Reset the field value to its default.
        """
        TOOLS._reset_widget(self._annot)

    def update(self, sync_flags=False):
        """Reflect Python object in the PDF.

        Args:
            sync_flags: if true, also propagate the field flags to the
                parent field and its kids (see _sync_flags()).
        """
        self._validate()

        self._adjust_font()  # ensure valid text_font name

        # now create the /DA string
        ncomp = len(self.text_color)
        if ncomp == 3:
            fmt = "{:g} {:g} {:g} rg /{f:s} {s:g} Tf"
        elif ncomp == 1:
            fmt = "{:g} g /{f:s} {s:g} Tf"
        elif ncomp == 4:
            fmt = "{:g} {:g} {:g} {:g} k /{f:s} {s:g} Tf"
        else:
            # previously ended in an UnboundLocalError for 'fmt'
            raise ValueError("text_color needs 1, 3 or 4 components")
        self._text_da = fmt.format(*self.text_color, f=self.text_font,
                                   s=self.text_fontsize)

        # if widget has a '/AA/C' script, make sure it is in the '/CO'
        # array of the '/AcroForm' dictionary.
        if self.script_calc:  # there is a "calculation" script:
            # make sure we are in the /CO array
            util_ensure_widget_calc(self._annot)

        # finally update the widget
        TOOLS._save_widget(self._annot, self)
        self._text_da = ""
        if sync_flags:
            self._sync_flags()  # propagate field flags to parent and kids
- from . import _extra
class Outline:
    """Python wrapper around a MuPDF outline item stored in 'this'."""

    __slots__ = [ 'this']

    def __init__(self, ol):
        self.this = ol

    @property
    def dest(self):
        '''outline destination details'''
        return linkDest(self, None, None)

    def destination(self, document):
        '''
        Like `dest` property but uses `document` to resolve destinations for
        kind=LINK_NAMED.
        '''
        return linkDest(self, None, document)

    @property
    def down(self):
        """Outline item returned by the wrapped object's down(), or None."""
        child = self.this.down()
        if child.m_internal:
            return Outline(child)
        return

    @property
    def is_external(self):
        """Whether the item's uri is an external link."""
        if g_use_extra:
            # calling _extra.* here appears to save significant time in
            # test_toc.py:test_full_toc, 1.2s=>0.94s.
            #
            return _extra.Outline_is_external( self.this)
        item = self.this
        if not item.m_internal:
            return False
        target = item.m_internal.uri
        if target is None:
            return False
        return mupdf.fz_is_external_link(target)

    @property
    def is_open(self):
        return self.this.m_internal.is_open

    @property
    def next(self):
        """Outline item returned by the wrapped object's next(), or None."""
        sibling = self.this.next()
        if sibling.m_internal:
            return Outline(sibling)
        return

    @property
    def page(self):
        return self.this.m_internal.page.page

    @property
    def title(self):
        return self.this.m_internal.title

    @property
    def uri(self):
        """Uri of the item, or None if the wrapped object is not set."""
        item = self.this
        if item.m_internal:
            return item.m_internal.uri
        return None

    @property
    def x(self):
        return self.this.m_internal.x

    @property
    def y(self):
        return self.this.m_internal.y
def _make_PdfFilterOptions(
        recurse=0,
        instance_forms=0,
        ascii=0,
        no_update=0,
        sanitize=0,
        sopts=None,
        ):
    '''
    Returns a mupdf.PdfFilterOptions instance.

    If `sanitize` is true, a sanitize filter factory is added; `sopts` must
    then be a mupdf.PdfSanitizeFilterOptions instance or false, in which case
    a default instance is used.
    '''
    filter_ = mupdf.PdfFilterOptions()
    filter_.recurse = recurse
    filter_.instance_forms = instance_forms
    filter_.ascii = ascii
    filter_.no_update = no_update

    if not sanitize:
        return filter_

    # We want to use a PdfFilterFactory whose `.filter` fn pointer is
    # set to MuPDF's `pdf_new_sanitize_filter()`. But not sure how to
    # get access to this raw fn in Python; and on Windows raw MuPDF
    # functions are not even available to C++.
    #
    # So we use SWIG Director to implement our own
    # PdfFilterFactory whose `filter()` method calls
    # `mupdf.ll_pdf_new_sanitize_filter()`.
    if not sopts:
        sopts = mupdf.PdfSanitizeFilterOptions()
    else:
        assert isinstance(sopts, mupdf.PdfSanitizeFilterOptions)

    class Factory(mupdf.PdfFilterFactory2):
        def __init__(self):
            super().__init__()
            self.use_virtual_filter()
            self.sopts = sopts

        def filter(self, ctx, doc, chain, struct_parents, transform, options):
            if 0:
                # disabled diagnostics
                log(f'sanitize filter.filter():')
                log(f'    {self=}')
                log(f'    {ctx=}')
                log(f'    {doc=}')
                log(f'    {chain=}')
                log(f'    {struct_parents=}')
                log(f'    {transform=}')
                log(f'    {options=}')
                log(f'    {self.sopts.internal()=}')
            return mupdf.ll_pdf_new_sanitize_filter(
                    doc,
                    chain,
                    struct_parents,
                    transform,
                    options,
                    self.sopts.internal(),
                    )

    factory = Factory()
    filter_.add_factory(factory.internal())
    # store the factory on the options object
    filter_._factory = factory
    return filter_
- class Page:
- def __init__(self, page, document):
- assert isinstance(page, (mupdf.FzPage, mupdf.PdfPage)), f'page is: {page}'
- self.this = page
- self.thisown = True
- self.last_point = None
- self.draw_cont = ''
- self._annot_refs = dict()
- self.parent = document
- if page.m_internal:
- if isinstance( page, mupdf.PdfPage):
- self.number = page.m_internal.super.number
- else:
- self.number = page.m_internal.number
- else:
- self.number = None
- def __repr__(self):
- return self.__str__()
- CheckParent(self)
- x = self.parent.name
- if self.parent.stream is not None:
- x = "<memory, doc# %i>" % (self.parent._graft_id,)
- if x == "":
- x = "<new PDF, doc# %i>" % self.parent._graft_id
- return "page %s of %s" % (self.number, x)
- def __str__(self):
- #CheckParent(self)
- parent = getattr(self, 'parent', None)
- if isinstance(self.this.m_internal, mupdf.pdf_page):
- number = self.this.m_internal.super.number
- else:
- number = self.this.m_internal.number
- ret = f'page {number}'
- if parent:
- x = self.parent.name
- if self.parent.stream is not None:
- x = "<memory, doc# %i>" % (self.parent._graft_id,)
- if x == "":
- x = "<new PDF, doc# %i>" % self.parent._graft_id
- ret += f' of {x}'
- return ret
- def _add_caret_annot(self, point):
- if g_use_extra:
- annot = extra._add_caret_annot( self.this, JM_point_from_py(point))
- else:
- page = self._pdf_page()
- annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_CARET)
- if point:
- p = JM_point_from_py(point)
- r = mupdf.pdf_annot_rect(annot)
- r = mupdf.FzRect(p.x, p.y, p.x + r.x1 - r.x0, p.y + r.y1 - r.y0)
- mupdf.pdf_set_annot_rect(annot, r)
- mupdf.pdf_update_annot(annot)
- JM_add_annot_id(annot, "A")
- return annot
- def _add_file_annot(self, point, buffer_, filename, ufilename=None, desc=None, icon=None):
- page = self._pdf_page()
- uf = ufilename if ufilename else filename
- d = desc if desc else filename
- p = JM_point_from_py(point)
- filebuf = JM_BufferFromBytes(buffer_)
- if not filebuf.m_internal:
- raise TypeError( MSG_BAD_BUFFER)
- annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_FILE_ATTACHMENT)
- r = mupdf.pdf_annot_rect(annot)
- r = mupdf.fz_make_rect(p.x, p.y, p.x + r.x1 - r.x0, p.y + r.y1 - r.y0)
- mupdf.pdf_set_annot_rect(annot, r)
- flags = mupdf.PDF_ANNOT_IS_PRINT
- mupdf.pdf_set_annot_flags(annot, flags)
- if icon:
- mupdf.pdf_set_annot_icon_name(annot, icon)
- val = JM_embed_file(page.doc(), filebuf, filename, uf, d, 1)
- mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME('FS'), val)
- mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), PDF_NAME('Contents'), filename)
- mupdf.pdf_update_annot(annot)
- mupdf.pdf_set_annot_rect(annot, r)
- mupdf.pdf_set_annot_flags(annot, flags)
- JM_add_annot_id(annot, "A")
- return Annot(annot)
- def _add_freetext_annot(
- self, rect,
- text,
- fontsize=11,
- fontname=None,
- text_color=None,
- fill_color=None,
- border_color=None,
- border_width=0,
- dashes=None,
- callout=None,
- line_end=mupdf.PDF_ANNOT_LE_OPEN_ARROW,
- opacity=1,
- align=0,
- rotate=0,
- richtext=False,
- style=None,
- ):
- rc = f"""<?xml version="1.0"?>
- <body xmlns="http://www.w3.org/1999/xtml"
- xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/"
- xfa:contentType="text/html" xfa:APIVersion="Acrobat:8.0.0" xfa:spec="2.4">
- {text}"""
- page = self._pdf_page()
- if border_color and not richtext:
- raise ValueError("cannot set border_color if rich_text is False")
- if border_color and not text_color:
- text_color = border_color
- nfcol, fcol = JM_color_FromSequence(fill_color)
- ntcol, tcol = JM_color_FromSequence(text_color)
- r = JM_rect_from_py(rect)
- if mupdf.fz_is_infinite_rect(r) or mupdf.fz_is_empty_rect(r):
- raise ValueError( MSG_BAD_RECT)
- annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_FREE_TEXT)
- annot_obj = mupdf.pdf_annot_obj(annot)
- #insert text as 'contents' or 'RC' depending on 'richtext'
- if not richtext:
- mupdf.pdf_set_annot_contents(annot, text)
- else:
- mupdf.pdf_dict_put_text_string(annot_obj,PDF_NAME("RC"), rc)
- if style:
- mupdf.pdf_dict_put_text_string(annot_obj,PDF_NAME("DS"), style)
- mupdf.pdf_set_annot_rect(annot, r)
- while rotate < 0:
- rotate += 360
- while rotate >= 360:
- rotate -= 360
- if rotate != 0:
- mupdf.pdf_dict_put_int(annot_obj, PDF_NAME('Rotate'), rotate)
- mupdf.pdf_set_annot_quadding(annot, align)
- if nfcol > 0:
- mupdf.pdf_set_annot_color(annot, fcol[:nfcol])
- mupdf.pdf_set_annot_border_width(annot, border_width)
- mupdf.pdf_set_annot_opacity(annot, opacity)
- if dashes:
- for d in dashes:
- mupdf.pdf_add_annot_border_dash_item(annot, float(d))
- # Insert callout information
- if callout:
- mupdf.pdf_dict_put(annot_obj, PDF_NAME("IT"), PDF_NAME("FreeTextCallout"))
- mupdf.pdf_set_annot_callout_style(annot, line_end)
- point_count = len(callout)
- extra.JM_set_annot_callout_line(annot, tuple(callout), point_count)
- # insert the default appearance string
- if not richtext:
- JM_make_annot_DA(annot, ntcol, tcol, fontname, fontsize)
- mupdf.pdf_update_annot(annot)
- JM_add_annot_id(annot, "A")
- val = Annot(annot)
- return val
- def _add_ink_annot(self, list):
- page = _as_pdf_page(self.this)
- if not PySequence_Check(list):
- raise ValueError( MSG_BAD_ARG_INK_ANNOT)
- ctm = mupdf.FzMatrix()
- mupdf.pdf_page_transform(page, mupdf.FzRect(0), ctm)
- inv_ctm = mupdf.fz_invert_matrix(ctm)
- annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_INK)
- annot_obj = mupdf.pdf_annot_obj(annot)
- n0 = len(list)
- inklist = mupdf.pdf_new_array(page.doc(), n0)
- for j in range(n0):
- sublist = list[j]
- n1 = len(sublist)
- stroke = mupdf.pdf_new_array(page.doc(), 2 * n1)
- for i in range(n1):
- p = sublist[i]
- if not PySequence_Check(p) or PySequence_Size(p) != 2:
- raise ValueError( MSG_BAD_ARG_INK_ANNOT)
- point = mupdf.fz_transform_point(JM_point_from_py(p), inv_ctm)
- mupdf.pdf_array_push_real(stroke, point.x)
- mupdf.pdf_array_push_real(stroke, point.y)
- mupdf.pdf_array_push(inklist, stroke)
- mupdf.pdf_dict_put(annot_obj, PDF_NAME('InkList'), inklist)
- mupdf.pdf_update_annot(annot)
- JM_add_annot_id(annot, "A")
- return Annot(annot)
- def _add_line_annot(self, p1, p2):
- page = self._pdf_page()
- annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_LINE)
- a = JM_point_from_py(p1)
- b = JM_point_from_py(p2)
- mupdf.pdf_set_annot_line(annot, a, b)
- mupdf.pdf_update_annot(annot)
- JM_add_annot_id(annot, "A")
- assert annot.m_internal
- return Annot(annot)
- def _add_multiline(self, points, annot_type):
- page = self._pdf_page()
- if len(points) < 2:
- raise ValueError( MSG_BAD_ARG_POINTS)
- annot = mupdf.pdf_create_annot(page, annot_type)
- for p in points:
- if (PySequence_Size(p) != 2):
- raise ValueError( MSG_BAD_ARG_POINTS)
- point = JM_point_from_py(p)
- mupdf.pdf_add_annot_vertex(annot, point)
- mupdf.pdf_update_annot(annot)
- JM_add_annot_id(annot, "A")
- return Annot(annot)
- def _add_redact_annot(self, quad, text=None, da_str=None, align=0, fill=None, text_color=None):
- page = self._pdf_page()
- fcol = [ 1, 1, 1, 0]
- nfcol = 0
- annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_REDACT)
- q = JM_quad_from_py(quad)
- r = mupdf.fz_rect_from_quad(q)
- # TODO calculate de-rotated rect
- mupdf.pdf_set_annot_rect(annot, r)
- if fill:
- nfcol, fcol = JM_color_FromSequence(fill)
- arr = mupdf.pdf_new_array(page.doc(), nfcol)
- for i in range(nfcol):
- mupdf.pdf_array_push_real(arr, fcol[i])
- mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME('IC'), arr)
- if text:
- assert da_str
- mupdf.pdf_dict_puts(
- mupdf.pdf_annot_obj(annot),
- "OverlayText",
- mupdf.pdf_new_text_string(text),
- )
- mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), PDF_NAME('DA'), da_str)
- mupdf.pdf_dict_put_int(mupdf.pdf_annot_obj(annot), PDF_NAME('Q'), align)
- mupdf.pdf_update_annot(annot)
- JM_add_annot_id(annot, "A")
- annot = mupdf.ll_pdf_keep_annot(annot.m_internal)
- annot = mupdf.PdfAnnot( annot)
- return Annot(annot)
- def _add_square_or_circle(self, rect, annot_type):
- page = self._pdf_page()
- r = JM_rect_from_py(rect)
- if mupdf.fz_is_infinite_rect(r) or mupdf.fz_is_empty_rect(r):
- raise ValueError( MSG_BAD_RECT)
- annot = mupdf.pdf_create_annot(page, annot_type)
- mupdf.pdf_set_annot_rect(annot, r)
- mupdf.pdf_update_annot(annot)
- JM_add_annot_id(annot, "A")
- assert annot.m_internal
- return Annot(annot)
def _add_stamp_annot(self, rect, stamp=0):
    """Create a 'Stamp' annotation inside `rect`.

    `stamp` selects the appearance: an index into the predefined stamp
    names gives a text stamp; a Pixmap, a file path (str), bytes / bytearray
    or an io.BytesIO gives an image stamp.  Anything else falls back to the
    first predefined name.  Raises ValueError for an infinite or empty rect.
    """
    rect = Rect(rect)
    fz_rect = JM_rect_from_py(rect)
    if mupdf.fz_is_infinite_rect(fz_rect) or mupdf.fz_is_empty_rect(fz_rect):
        raise ValueError(MSG_BAD_RECT)
    pdf_page = self._pdf_page()
    stamp_names = (
        "Approved",
        "AsIs",
        "Confidential",
        "Departmental",
        "Experimental",
        "Expired",
        "Final",
        "ForComment",
        "ForPublicRelease",
        "NotApproved",
        "NotForPublicRelease",
        "Sold",
        "TopSecret",
        "Draft",
    )
    image_data = None
    stamp_name = None
    if stamp in range(len(stamp_names)):
        stamp_name = stamp_names[stamp]
    elif isinstance(stamp, Pixmap):
        image_data = stamp.tobytes()
    elif isinstance(stamp, str):
        image_data = pathlib.Path(stamp).read_bytes()
    elif isinstance(stamp, (bytes, bytearray)):
        image_data = stamp
    elif isinstance(stamp, io.BytesIO):
        image_data = stamp.getvalue()
    else:
        stamp_name = stamp_names[0]

    annot = mupdf.pdf_create_annot(pdf_page, mupdf.PDF_ANNOT_STAMP)
    if image_data:  # image stamp
        image = mupdf.fz_new_image_from_buffer(
            mupdf.fz_new_buffer_from_copied_data(image_data)
        )
        # Fit the image into `rect`, keeping its aspect ratio, centered.
        img_w, img_h = image.w(), image.h()
        factor = min(rect.width / img_w, rect.height / img_h)
        box_w = img_w * factor
        box_h = img_h * factor
        middle = (rect.tl + rect.br) / 2
        left = middle.x - box_w / 2
        top = middle.y - box_h / 2
        fz_rect = mupdf.fz_make_rect(left, top, left + box_w, top + box_h)
        mupdf.pdf_set_annot_rect(annot, fz_rect)
        mupdf.pdf_set_annot_stamp_image(annot, image)
        mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME("Name"), mupdf.pdf_new_name("ImageStamp"))
        mupdf.pdf_set_annot_contents(annot, "Image Stamp")
    else:  # text stamp
        mupdf.pdf_set_annot_rect(annot, fz_rect)
        mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME("Name"), PDF_NAME(stamp_name))
        mupdf.pdf_set_annot_contents(annot, stamp_name)
    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    return Annot(annot)
def _add_text_annot(self, point, text, icon=None):
    """Create a 'Text' annotation whose top-left corner is `point`.

    The new annotation keeps the width and height of the rectangle it was
    created with; only its position is changed.  `icon`, if truthy, is set
    as the icon name.
    """
    pdf_page = self._pdf_page()
    anchor = JM_point_from_py( point)
    note = mupdf.pdf_create_annot(pdf_page, mupdf.PDF_ANNOT_TEXT)
    default_box = mupdf.pdf_annot_rect(note)
    moved_box = mupdf.fz_make_rect(
        anchor.x,
        anchor.y,
        anchor.x + default_box.x1 - default_box.x0,
        anchor.y + default_box.y1 - default_box.y0,
    )
    mupdf.pdf_set_annot_rect(note, moved_box)
    mupdf.pdf_set_annot_contents(note, text)
    if icon:
        mupdf.pdf_set_annot_icon_name(note, icon)
    mupdf.pdf_update_annot(note)
    JM_add_annot_id(note, "A")
    return Annot(note)
def _add_text_marker(self, quads, annot_type):
    """Create a text marker annotation of `annot_type` over `quads`.

    Raises ValueError("is no PDF") if the parent document is not a PDF.
    Returns None when the helper yields a falsy result; otherwise the new
    annotation, registered in this page's annotation references.
    """
    CheckParent(self)
    if not self.parent.is_pdf:
        raise ValueError("is no PDF")
    marker = Page__add_text_marker(self, quads, annot_type)
    if marker:
        marker.parent = weakref.proxy(self)
        self._annot_refs[id(marker)] = marker
        return marker
    return None
def _addAnnot_FromString(self, linklist):
    """Add links / annotations from a tuple of PDF object source strings.

    Args:
        linklist: tuple of object sources.  An empty sequence is a no-op.

    Raises:
        ValueError: `linklist` is non-empty but not a tuple.

    Items that are empty or cannot be turned into a PDF object are skipped
    with a message; they do not abort the remaining items.
    """
    CheckParent(self)
    if g_use_extra:
        # Rebind the class attribute so later calls go straight to the
        # native implementation.
        self.__class__._addAnnot_FromString = extra.Page_addAnnot_FromString
        #log('Page._addAnnot_FromString() deferring to extra.Page_addAnnot_FromString().')
        return extra.Page_addAnnot_FromString( self.this, linklist)
    page = _as_pdf_page(self.this)
    lcount = len(linklist)  # link count
    if lcount < 1:
        return
    # insert links from the provided sources
    if not isinstance(linklist, tuple):
        raise ValueError( "bad 'linklist' argument")
    if not mupdf.pdf_dict_get( page.obj(), PDF_NAME('Annots')).m_internal:
        mupdf.pdf_dict_put_array( page.obj(), PDF_NAME('Annots'), lcount)
    annots = mupdf.pdf_dict_get( page.obj(), PDF_NAME('Annots'))
    assert annots.m_internal, f'{lcount=} {annots.m_internal=}'
    for i, txtpy in enumerate(linklist):
        text = JM_StrAsChar(txtpy)
        if not text:
            # Format the index into the text, as the except-branch below
            # does, instead of passing it as a second argument.
            message("skipping bad link / annot item %i." % i)
            continue
        try:
            annot = mupdf.pdf_add_object( page.doc(), JM_pdf_obj_from_str( page.doc(), text))
            ind_obj = mupdf.pdf_new_indirect( page.doc(), mupdf.pdf_to_num( annot), 0)
            mupdf.pdf_array_push( annots, ind_obj)
        except Exception:
            if g_exceptions_verbose:
                exception_info()
            message("skipping bad link / annot item %i.\n" % i)
def _addWidget(self, field_type, field_name):
    """Create a form field widget on this page and return it as `Annot`.

    Raises RuntimeError("cannot create widget") if creation yields no
    underlying object.
    """
    pdf_page = self._pdf_page()
    widget_annot = JM_create_widget(pdf_page.doc(), pdf_page, field_type, field_name)
    if widget_annot.m_internal:
        JM_add_annot_id(widget_annot, "W")
        return Annot(widget_annot)
    raise RuntimeError( "cannot create widget")
def _apply_redactions(self, text, images, graphics):
    """Run MuPDF's page redaction and return its result.

    `text`, `images` and `graphics` are passed through as the `text`,
    `image_method` and `line_art` redact options; black boxes are disabled.
    """
    pdf_page = self._pdf_page()
    redact_opts = mupdf.PdfRedactOptions()
    redact_opts.black_boxes = 0         # no black boxes
    redact_opts.text = text             # how to treat text
    redact_opts.image_method = images   # how to treat images
    redact_opts.line_art = graphics     # how to treat vector graphics
    return mupdf.pdf_redact_page(pdf_page.doc(), pdf_page, redact_opts)
- def _erase(self):
- self._reset_annot_refs()
- try:
- self.parent._forget_page(self)
- except Exception:
- exception_info()
- pass
- self.parent = None
- self.thisown = False
- self.number = None
- self.this = None
def _count_q_balance(self):
    """Count missing graphics state pushes and pops.

    Returns:
        A pair of integers (push, pop): the number of missing PDF "q"
        commands and the number of missing "Q" commands.  The page's
        graphics state becomes balanced when /Contents is prefixed with
        'push' copies of "q\n" and suffixed with 'pop' copies of "\nQ".
    """
    pdf_page = _as_pdf_page(self)  # underlying PDF page
    resources = mupdf.pdf_dict_get(pdf_page.obj(), mupdf.PDF_ENUM_NAME_Resources)
    contents = mupdf.pdf_dict_get(pdf_page.obj(), mupdf.PDF_ENUM_NAME_Contents)
    pdf_doc = _as_pdf_document(self.parent)  # underlying PDF document
    # hand back whatever the MuPDF function reports
    return mupdf.pdf_count_q_balance_outparams_fn(pdf_doc, resources, contents)
def _get_optional_content(self, oc: OptInt) -> OptStr:
    """Return the resource property name that refers to xref `oc`.

    Returns None if `oc` is None or 0.  Raises ValueError if the object at
    `oc` is neither an OCG nor an OCMD.  If no property refers to `oc` yet,
    the first unused name "MC<i>" is registered for it and returned.
    """
    if oc is None or oc == 0:
        return None
    source = self.parent.xref_object(oc, compressed=True)
    if "/Type/OCG" not in source and "/Type/OCMD" not in source:
        raise ValueError("bad optional content: 'oc'")
    # map xref -> property name (later entries win, as before)
    name_by_xref = {xref: name for name, xref in self._get_resource_properties()}
    if oc in name_by_xref:
        return name_by_xref[oc]
    counter = 0
    candidate = "MC%i" % counter
    while candidate in name_by_xref.values():
        counter += 1
        candidate = "MC%i" % counter
    self._set_resource_property(candidate, oc)
    return candidate
def _get_resource_properties(self):
    """List the page's Resources/Properties entries."""
    return JM_get_resource_properties(self._pdf_page().obj())
def _get_textpage(self, clip=None, flags=0, matrix=None):
    """Run the page through a structured-text device and return the
    resulting `mupdf.FzStextPage`.

    Args:
        clip: rectangle limiting extraction; the page's own bounds are used
            when it is None.
        flags: value passed to `mupdf.FzStextOptions`.
        matrix: transformation applied while running the page.
    """
    if g_use_extra:
        ll_tpage = extra.page_get_textpage(self.this, clip, flags, matrix)
        return mupdf.FzStextPage(ll_tpage)
    page = self.this
    options = mupdf.FzStextOptions(flags)
    # Default to page's rect if `clip` not specified, for #2048.
    # (A preceding unconditional conversion of `clip` was always
    # overwritten by this line and has been dropped.)
    rect = mupdf.fz_bound_page(page) if clip is None else JM_rect_from_py(clip)
    ctm = JM_matrix_from_py(matrix)
    tpage = mupdf.FzStextPage(rect)
    dev = mupdf.fz_new_stext_device(tpage, options)
    if _globals.no_device_caching:
        mupdf.fz_enable_device_hints( dev, mupdf.FZ_NO_CACHE)
    if isinstance(page, mupdf.FzPage):
        pass
    elif isinstance(page, mupdf.PdfPage):
        page = page.super()
    else:
        assert 0, f'Unrecognised {type(page)=}'
    mupdf.fz_run_page(page, dev, ctm, mupdf.FzCookie())
    mupdf.fz_close_device(dev)
    return tpage
def _insert_image(self,
        filename=None, pixmap=None, stream=None, imask=None, clip=None,
        overlay=1, rotate=0, keep_proportion=1, oc=0, width=0, height=0,
        xref=0, alpha=-1, _imgname=None, digests=None
        ):
    """Place an image on the page and return `(xref, digests_or_None)`.

    The image source is picked in this order: an existing image object
    (`xref` > 0), `stream`, `filename`, otherwise `pixmap`.  `digests` is
    used as a mapping from MD5 digest to image xref: a hit re-uses that
    xref, a miss adds a new image object and records it.

    Returns `(img_xref, digests)` if a new image object was added, else
    `(img_xref, None)`.  Raises ValueError if `xref` does not look like an
    image, or if `imask` is given for an image without a compressed buffer.

    The `do_*` flags emulate the `goto` labels noted in the comments below:
    each stage runs only while its flag is still 1.  `alpha` is not
    referenced in this body.
    NOTE(review): `digests` must not be None on the pixmap and stream paths
    (`.get` is called on it) - confirm callers always pass a dict.
    """
    maskbuf = mupdf.FzBuffer()
    page = self._pdf_page()
    # This will create an empty PdfDocument with a call to
    # pdf_new_document() then assign page.doc()'s return value to it (which
    # drop the original empty pdf_document).
    pdf = page.doc()
    w = width
    h = height
    img_xref = xref
    rc_digest = 0  # becomes 1 once a new image object was added
    # Stage switches; all stages enabled until a shortcut disables them.
    do_process_pixmap = 1
    do_process_stream = 1
    do_have_imask = 1
    do_have_image = 1
    do_have_xref = 1
    if xref > 0:
        # Re-use an existing image object: only the final stage is needed.
        ref = mupdf.pdf_new_indirect(pdf, xref, 0)
        w = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Width'), PDF_NAME('W')))
        h = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Height'), PDF_NAME('H')))
        if w + h == 0:
            raise ValueError( MSG_IS_NO_IMAGE)
        #goto have_xref()
        do_process_pixmap = 0
        do_process_stream = 0
        do_have_imask = 0
        do_have_image = 0
    else:
        if stream:
            imgbuf = JM_BufferFromBytes(stream)
            do_process_pixmap = 0
        else:
            if filename:
                imgbuf = mupdf.fz_read_file(filename)
                #goto have_stream()
                do_process_pixmap = 0
    if do_process_pixmap:
        #log( 'do_process_pixmap')
        # process pixmap ---------------------------------
        arg_pix = pixmap.this
        w = arg_pix.w()
        h = arg_pix.h()
        digest = mupdf.fz_md5_pixmap2(arg_pix)
        md5_py = digest
        temp = digests.get(md5_py, None)
        if temp is not None:
            # Same pixmap was inserted before: re-use its xref.
            img_xref = temp
            ref = mupdf.pdf_new_indirect(page.doc(), img_xref, 0)
            #goto have_xref()
            do_process_stream = 0
            do_have_imask = 0
            do_have_image = 0
        else:
            if arg_pix.alpha() == 0:
                image = mupdf.fz_new_image_from_pixmap(arg_pix, mupdf.FzImage())
            else:
                # Pixmap has alpha: build a separate mask image from it.
                pm = mupdf.fz_convert_pixmap(
                        arg_pix,
                        mupdf.FzColorspace(),
                        mupdf.FzColorspace(),
                        mupdf.FzDefaultColorspaces(None),
                        mupdf.FzColorParams(),
                        1,
                        )
                pm.alpha = 0
                pm.colorspace = None
                mask = mupdf.fz_new_image_from_pixmap(pm, mupdf.FzImage())
                image = mupdf.fz_new_image_from_pixmap(arg_pix, mask)
            #goto have_image()
            do_process_stream = 0
            do_have_imask = 0
    if do_process_stream:
        #log( 'do_process_stream')
        # process stream ---------------------------------
        # The digest covers the image bytes and, if given, the mask bytes.
        state = mupdf.FzMd5()
        if mupdf_cppyy:
            mupdf.fz_md5_update_buffer( state, imgbuf)
        else:
            mupdf.fz_md5_update(state, imgbuf.m_internal.data, imgbuf.m_internal.len)
        if imask:
            maskbuf = JM_BufferFromBytes(imask)
            if mupdf_cppyy:
                mupdf.fz_md5_update_buffer( state, maskbuf)
            else:
                mupdf.fz_md5_update(state, maskbuf.m_internal.data, maskbuf.m_internal.len)
        digest = mupdf.fz_md5_final2(state)
        md5_py = bytes(digest)
        temp = digests.get(md5_py, None)
        if temp is not None:
            # Same data was inserted before: re-use its xref and size.
            img_xref = temp
            ref = mupdf.pdf_new_indirect(page.doc(), img_xref, 0)
            w = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Width'), PDF_NAME('W')))
            h = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Height'), PDF_NAME('H')))
            #goto have_xref()
            do_have_imask = 0
            do_have_image = 0
        else:
            image = mupdf.fz_new_image_from_buffer(imgbuf)
            w = image.w()
            h = image.h()
            if not imask:
                #goto have_image()
                do_have_imask = 0
    if do_have_imask:
        # `fz_compressed_buffer` is reference counted and
        # `mupdf.fz_new_image_from_compressed_buffer2()`
        # is provided as a Swig-friendly wrapper for
        # `fz_new_image_from_compressed_buffer()`, so we can do things
        # straightforwardly.
        #
        cbuf1 = mupdf.fz_compressed_image_buffer( image)
        if not cbuf1.m_internal:
            raise ValueError( "uncompressed image cannot have mask")
        bpc = image.bpc()
        colorspace = image.colorspace()
        xres, yres = mupdf.fz_image_resolution(image)
        mask = mupdf.fz_new_image_from_buffer(maskbuf)
        # Rebuild the image from its compressed data with the mask attached.
        image = mupdf.fz_new_image_from_compressed_buffer2(
                w,
                h,
                bpc,
                colorspace,
                xres,
                yres,
                1, # interpolate
                0, # imagemask,
                list(), # decode
                list(), # colorkey
                cbuf1,
                mask,
                )

    if do_have_image:
        #log( 'do_have_image')
        # Add the new image object to the PDF and remember its digest.
        ref = mupdf.pdf_add_image(pdf, image)
        if oc:
            JM_add_oc_object(pdf, ref, oc)
        img_xref = mupdf.pdf_to_num(ref)
        digests[md5_py] = img_xref
        rc_digest = 1
    if do_have_xref:
        #log( 'do_have_xref')
        # Register the image under `_imgname` in /Resources/XObject and
        # add a content stream that draws it with the computed matrix.
        resources = mupdf.pdf_dict_get_inheritable(page.obj(), PDF_NAME('Resources'))
        if not resources.m_internal:
            resources = mupdf.pdf_dict_put_dict(page.obj(), PDF_NAME('Resources'), 2)
        xobject = mupdf.pdf_dict_get(resources, PDF_NAME('XObject'))
        if not xobject.m_internal:
            xobject = mupdf.pdf_dict_put_dict(resources, PDF_NAME('XObject'), 2)
        mat = calc_image_matrix(w, h, clip, rotate, keep_proportion)
        mupdf.pdf_dict_puts(xobject, _imgname, ref)
        nres = mupdf.fz_new_buffer(50)
        s = f"\nq\n{_format_g((mat.a, mat.b, mat.c, mat.d, mat.e, mat.f))} cm\n/{_imgname} Do\nQ\n"
        #s = s.replace('\n', '\r\n')
        mupdf.fz_append_string(nres, s)
        JM_insert_contents(pdf, page.obj(), nres, overlay)
    if rc_digest:
        return img_xref, digests
    else:
        return img_xref, None
def _insertFont(self, fontname, bfname, fontfile, fontbuffer, set_simple, idx, wmode, serif, encoding, ordering):
    """Insert a font into the PDF and reference it as `fontname` in the
    page's /Resources/Font dictionary.

    Returns the value produced by `JM_insert_font`.  Raises
    RuntimeError("cannot insert font") if that value carries no xref.
    """
    pdf_page = self._pdf_page()
    pdf = pdf_page.doc()
    value = JM_insert_font(pdf, bfname, fontfile,fontbuffer, set_simple, idx, wmode, serif, encoding, ordering)
    # locate (or create) /Resources and /Resources/Font
    resources = mupdf.pdf_dict_get_inheritable(pdf_page.obj(), PDF_NAME('Resources'))
    if not resources.pdf_is_dict():
        resources = mupdf.pdf_dict_put_dict(pdf_page.obj(), PDF_NAME("Resources"), 5)
    font_dict = mupdf.pdf_dict_get(resources, PDF_NAME('Font'))
    if not font_dict.m_internal:  # page has no fonts yet
        font_dict = mupdf.pdf_new_dict(pdf, 5)
        mupdf.pdf_dict_putl(pdf_page.obj(), font_dict, PDF_NAME('Resources'), PDF_NAME('Font'))
    # the font dictionary gets a named indirect reference to the font
    _, font_xref = JM_INT_ITEM(value, 0)
    if not font_xref:
        raise RuntimeError( "cannot insert font")
    mupdf.pdf_dict_puts(font_dict, fontname, mupdf.pdf_new_indirect(pdf, font_xref, 0))
    return value
def _load_annot(self, name, xref):
    """Look up an annotation by `xref`, or by `name` when `xref` is 0.

    Returns an `Annot`, or None if nothing was found.
    """
    pdf_page = self._pdf_page()
    found = (
        JM_get_annot_by_name(pdf_page, name)
        if xref == 0
        else JM_get_annot_by_xref(pdf_page, xref)
    )
    if not found.m_internal:
        return None
    return Annot(found)
def _makePixmap(self, doc, ctm, cs, alpha=0, annots=1, clip=None):
    """Render this page via `JM_pixmap_from_page` and wrap the result in a `Pixmap`."""
    return Pixmap(JM_pixmap_from_page(doc, self.this, ctm, cs, alpha, annots, clip))
def _other_box(self, boxtype):
    """Return the page box stored under key `boxtype`, or None.

    None is returned when the page is not a PDF page, or the entry is not
    an array, or the resulting rectangle is infinite.
    """
    box = mupdf.FzRect( mupdf.FzRect.Fixed_INFINITE)
    pdf_page = _as_pdf_page(self.this, required=False)
    if pdf_page.m_internal:
        entry = mupdf.pdf_dict_gets( pdf_page.obj(), boxtype)
        if mupdf.pdf_is_array(entry):
            box = mupdf.pdf_to_rect(entry)
    if not mupdf.fz_is_infinite_rect( box):
        return JM_py_from_rect(box)
    return None
def _pdf_page(self, required=True):
    """Return `self.this` converted by `_as_pdf_page`, forwarding `required`."""
    pdf_page = _as_pdf_page(self.this, required=required)
    return pdf_page
- def _reset_annot_refs(self):
- """Invalidate / delete all annots of this page."""
- self._annot_refs.clear()
- def _set_opacity(self, gstate=None, CA=1, ca=1, blendmode=None):
- if CA >= 1 and ca >= 1 and blendmode is None:
- return
- tCA = int(round(max(CA , 0) * 100))
- if tCA >= 100:
- tCA = 99
- tca = int(round(max(ca, 0) * 100))
- if tca >= 100:
- tca = 99
- gstate = "fitzca%02i%02i" % (tCA, tca)
- if not gstate:
- return
- page = _as_pdf_page(self.this)
- resources = mupdf.pdf_dict_get(page.obj(), PDF_NAME('Resources'))
- if not resources.m_internal:
- resources = mupdf.pdf_dict_put_dict(page.obj(), PDF_NAME('Resources'), 2)
- extg = mupdf.pdf_dict_get(resources, PDF_NAME('ExtGState'))
- if not extg.m_internal:
- extg = mupdf.pdf_dict_put_dict(resources, PDF_NAME('ExtGState'), 2)
- n = mupdf.pdf_dict_len(extg)
- for i in range(n):
- o1 = mupdf.pdf_dict_get_key(extg, i)
- name = mupdf.pdf_to_name(o1)
- if name == gstate:
- return gstate
- opa = mupdf.pdf_new_dict(page.doc(), 3)
- mupdf.pdf_dict_put_real(opa, PDF_NAME('CA'), CA)
- mupdf.pdf_dict_put_real(opa, PDF_NAME('ca'), ca)
- mupdf.pdf_dict_puts(extg, gstate, opa)
- return gstate
- def _set_pagebox(self, boxtype, rect):
- doc = self.parent
- if doc is None:
- raise ValueError("orphaned object: parent is None")
- if not doc.is_pdf:
- raise ValueError("is no PDF")
- valid_boxes = ("CropBox", "BleedBox", "TrimBox", "ArtBox")
- if boxtype not in valid_boxes:
- raise ValueError("bad boxtype")
- rect = Rect(rect)
- mb = self.mediabox
- rect = Rect(rect[0], mb.y1 - rect[3], rect[2], mb.y1 - rect[1])
- if not (mb.x0 <= rect.x0 < rect.x1 <= mb.x1 and mb.y0 <= rect.y0 < rect.y1 <= mb.y1):
- raise ValueError(f"{boxtype} not in MediaBox")
- doc.xref_set_key(self.xref, boxtype, f"[{_format_g(tuple(rect))}]")
def _set_resource_property(self, name, xref):
    """Store `xref` under `name` via `JM_set_resource_property` on the page object."""
    page_obj = self._pdf_page().obj()
    JM_set_resource_property(page_obj, name, xref)
def _show_pdf_page(self, fz_srcpage, overlay=1, matrix=None, xref=0, oc=0, clip=None, graftmap=None, _imgname=None):
    """Show a source page on this page via two nested XObjects.

    The source page becomes a Form XObject (`xobj1`); a second XObject
    (`xobj2`) with bounding box `clip` and matrix `matrix` invokes it and is
    registered under `_imgname` in this page's /Resources/XObject.  A new
    content stream "q /<_imgname> Do Q" is then inserted.

    Returns `xref` if it was non-zero, else the object number of `xobj1`.
    NOTE(review): `graftmap.this` is read unconditionally, so `graftmap`
    must not be None despite its default - confirm with callers.
    """
    cropbox = JM_rect_from_py(clip)
    mat = JM_matrix_from_py(matrix)
    rc_xref = xref
    tpage = _as_pdf_page(self.this)
    tpageref = tpage.obj()
    pdfout = tpage.doc()    # target PDF
    ENSURE_OPERATION(pdfout)
    #-------------------------------------------------------------
    # convert the source page to a Form XObject
    #-------------------------------------------------------------
    xobj1 = JM_xobject_from_page(pdfout, fz_srcpage, xref, graftmap.this)
    if not rc_xref:
        # no xref was supplied: report the one just produced
        rc_xref = mupdf.pdf_to_num(xobj1)
    #-------------------------------------------------------------
    # create referencing XObject (controls display on target page)
    #-------------------------------------------------------------
    # fill reference to xobj1 into the /Resources
    #-------------------------------------------------------------
    subres1 = mupdf.pdf_new_dict(pdfout, 5)
    mupdf.pdf_dict_puts(subres1, "fullpage", xobj1)
    subres = mupdf.pdf_new_dict(pdfout, 5)
    mupdf.pdf_dict_put(subres, PDF_NAME('XObject'), subres1)
    res = mupdf.fz_new_buffer(20)
    mupdf.fz_append_string(res, "/fullpage Do")
    xobj2 = mupdf.pdf_new_xobject(pdfout, cropbox, mat, subres, res)
    if oc > 0:
        JM_add_oc_object(pdfout, mupdf.pdf_resolve_indirect(xobj2), oc)
    #-------------------------------------------------------------
    # update target page with xobj2:
    #-------------------------------------------------------------
    # 1. insert Xobject in Resources
    #-------------------------------------------------------------
    resources = mupdf.pdf_dict_get_inheritable(tpageref, PDF_NAME('Resources'))
    if not resources.m_internal:
        resources = mupdf.pdf_dict_put_dict(tpageref,PDF_NAME('Resources'), 5)
    # `subres` is re-used here for the target page's /XObject dictionary
    subres = mupdf.pdf_dict_get(resources, PDF_NAME('XObject'))
    if not subres.m_internal:
        subres = mupdf.pdf_dict_put_dict(resources, PDF_NAME('XObject'), 5)
    mupdf.pdf_dict_puts(subres, _imgname, xobj2)
    #-------------------------------------------------------------
    # 2. make and insert new Contents object
    #-------------------------------------------------------------
    nres = mupdf.fz_new_buffer(50) # buffer for Do-command
    mupdf.fz_append_string(nres, " q /")   # Do-command
    mupdf.fz_append_string(nres, _imgname)
    mupdf.fz_append_string(nres, " Do Q ")
    JM_insert_contents(pdfout, tpageref, nres, overlay)
    return rc_xref
def add_caret_annot(self, point: point_like) -> Annot:
    """Add a 'Caret' annotation at `point`."""
    saved_rotation = annot_preprocess(self)
    try:
        caret = self._add_caret_annot(point)
    finally:
        # put back the rotation value handed out by annot_preprocess
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    caret = Annot( caret)
    annot_postprocess(self, caret)
    assert hasattr( caret, 'parent')
    return caret
def add_circle_annot(self, rect: rect_like) -> Annot:
    """Add a 'Circle' (ellipse, oval) annotation inside `rect`."""
    saved_rotation = annot_preprocess(self)
    try:
        circle = self._add_square_or_circle(rect, mupdf.PDF_ANNOT_CIRCLE)
    finally:
        # put back the rotation value handed out by annot_preprocess
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, circle)
    return circle
def add_file_annot(
        self,
        point: point_like,
        buffer_: ByteString,
        filename: str,
        ufilename: OptStr =None,
        desc: OptStr =None,
        icon: OptStr =None
        ) -> Annot:
    """Add a 'FileAttachment' annotation at `point`.

    All arguments are forwarded unchanged to `_add_file_annot`.
    """
    saved_rotation = annot_preprocess(self)
    try:
        attachment = self._add_file_annot(
            point,
            buffer_,
            filename,
            ufilename=ufilename,
            desc=desc,
            icon=icon,
        )
    finally:
        # put back the rotation value handed out by annot_preprocess
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, attachment)
    return attachment
def add_freetext_annot(
        self,
        rect: rect_like,
        text: str,
        *,
        fontsize: float =11,
        fontname: OptStr =None,
        text_color: OptSeq =None,
        fill_color: OptSeq =None,
        border_color: OptSeq =None,
        border_width: float =0,
        dashes: OptSeq =None,
        callout: OptSeq =None,
        line_end: int=mupdf.PDF_ANNOT_LE_OPEN_ARROW,
        opacity: float =1,
        align: int =0,
        rotate: int =0,
        richtext=False,
        style=None,
        ) -> Annot:
    """Add a 'FreeText' annotation in `rect`.

    Every keyword option is forwarded unchanged to `_add_freetext_annot`.
    """
    options = dict(
        fontsize=fontsize,
        fontname=fontname,
        text_color=text_color,
        fill_color=fill_color,
        border_color=border_color,
        border_width=border_width,
        dashes=dashes,
        callout=callout,
        line_end=line_end,
        opacity=opacity,
        align=align,
        rotate=rotate,
        richtext=richtext,
        style=style,
    )
    saved_rotation = annot_preprocess(self)
    try:
        freetext = self._add_freetext_annot(rect, text, **options)
    finally:
        # put back the rotation value handed out by annot_preprocess
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, freetext)
    return freetext
def add_highlight_annot(self, quads=None, start=None,
        stop=None, clip=None) -> Annot:
    """Add a 'Highlight' annotation.

    With `quads` None, the area comes from `get_highlight_selection` using
    `start`, `stop` and `clip`; otherwise `quads` is checked and used.
    """
    marker_quads = (
        get_highlight_selection(self, start=start, stop=stop, clip=clip)
        if quads is None
        else CheckMarkerArg(quads)
    )
    return self._add_text_marker(marker_quads, mupdf.PDF_ANNOT_HIGHLIGHT)
def add_ink_annot(self, handwriting: list) -> Annot:
    """Add an 'Ink' ('handwriting') annotation.

    `handwriting` must be a list of lists of point_likes.
    """
    saved_rotation = annot_preprocess(self)
    try:
        ink = self._add_ink_annot(handwriting)
    finally:
        # put back the rotation value handed out by annot_preprocess
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, ink)
    return ink
def add_line_annot(self, p1: point_like, p2: point_like) -> Annot:
    """Add a 'Line' annotation from `p1` to `p2`."""
    saved_rotation = annot_preprocess(self)
    try:
        line = self._add_line_annot(p1, p2)
    finally:
        # put back the rotation value handed out by annot_preprocess
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, line)
    return line
def add_polygon_annot(self, points: list) -> Annot:
    """Add a 'Polygon' annotation through `points`."""
    saved_rotation = annot_preprocess(self)
    try:
        polygon = self._add_multiline(points, mupdf.PDF_ANNOT_POLYGON)
    finally:
        # put back the rotation value handed out by annot_preprocess
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, polygon)
    return polygon
def add_polyline_annot(self, points: list) -> Annot:
    """Add a 'PolyLine' annotation through `points`."""
    saved_rotation = annot_preprocess(self)
    try:
        polyline = self._add_multiline(points, mupdf.PDF_ANNOT_POLY_LINE)
    finally:
        # put back the rotation value handed out by annot_preprocess
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, polyline)
    return polyline
def add_rect_annot(self, rect: rect_like) -> Annot:
    """Add a 'Square' (rectangle) annotation covering `rect`."""
    saved_rotation = annot_preprocess(self)
    try:
        square = self._add_square_or_circle(rect, mupdf.PDF_ANNOT_SQUARE)
    finally:
        # put back the rotation value handed out by annot_preprocess
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, square)
    return square
def add_redact_annot(
        self,
        quad,
        text: OptStr =None,
        fontname: OptStr =None,
        fontsize: float =11,
        align: int =0,
        fill: OptSeq =None,
        text_color: OptSeq =None,
        cross_out: bool =True,
        ) -> Annot:
    """Add a 'Redact' annotation over `quad`.

    If `text` contains at least one non-whitespace character, a default
    appearance string is built from `text_color`, `fontname` and
    `fontsize`, and `fill` defaults to white; otherwise `text` is dropped.
    With `cross_out` true, diagonal strokes are appended to the
    annotation's appearance stream.
    """
    da_str = None
    has_visible_text = bool(text) and not set(string.whitespace).issuperset(text)
    if has_visible_text:
        CheckColor(fill)
        CheckColor(text_color)
        fontname = fontname or "Helv"
        fontsize = fontsize or 11
        text_color = text_color or (0, 0, 0)
        if hasattr(text_color, "__float__"):
            # a single number means a gray level
            text_color = (text_color,) * 3
        if len(text_color) > 3:
            text_color = text_color[:3]
        da_str = "{:g} {:g} {:g} rg /{f:s} {s:g} Tf".format(
            *text_color, f=fontname, s=fontsize
        )
        if fill is None:
            fill = (1, 1, 1)
        if fill:
            if hasattr(fill, "__float__"):
                fill = (fill,) * 3
            if len(fill) > 3:
                fill = fill[:3]
    else:
        text = None

    saved_rotation = annot_preprocess(self)
    try:
        redact = self._add_redact_annot(quad, text=text, da_str=da_str,
                                        align=align, fill=fill)
    finally:
        # put back the rotation value handed out by annot_preprocess
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, redact)
    #-------------------------------------------------------------
    # change appearance to show a crossed-out rectangle
    #-------------------------------------------------------------
    if cross_out:
        ap_lines = redact._getAP().splitlines()[:-1]  # get the 4 commands only
        _, LL, LR, UR, UL = ap_lines
        ap_lines.extend((LR, LL, UR, LL, UL, b"S"))
        redact._setAP(b"\n".join(ap_lines), 0)
    return redact
def add_squiggly_annot(
        self,
        quads=None,
        start=None,
        stop=None,
        clip=None,
        ) -> Annot:
    """Add a 'Squiggly' annotation.

    With `quads` None, the area comes from `get_highlight_selection` using
    `start`, `stop` and `clip`; otherwise `quads` is checked and used.
    """
    marker_quads = (
        get_highlight_selection(self, start=start, stop=stop, clip=clip)
        if quads is None
        else CheckMarkerArg(quads)
    )
    return self._add_text_marker(marker_quads, mupdf.PDF_ANNOT_SQUIGGLY)
def add_stamp_annot(self, rect: rect_like, stamp=0) -> Annot:
    """Add a ('rubber') 'Stamp' annotation in `rect`."""
    saved_rotation = annot_preprocess(self)
    try:
        stamp_annot = self._add_stamp_annot(rect, stamp)
    finally:
        # put back the rotation value handed out by annot_preprocess
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, stamp_annot)
    return stamp_annot
def add_strikeout_annot(self, quads=None, start=None, stop=None, clip=None) -> Annot:
    """Add a 'StrikeOut' annotation.

    With `quads` None, the area comes from `get_highlight_selection` using
    `start`, `stop` and `clip`; otherwise `quads` is checked and used.
    """
    marker_quads = (
        get_highlight_selection(self, start=start, stop=stop, clip=clip)
        if quads is None
        else CheckMarkerArg(quads)
    )
    return self._add_text_marker(marker_quads, mupdf.PDF_ANNOT_STRIKE_OUT)
def add_text_annot(self, point: point_like, text: str, icon: str ="Note") -> Annot:
    """Add a 'Text' (sticky note) annotation at `point`."""
    saved_rotation = annot_preprocess(self)
    try:
        note = self._add_text_annot(point, text, icon=icon)
    finally:
        # put back the rotation value handed out by annot_preprocess
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, note)
    return note
def add_underline_annot(self, quads=None, start=None, stop=None, clip=None) -> Annot:
    """Add an 'Underline' annotation.

    With `quads` None, the area comes from `get_highlight_selection` using
    `start`, `stop` and `clip`; otherwise `quads` is checked and used.
    """
    marker_quads = (
        get_highlight_selection(self, start=start, stop=stop, clip=clip)
        if quads is None
        else CheckMarkerArg(quads)
    )
    return self._add_text_marker(marker_quads, mupdf.PDF_ANNOT_UNDERLINE)
def add_widget(self, widget: Widget) -> Annot:
    """Add a 'Widget' (form field) described by `widget`.

    Raises ValueError("is no PDF") for non-PDF documents.  The widget is
    validated first, then linked to the new annotation and updated.
    """
    CheckParent(self)
    if not self.parent.is_pdf:
        raise ValueError("is no PDF")
    widget._validate()
    field_annot = self._addWidget(widget.field_type, widget.field_name)
    if not field_annot:
        return None
    field_annot.thisown = True
    page_proxy = weakref.proxy(self)  # owning page object
    field_annot.parent = page_proxy
    self._annot_refs[id(field_annot)] = field_annot
    widget.parent = field_annot.parent
    widget._annot = field_annot
    widget.update()
    return field_annot
def annot_names(self):
    """List of names of annotations, fields and links.

    Returns an empty list if the page is not a PDF page.
    """
    CheckParent(self)
    page = self._pdf_page(required=False)
    if not page.m_internal:
        return []
    return JM_get_annot_id_list(page)
def annot_xrefs(self):
    """List of xref numbers of annotations, fields and links."""
    xref_list = JM_get_annot_xref_list2(self)
    return xref_list
-
def annots(self, types=None):
    """Generator over the annotations of a page.

    Args:
        types: (list) annotation types to select.  If it has no
            `__getitem__` (e.g. None), every annotation is yielded.
            E.g. types=[PDF_ANNOT_LINE] only yields line annotations.

    Links, popups and widgets are never yielded.
    """
    skip_types = (mupdf.PDF_ANNOT_LINK, mupdf.PDF_ANNOT_POPUP, mupdf.PDF_ANNOT_WIDGET)
    unrestricted = not hasattr(types, "__getitem__")
    selected = [
        item[0]
        for item in self.annot_xrefs()
        if (unrestricted or item[1] in types) and item[1] not in skip_types
    ]
    for xref in selected:
        current = self.load_annot(xref)
        current._yielded = True
        yield current
def recolor(self, components=1):
    """Convert colorspaces of objects on the page.

    `components` must be 1, 3 or 4; any other value raises ValueError.
    """
    allowed = (1, 3, 4)
    if components not in allowed:
        raise ValueError("components must be one of 1, 3, 4")
    pdf_doc = _as_pdf_document(self.parent)
    raw_opts = mupdf.pdf_recolor_options()
    raw_opts.num_comp = components
    mupdf.pdf_recolor_page(pdf_doc, self.number, mupdf.PdfRecolorOptions(raw_opts))
def clip_to_rect(self, rect):
    """Clip away page content outside the rectangle.

    Raises ValueError if `rect` is infinite or does not overlap the page.
    """
    region = Rect(rect)
    if region.is_infinite or (region & self.rect).is_empty:
        raise ValueError("rect must not be infinite or empty")
    region *= self.transformation_matrix
    pdf_page = _as_pdf_page(self)
    mupdf.pdf_clip_page(pdf_page, JM_rect_from_py(region))
@property
def artbox(self):
    """The ArtBox; falls back to the CropBox when none is stored."""
    stored = self._other_box("ArtBox")
    if stored is None:
        return self.cropbox
    top = self.mediabox.y1
    # flip the y-coordinates against the MediaBox's y1
    return Rect(stored[0], top - stored[3], stored[2], top - stored[1])
@property
def bleedbox(self):
    """The BleedBox; falls back to the CropBox when none is stored."""
    stored = self._other_box("BleedBox")
    if stored is None:
        return self.cropbox
    top = self.mediabox.y1
    # flip the y-coordinates against the MediaBox's y1
    return Rect(stored[0], top - stored[3], stored[2], top - stored[1])
def bound(self):
    """Get page rectangle.

    If MuPDF reports an infinite rectangle for a PDF page, a rectangle
    built from the CropBox dimensions (width and height swapped unless the
    rotation is 0 or 180) is returned instead, and the most recent stored
    MuPDF warning, if any, is shown.
    """
    CheckParent(self)
    page = _as_fz_page(self.this)
    val = Rect(mupdf.fz_bound_page(page))

    if val.is_infinite and self.parent.is_pdf:
        cb = self.cropbox
        w, h = cb.width, cb.height
        if self.rotation not in (0, 180):
            w, h = h, w
        val = Rect(0, 0, w, h)
        # Only index the last warning if there is one; an empty warnings
        # text would otherwise raise IndexError.
        warnings = TOOLS.mupdf_warnings(reset=False).splitlines()
        if warnings:
            message(warnings[-1])

    return val
def clean_contents(self, sanitize=1):
    """Filter the page's contents with MuPDF's content filter.

    When `sanitize` is falsy and the page is not yet wrapped, the contents
    are wrapped first.  Does nothing further for non-PDF pages.
    """
    needs_wrap = not sanitize and not self.is_wrapped
    if needs_wrap:
        self.wrap_contents()
    pdf_page = _as_pdf_page( self.this, required=False)
    if pdf_page.m_internal:
        filter_opts = _make_PdfFilterOptions(recurse=1, sanitize=sanitize)
        mupdf.pdf_filter_page_contents( pdf_page.doc(), pdf_page, filter_opts)
-
@property
def cropbox(self):
    """The CropBox (the page bounds for non-PDF pages)."""
    CheckParent(self)
    pdf_page = self._pdf_page(required=False)
    if pdf_page.m_internal:
        box = JM_cropbox(pdf_page.obj())
    else:
        box = mupdf.fz_bound_page(self.this)
    return Rect(box)
@property
def cropbox_position(self):
    """Top-left point (`tl`) of the CropBox."""
    box = self.cropbox
    return box.tl
def delete_annot(self, annot):
    """Delete annot and return next one.

    Annotations found by `JM_find_annot_irt` for `annot` are deleted first.
    The returned object wraps the annotation that followed `annot`.
    """
    CheckParent(self)
    CheckParent(annot)
    pdf_page = self._pdf_page()
    # first remove all /IRT annots, one at a time, until none is left
    while True:
        reply = JM_find_annot_irt(annot.this)
        if not reply:
            break
        mupdf.pdf_delete_annot(pdf_page, reply.this)
    following = mupdf.pdf_next_annot(annot.this)  # remember the successor
    mupdf.pdf_delete_annot(pdf_page, annot.this)
    successor = Annot(following)
    if successor:
        successor.thisown = True
        successor.parent = weakref.proxy(self)  # owning page object
        successor.parent._annot_refs[id(successor)] = successor
    annot._erase()
    return successor
def delete_link(self, linkdict):
    """Delete a Link.

    `linkdict` is a link dictionary; anything that is not a dict is
    ignored.  The link object is removed from the page's /Annots array and
    from the PDF, and the matching Python link object, if registered under
    `linkdict["id"]`, is erased.
    """
    CheckParent(self)
    if not isinstance( linkdict, dict):
        return  # have no dictionary

    def _erase_link_object():
        # Common exit: erase the registered link object, ignoring failures.
        if linkdict["xref"] == 0:
            return
        try:
            self._annot_refs[linkdict["id"]]._erase()
        except Exception:
            # Don't print this exception, to match classic. Issue #2841.
            if g_exceptions_verbose > 1:
                exception_info()

    pdf_page = _as_pdf_page(self.this, required=False)
    if not pdf_page.m_internal:
        return _erase_link_object()  # have no PDF
    xref = linkdict[dictkey_xref]
    if xref < 1:
        return _erase_link_object()  # invalid xref
    annots = mupdf.pdf_dict_get( pdf_page.obj(), PDF_NAME('Annots'))
    if not annots.m_internal:
        return _erase_link_object()  # have no annotations
    count = mupdf.pdf_array_len( annots)
    if count == 0:
        return _erase_link_object()
    for idx in range( count):
        if mupdf.pdf_to_num( mupdf.pdf_array_get( annots, idx)) == xref:
            break  # found xref in annotations
    else:
        return _erase_link_object()  # xref not in annotations
    mupdf.pdf_array_delete( annots, idx)  # delete entry in annotations
    mupdf.pdf_delete_object( pdf_page.doc(), xref)  # delete link object
    mupdf.pdf_dict_put( pdf_page.obj(), PDF_NAME('Annots'), annots)
    JM_refresh_links( pdf_page)
    return _erase_link_object()
@property
def derotation_matrix(self) -> Matrix:
    """Reflects page de-rotation."""
    if g_use_extra:
        return Matrix(extra.Page_derotate_matrix( self.this))
    pdf_page = self._pdf_page(required=False)
    if pdf_page.m_internal:
        return Matrix(JM_derotate_page_matrix(pdf_page))
    # not a PDF page
    return Matrix(mupdf.FzRect(mupdf.FzRect.UNIT))
def extend_textpage(self, tpage, flags=0, matrix=None):
    """Run this page into the existing text page `tpage`.

    `tpage.this` must be a `mupdf.FzStextPage`.  `flags` is set on the
    extraction options and `matrix` is applied while running the page.
    """
    stext_page = tpage.this
    assert isinstance( stext_page, mupdf.FzStextPage)
    stext_opts = mupdf.FzStextOptions()
    stext_opts.flags = flags
    ctm = JM_matrix_from_py(matrix)
    device = mupdf.FzDevice(stext_page, stext_opts)
    mupdf.fz_run_page( self.this, device, ctm, mupdf.FzCookie())
    mupdf.fz_close_device( device)
@property
def first_annot(self):
    """First annotation of the page.

    Returns:
        An Annot registered in this page's `_annot_refs`, or None if the
        page is not a PDF page or has no annotation.
    """
    CheckParent(self)
    pdf_page = self._pdf_page(required=False)
    if pdf_page.m_internal:
        raw_annot = mupdf.pdf_first_annot(pdf_page)
        if raw_annot.m_internal:
            wrapped = Annot(raw_annot)
            wrapped.thisown = True
            wrapped.parent = weakref.proxy(self)  # owning page object
            self._annot_refs[id(wrapped)] = wrapped
            return wrapped
    return None
@property
def first_link(self):
    """First link on the page; simply the result of load_links()."""
    link = self.load_links()
    return link
@property
def first_widget(self):
    """First widget/field of the page.

    Returns:
        A Widget filled from the page's first widget annotation, or None
        if the page is not a PDF page or has no widget.
    """
    CheckParent(self)
    pdf_page = self._pdf_page(required=False)
    if not pdf_page.m_internal:
        return None
    raw_widget = mupdf.pdf_first_widget(pdf_page)
    if not raw_widget.m_internal:
        return None
    # Wrap the underlying annotation and register it with the page ...
    annot = Annot(raw_widget)
    annot.thisown = True
    annot.parent = weakref.proxy(self)  # owning page object
    self._annot_refs[id(annot)] = annot
    # ... then hand back a Widget populated from it.
    field = Widget()
    TOOLS._fill_widget(annot, field)
    return field
def get_bboxlog(self, layers=None):
    """Run the page through a bbox device and return what it collected.

    The page rotation is temporarily set to 0 while the page is run.

    Args:
        layers: if truthy, the device is created with layer reporting
            switched on.

    Returns:
        The list filled by the bbox device.
    """
    CheckParent(self)
    old_rotation = self.rotation
    if old_rotation != 0:
        self.set_rotation(0)
    try:
        rc = []
        dev = JM_new_bbox_device(rc, bool(layers))
        mupdf.fz_run_page(self.this, dev, mupdf.FzMatrix(), mupdf.FzCookie())
        mupdf.fz_close_device(dev)
    finally:
        # Always put the original rotation back, even if running the page
        # failed - otherwise the page would silently stay de-rotated.
        if old_rotation != 0:
            self.set_rotation(old_rotation)
    return rc
def get_cdrawings(self, extended=None, callback=None, method=None):
    """Extract vector graphics ("line art") from the page.

    The page rotation is temporarily set to 0 while the page is run.

    Args:
        extended: if truthy, the line-art device is created with clip
            reporting switched on.
        callback: if callable, it is handed to the device instead of the
            internal result list.
        method: passed through to the device; if not None the internal
            result list is not used either.

    Returns:
        The list of extracted paths, or None when `callback` is callable
        or `method` is given.
    """
    CheckParent(self)
    old_rotation = self.rotation
    if old_rotation != 0:
        self.set_rotation(0)
    # Output is delivered elsewhere when a callback or method is supplied.
    delegated = callable(callback) or method is not None
    try:
        page = self.this
        if isinstance(page, mupdf.PdfPage):
            # Downcast pdf_page to fz_page.
            page = mupdf.FzPage(page)
        assert isinstance(page, mupdf.FzPage), f'{self.this=}'
        clips = bool(extended)
        prect = mupdf.fz_bound_page(page)
        if g_use_extra:
            rc = extra.get_cdrawings(page, extended, callback, method)
        else:
            rc = list()
            sink = callback if delegated else rc
            dev = JM_new_lineart_device_Device(sink, clips, method)
            dev.ptm = mupdf.FzMatrix(1, 0, 0, -1, 0, prect.y1)
            mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
            mupdf.fz_close_device(dev)
    finally:
        # Always put the original rotation back, even if extraction
        # failed - otherwise the page would silently stay de-rotated.
        if old_rotation != 0:
            self.set_rotation(old_rotation)
    if delegated:
        return None
    return rc
def get_contents(self):
    """Get xrefs of /Contents objects.

    Returns:
        A list of object numbers: one per array entry if /Contents is an
        array, a single number if it is any other existing object, and an
        empty list if the page has no /Contents.
    """
    CheckParent(self)
    pdf_page = _as_pdf_page(self.this)
    contents = mupdf.pdf_dict_get(pdf_page.obj(), mupdf.PDF_ENUM_NAME_Contents)
    if mupdf.pdf_is_array(contents):
        count = mupdf.pdf_array_len(contents)
        return [
            mupdf.pdf_to_num(mupdf.pdf_array_get(contents, pos))
            for pos in range(count)
        ]
    if contents.m_internal:
        return [mupdf.pdf_to_num(contents)]
    return []
def get_displaylist(self, annots=1):
    """Make a DisplayList from the page for Pixmap generation.

    Args:
        annots: truthy (default) builds the list from the whole page,
            falsy builds it from the page contents only.
    """
    CheckParent(self)
    if annots:
        make_list = mupdf.fz_new_display_list_from_page
    else:
        make_list = mupdf.fz_new_display_list_from_page_contents
    return DisplayList(make_list(self.this))
def get_drawings(self, extended: bool=False) -> list:
    """Retrieve vector graphics. The extended version includes clips.

    Note:
        Works on the dictionaries returned by get_cdrawings() and converts
        their point-like, rect-like and quad-like values to Point / Rect /
        Quad objects. For paths of type 'f' or 's', keys that are missing
        are added with value None.
    """
    default_keys = (
        'closePath',
        'fill',
        'color',
        'width',
        'lineCap',
        'lineJoin',
        'dashes',
        'stroke_opacity',
        'fill_opacity',
        'even_odd',
    )
    paths = self.get_cdrawings(extended=extended)
    for pos in range(len(paths)):
        path = paths[pos]
        # clip paths carry a "scissor", everything else a "rect"
        if path["type"].startswith("clip"):
            path["scissor"] = Rect(path["scissor"])
        else:
            path["rect"] = Rect(path["rect"])
        if path["type"] != "group":
            converted = []
            for entry in path["items"]:
                cmd = entry[0]
                operands = entry[1:]
                if cmd == "re":
                    entry = ("re", Rect(operands[0]).normalize(), operands[1])
                elif cmd == "qu":
                    entry = ("qu", Quad(operands[0]))
                else:
                    entry = (cmd,) + tuple(Point(op) for op in operands)
                converted.append(entry)
            path["items"] = converted
        if path['type'] in ('f', 's'):
            for key in default_keys:
                path.setdefault(key, None)
        paths[pos] = path
    return paths
class Drawpath(object):
    """Reflects a path dictionary from get_cdrawings().

    Every keyword argument becomes an instance attribute of the same name.
    """

    def __init__(self, **args):
        vars(self).update(args)
-
class Drawpathlist(object):
    """List of Path objects representing get_cdrawings() output.

    Besides the paths themselves, the object keeps running counters of
    how many paths of each type ("clip", "group", "f", "s", "fs") were
    appended.
    """

    def __getitem__(self, item):
        return self.paths.__getitem__(item)

    def __init__(self):
        self.paths = []
        self.path_count = 0
        self.group_count = 0
        self.clip_count = 0
        self.fill_count = 0
        self.stroke_count = 0
        self.fillstroke_count = 0

    def __len__(self):
        return self.paths.__len__()

    def append(self, path):
        """Append a path and update the per-type counters."""
        self.paths.append(path)
        self.path_count += 1
        if path.type == "clip":
            self.clip_count += 1
        elif path.type == "group":
            self.group_count += 1
        elif path.type == "f":
            self.fill_count += 1
        elif path.type == "s":
            self.stroke_count += 1
        elif path.type == "fs":
            self.fillstroke_count += 1

    def _parents(self, i, kind):
        """Return the enclosing paths of type `kind` for path number `i`.

        Walks backwards from path `i` and collects paths of the requested
        type whose level is smaller than that of path `i` and strictly
        smaller than the level of the parent collected before.
        """
        # An empty list has no valid index at all. Without this check a
        # negative index would never leave the wrap-around below.
        if self.path_count == 0 or i >= self.path_count:
            raise IndexError("bad path index")
        if i < 0:
            # wrap negative indices (same result as adding path_count
            # repeatedly until the index is non-negative)
            i %= self.path_count
        lvl = self.paths[i].level
        parents = []
        for p in reversed(self.paths[:i]):
            if p.type != kind or p.level >= lvl:
                continue
            if parents and p.level >= parents[-1].level:
                continue  # only accept smaller levels
            parents.append(p)
        return parents

    def clip_parents(self, i):
        """Return list of parent clip paths.

        Args:
            i: (int) return parents of this path.
        Returns:
            List of the clip parents.
        Raises:
            IndexError: if the list is empty or `i` is too large.
        """
        return self._parents(i, "clip")

    def group_parents(self, i):
        """Return list of parent group paths.

        Args:
            i: (int) return parents of this path.
        Returns:
            List of the group parents.
        Raises:
            IndexError: if the list is empty or `i` is too large.
        """
        return self._parents(i, "group")
def get_lineart(self) -> object:
    """Get page drawings paths.

    Note:
        Works on the extended output of get_cdrawings() and converts its
        point-like, rect-like and quad-like tuples to Point / Rect / Quad
        objects. Paths of type "f" get a fixed set of stroke-related
        attributes set to None.
        In contrast to get_drawings(), the result is a Drawpathlist whose
        entries are Drawpath objects.
    """
    raw_paths = self.get_cdrawings(extended=True)
    result = self.Drawpathlist()
    for raw in raw_paths:
        entry = self.Drawpath(**raw)
        # clip paths carry a "scissor", everything else a "rect"
        if entry.type == "clip":
            entry.scissor = Rect(raw["scissor"])
        else:
            entry.rect = Rect(raw["rect"])
        if entry.type != "group":
            converted = []
            for draw_cmd in raw["items"]:
                cmd = draw_cmd[0]
                operands = draw_cmd[1:]
                if cmd == "re":
                    draw_cmd = ("re", Rect(operands[0]).normalize(), operands[1])
                elif cmd == "qu":
                    draw_cmd = ("qu", Quad(operands[0]))
                else:
                    draw_cmd = (cmd,) + tuple(Point(op) for op in operands)
                converted.append(draw_cmd)
            entry.items = converted

        if entry.type == "f":
            for attr in (
                "stroke_opacity",
                "dashes",
                "line_join",
                "line_cap",
                "color",
                "width",
            ):
                setattr(entry, attr, None)
        result.append(entry)
    return result
def remove_rotation(self):
    """Set page rotation to 0 while maintaining visual appearance.

    Returns:
        Identity if the page is not rotated. Otherwise the inverse of the
        matrix that was prepended to the page contents; the rectangles of
        annotations, links and widgets are multiplied by this inverse.
    """
    angle = self.rotation  # normalized rotation value
    if angle == 0:
        return Identity  # nothing to do

    # The page's content needs to be derotated.
    mb = self.mediabox  # current mediabox
    if angle == 90:
        # before derotation, shift content horizontally
        shift = Matrix(1, 0, 0, 1, mb.y1 - mb.x1 - mb.x0 - mb.y0, 0)
    elif angle == 270:
        # before derotation, shift content vertically
        shift = Matrix(1, 0, 0, 1, 0, mb.x1 - mb.y1 - mb.y0 - mb.x0)
    else:  # angle is 180
        shift = Matrix(1, 0, 0, 1, -2 * mb.x0, -2 * mb.y0)

    # shift first, then apply the page's derotation matrix
    derotate = shift * self.derotation_matrix
    command = (_format_g(tuple(derotate)) + ' cm ').encode('utf8')
    TOOLS._insert_contents(self, command, False)  # prepend to page contents

    if angle in (90, 270):
        # swap x- and y-coordinates of the mediabox
        x0, y0, x1, y1 = mb
        mb.x0 = y0
        mb.y0 = x0
        mb.x1 = y1
        mb.y1 = x1
        self.set_mediabox(mb)

    self.set_rotation(0)
    inverse = ~derotate  # inverse of the derotation matrix

    for annot in self.annots():  # modify rectangles of annotations
        # TODO: only try to set rectangle for applicable annot types
        annot.set_rect(annot.rect * inverse)

    for link in self.get_links():  # modify 'from' rectangles of links
        moved = link["from"] * inverse
        self.delete_link(link)
        link["from"] = moved
        try:  # invalid links remain deleted
            self.insert_link(link)
        except Exception:
            pass

    for widget in self.widgets():  # modify field rectangles
        widget.rect = widget.rect * inverse
        widget.update()

    return inverse  # the inverse of the generated derotation matrix
def cluster_drawings(
    self, clip=None, drawings=None, x_tolerance: float = 3, y_tolerance: float = 3,
    final_filter: bool = True,
) -> list:
    """Join rectangles of neighboring vector graphic items.

    Args:
        clip: optional rect-like to restrict the page area to consider.
        drawings: (optional) output of a previous "get_drawings()".
        x_tolerance: horizontal neighborhood threshold.
        y_tolerance: vertical neighborhood threshold.
        final_filter: if false, return all joined rectangles without
            removing the small ones.

    Notes:
        Vector graphics (also called line-art or drawings) usually consist
        of independent items like rectangles, lines or curves to jointly
        form table grid lines or bar, line, pie charts and similar.
        This method identifies rectangles wrapping these disparate items.

    Returns:
        A list of Rect items, each wrapping line-art items that are close
        enough to be considered forming a common vector graphic.
        With final_filter on, only "significant" rectangles are returned,
        i.e. those having both width and height larger than the tolerance
        values.
    """
    CheckParent(self)
    area = self.rect  # the default clipping area
    if clip is not None:
        area = Rect(clip)
    if drawings is None:  # no previous output to re-use
        drawings = self.get_drawings()

    def are_neighbors(r1, r2):
        """Detect whether r1, r2 are "neighbors".

        Two rectangles are neighbors unless one lies entirely beyond the
        other by more than the tolerance in x or in y direction.
        Both parameters must be (potentially invalid) rectangles.
        """
        # order the coordinates of each rectangle as needed
        ax0, ax1 = (r1.x0, r1.x1) if r1.x1 > r1.x0 else (r1.x1, r1.x0)
        ay0, ay1 = (r1.y0, r1.y1) if r1.y1 > r1.y0 else (r1.y1, r1.y0)
        bx0, bx1 = (r2.x0, r2.x1) if r2.x1 > r2.x0 else (r2.x1, r2.x0)
        by0, by1 = (r2.y0, r2.y1) if r2.y1 > r2.y0 else (r2.y1, r2.y0)
        too_far_apart = (
            ax1 < bx0 - x_tolerance
            or ax0 > bx1 + x_tolerance
            or ay1 < by0 - y_tolerance
            or ay0 > by1 + y_tolerance
        )
        return not too_far_apart

    def bottom_then_left(rect):
        # common sort order: bottom edge first, then left edge
        return (rect.y1, rect.x0)

    # rectangles of all graphics that are contained in the clip
    pending = []
    for path in drawings:
        box = path["rect"]
        if (
            box.x0 >= area.x0
            and box.x1 <= area.x1
            and box.y0 >= area.y0
            and box.y1 <= area.y1
        ):
            pending.append(box)
    pending = sorted(pending, key=bottom_then_left)

    joined = []  # the final list of the joined rectangles
    # ---------------------------------------------------------------------
    # The strategy is to identify and join all rects that are neighbors
    # ---------------------------------------------------------------------
    while pending:  # the algorithm will empty this list
        seed = +pending[0]  # copy of first rectangle
        grown = True
        while grown:  # rescan for as long as the seed keeps growing
            grown = False
            for idx in reversed(range(1, len(pending))):  # back to front
                if are_neighbors(pending[idx], seed):
                    seed |= pending[idx].tl  # include in seed rect
                    seed |= pending[idx].br  # include in seed rect
                    del pending[idx]  # this rect is consumed
                    grown = True
        joined.append(seed)
        del pending[0]
        pending = sorted(set(pending), key=bottom_then_left)

    joined = sorted(set(joined), key=bottom_then_left)
    if not final_filter:
        return joined
    return [r for r in joined if r.width > x_tolerance and r.height > y_tolerance]
def get_fonts(self, full=False):
    """List of fonts defined in the page object.

    Delegates to the owning document's get_page_fonts() for this page
    number, passing `full` through.
    """
    CheckParent(self)
    document = self.parent
    return document.get_page_fonts(self.number, full=full)
def get_image_bbox(self, name, transform=0):
    """Get rectangle occupied by image 'name'.

    Args:
        name: either an item of the page's full image list (list/tuple
            whose last element is an int), or the referencing name
            string - elem[7] of the resp. item.
        transform: if truthy, also return the image transformation
            matrix.

    Returns:
        A Rect, or a tuple (Rect, Matrix) if `transform` is truthy. If
        the image cannot be located, the rectangle is Rect(1, 1, -1, -1)
        and the matrix is Matrix().

    Raises:
        ValueError: document closed or encrypted, `name` is a sequence
            that is no full image list item, or the name matches no
            image / more than one image.
    """
    CheckParent(self)
    doc = self.parent
    if doc.is_closed or doc.is_encrypted:
        raise ValueError('document closed or encrypted')

    inf_rect = Rect(1, 1, -1, -1)
    null_mat = Matrix()
    # "not found" result, shaped like the regular result
    rc = (inf_rect, null_mat) if transform else inf_rect

    if type(name) in (list, tuple):
        if type(name[-1]) is not int:
            raise ValueError('need item of full page image list')
        item = name
    else:
        imglist = [i for i in doc.get_page_images(self.number, True) if name == i[7]]
        if len(imglist) == 1:
            item = imglist[0]
        elif imglist == []:
            raise ValueError('bad image name')
        else:
            raise ValueError("found multiple images named '%s'." % name)

    xref = item[-1]
    if xref != 0 or transform:
        try:
            return self.get_image_rects(item, transform=transform)[0]
        except Exception:
            exception_info()
            # Return the "not found" value in the shape the caller asked
            # for: previously a bare Rect was returned here even with
            # transform set, so unpacking "bbox, matrix" failed.
            return rc

    # Only reached for xref == 0 without transform.
    pdf_page = self._pdf_page()
    val = JM_image_reporter(pdf_page)
    if not bool(val):
        return rc
    for v in val:
        if v[0] != item[-3]:
            continue
        q = Quad(v[1])
        bbox = q.rect
        if transform == 0:
            rc = bbox
            break
        hm = Matrix(util_hor_matrix(q.ll, q.lr))
        h = abs(q.ll - q.ul)
        w = abs(q.ur - q.ul)
        m0 = Matrix(1 / w, 0, 0, 1 / h, 0, 0)
        m = ~(hm * m0)
        rc = (bbox, m)
        break
    return rc
def get_images(self, full=False):
    """List of images defined in the page object.

    Delegates to the owning document's get_page_images() for this page
    number, passing `full` through.
    """
    CheckParent(self)
    document = self.parent
    return document.get_page_images(self.number, full=full)
def get_oc_items(self) -> list:
    """Get OCGs and OCMDs used in the page's contents.

    Returns:
        List of items (name, xref, type), where type is one of "ocg" /
        "ocmd", and name is the property name. Properties whose object
        source contains neither "/Type/OCG" nor "/Type/OCMD" are skipped.
    """
    # checked in this order; the first marker found decides the type
    markers = (("/Type/OCG", "ocg"), ("/Type/OCMD", "ocmd"))
    found = []
    for pname, xref in self._get_resource_properties():
        source = self.parent.xref_object(xref, compressed=True)
        for needle, label in markers:
            if needle in source:
                found.append((pname, xref, label))
                break
    return found
def get_svg_image(self, matrix=None, text_as_path=1):
    """Make SVG image from page.

    Args:
        matrix: matrix-like used as transformation for the page and for
            computing the SVG dimensions.
        text_as_path: 1 selects FZ_SVG_TEXT_AS_PATH, any other value
            FZ_SVG_TEXT_AS_TEXT.

    Returns:
        The SVG output as produced by JM_EscapeStrFromBuffer().
    """
    CheckParent(self)
    page_bounds = mupdf.fz_bound_page(self.this)
    ctm = JM_matrix_from_py(matrix)
    if text_as_path == 1:
        text_option = mupdf.FZ_SVG_TEXT_AS_PATH
    else:
        text_option = mupdf.FZ_SVG_TEXT_AS_TEXT
    svg_bounds = mupdf.fz_transform_rect(page_bounds, ctm)
    buffer = mupdf.fz_new_buffer(1024)
    output = mupdf.FzOutput(buffer)
    width = svg_bounds.x1 - svg_bounds.x0
    height = svg_bounds.y1 - svg_bounds.y0
    device = mupdf.fz_new_svg_device(output, width, height, text_option, 1)
    mupdf.fz_run_page(self.this, device, ctm, mupdf.FzCookie())
    mupdf.fz_close_device(device)
    output.fz_close_output()
    return JM_EscapeStrFromBuffer(buffer)
def get_textbox(
    page: Page,
    rect: rect_like,
    textpage=None,  #: TextPage = None,
) -> str:
    """Return the result of extractTextbox(rect) for this page.

    Args:
        page: the page.
        rect: rect-like passed to the text page's extractTextbox().
        textpage: text page to use. If None, one is created via
            page.get_textpage().

    Raises:
        ValueError: the given textpage's parent is not this page.
    """
    if textpage is None:
        # use a throw-away text page
        return page.get_textpage().extractTextbox(rect)
    if getattr(textpage, "parent") != page:
        raise ValueError("not a textpage of this page")
    return textpage.extractTextbox(rect)
def get_textpage(self, clip: rect_like = None, flags: int = 0, matrix=None) -> "TextPage":
    """Create a TextPage for this page.

    The page rotation is temporarily set to 0 while the text page is
    made and restored afterwards, also in case of an error.

    Args:
        clip: passed to _get_textpage().
        flags: passed to _get_textpage().
        matrix: passed to _get_textpage(); Matrix(1, 1) if None.

    Returns:
        A TextPage whose `parent` is a weak proxy of this page.
    """
    CheckParent(self)
    if matrix is None:
        matrix = Matrix(1, 1)
    saved_rotation = self.rotation
    was_rotated = saved_rotation != 0
    if was_rotated:
        self.set_rotation(0)
    try:
        raw_textpage = self._get_textpage(clip, flags=flags, matrix=matrix)
    finally:
        if was_rotated:
            self.set_rotation(saved_rotation)
    result = TextPage(raw_textpage)
    result.parent = weakref.proxy(self)
    return result
def get_texttrace(self):
    """Run the page through a text-trace device and return its output.

    The page rotation is temporarily set to 0 while the page is run.

    Returns:
        The list filled by the text-trace device.
    """
    CheckParent(self)
    old_rotation = self.rotation
    if old_rotation != 0:
        self.set_rotation(0)
    try:
        page = self.this
        rc = []
        if g_use_extra:
            dev = extra.JM_new_texttrace_device(rc)
        else:
            dev = JM_new_texttrace_device(rc)
        prect = mupdf.fz_bound_page(page)
        dev.ptm = mupdf.FzMatrix(1, 0, 0, -1, 0, prect.y1)
        mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
        mupdf.fz_close_device(dev)
    finally:
        # Always put the original rotation back, even if running the page
        # failed - otherwise the page would silently stay de-rotated.
        if old_rotation != 0:
            self.set_rotation(old_rotation)
    return rc
def get_xobjects(self):
    """List of xobjects defined in the page object.

    Delegates to the owning document's get_page_xobjects() for this page
    number.
    """
    CheckParent(self)
    document = self.parent
    return document.get_page_xobjects(self.number)
def insert_font(self, fontname="helv", fontfile=None, fontbuffer=None,
                set_simple=False, wmode=0, encoding=0):
    """Make a font available on this page and return its xref.

    Args:
        fontname: reference name of the font; a leading "/" is dropped.
        fontfile: font file given as str, as an object with an
            "absolute" attribute (converted with str()) or as an object
            with a "name" attribute.
        fontbuffer: font data; replaced by the pymupdf_fonts data if the
            lower-cased name is a key of fitz_fontdescriptors.
        set_simple, wmode, encoding: passed through to _insertFont().

    Returns:
        The xref of the font, or the falsy result of _insertFont() if
        the installation did not work.

    Raises:
        ValueError: page has no parent, the name contains invalid
            characters, or `fontfile` is of an unsupported type.
    """
    doc = self.parent
    if doc is None:
        raise ValueError("orphaned object: parent is None")
    idx = 0

    if fontname.startswith("/"):
        fontname = fontname[1:]
    inv_chars = INVALID_NAME_CHARS.intersection(fontname)
    if inv_chars != set():
        raise ValueError(f"bad fontname chars {inv_chars}")

    known = CheckFont(self, fontname)
    if known is not None:  # font already in font list of page
        xref = known[0]  # this is the xref
        if not CheckFontInfo(doc, xref):  # not yet in document font list
            # need to build the doc FontInfo entry - done via get_char_widths
            doc.get_char_widths(xref)
        return xref

    # ----------------------------------------------------------------------
    # the font is not present for this page
    # ----------------------------------------------------------------------
    bfname = Base14_fontdict.get(fontname.lower(), None)  # BaseFont if Base-14 font

    # Look the name up among the CJK names: first the plain list, then -
    # only if that found nothing - the serifed list.
    serif = 0
    CJK_number = -1
    cjk_tables = (
        (0, ["china-t", "china-s", "japan", "korea"]),
        (1, ["china-ts", "china-ss", "japan-s", "korea-s"]),
    )
    for serif_flag, cjk_names in cjk_tables:
        if CJK_number >= 0:
            break
        try:
            CJK_number = cjk_names.index(fontname)
            serif = serif_flag
        except Exception:
            # Verbose in PyMuPDF/tests.
            if g_exceptions_verbose > 1:
                exception_info()

    if fontname.lower() in fitz_fontdescriptors.keys():
        import pymupdf_fonts
        fontbuffer = pymupdf_fonts.myfont(fontname)  # make a copy
        del pymupdf_fonts

    # install the font for the page
    fontfile_str = None
    if fontfile is not None:
        if type(fontfile) is str:
            fontfile_str = fontfile
        elif hasattr(fontfile, "absolute"):
            fontfile_str = str(fontfile)
        elif hasattr(fontfile, "name"):
            fontfile_str = fontfile.name
        else:
            raise ValueError("bad fontfile")

    installed = self._insertFont(
        fontname, bfname, fontfile_str, fontbuffer, set_simple, idx,
        wmode, serif, encoding, CJK_number,
    )
    if not installed:  # did not work, error return
        return installed

    xref = installed[0]  # xref of installed font
    fontdict = installed[1]
    if not CheckFontInfo(doc, xref):  # document does not know this font yet
        # need to create document font info
        doc.get_char_widths(xref, fontdict=fontdict)
    return xref
@property
def is_wrapped(self):
    """Check if /Contents is in a balanced graphics state.

    True exactly when _count_q_balance() reports (0, 0).
    """
    balance = self._count_q_balance()
    return balance == (0, 0)
@property
def language(self):
    """Page language.

    Returns:
        The value of the (inheritable) /Lang entry of the page, or None
        if this is no PDF page or no such entry exists.
    """
    pdf_page = _as_pdf_page(self.this, required=False)
    if pdf_page.m_internal:
        lang = mupdf.pdf_dict_get_inheritable(pdf_page.obj(), PDF_NAME('Lang'))
        if lang.m_internal:
            return mupdf.pdf_to_str_buf(lang)
    return None
def links(self, kinds=None):
    """ Generator over the links of a page.

    Args:
        kinds: (list) link kinds to subselect from. If none,
               all links are returned. E.g. kinds=[LINK_URI]
               will only yield URI links.
    """
    for entry in self.get_links():
        if kinds is not None and entry["kind"] not in kinds:
            continue  # not one of the requested kinds
        yield entry
def load_annot(self, ident: typing.Union[str, int]) -> Annot:
    """Load an annot by name (/NM key) or xref.

    Args:
        ident: identifier, either name (str) or xref (int).

    Returns:
        The annotation, registered in this page's `_annot_refs`; a falsy
        result of the lookup is returned unchanged.

    Raises:
        ValueError: `ident` is neither str nor int.
    """
    CheckParent(self)
    if type(ident) is str:
        name, xref = ident, 0
    elif type(ident) is int:
        name, xref = None, ident
    else:
        raise ValueError("identifier must be a string or integer")
    annot = self._load_annot(name, xref)
    if annot:
        annot.thisown = True
        annot.parent = weakref.proxy(self)
        self._annot_refs[id(annot)] = annot
    return annot
def load_links(self):
    """Get first Link.

    Returns:
        A Link registered in this page's `_annot_refs`, or None if the
        page has no links. For PDF documents, `xref` and `id` are taken
        from the first link entry of annot_xrefs(); otherwise they are 0
        and "".
    """
    CheckParent(self)
    raw_link = mupdf.fz_load_links(self.this)
    if not raw_link.m_internal:
        return None
    link = Link(raw_link)
    link.thisown = True
    link.parent = weakref.proxy(self)  # owning page object
    self._annot_refs[id(link)] = link
    # defaults, kept unless the PDF lookup below finds a link entry
    link.xref = 0
    link.id = ""
    if self.parent.is_pdf:
        link_entries = [x for x in self.annot_xrefs() if x[1] == mupdf.PDF_ANNOT_LINK]
        if link_entries:
            first = link_entries[0]
            link.xref = first[0]
            link.id = first[2]
    return link
#----------------------------------------------------------------
# page load widget by xref
#----------------------------------------------------------------
def load_widget(self, xref):
    """Load a widget by its xref.

    Returns:
        A Widget filled from the annotation found for `xref`; a falsy
        result of the lookup is returned unchanged.
    """
    CheckParent(self)
    pdf_page = _as_pdf_page(self.this)
    annot = JM_get_widget_by_xref(pdf_page, xref)
    if not annot:
        return annot
    annot.thisown = True
    annot.parent = weakref.proxy(self)
    self._annot_refs[id(annot)] = annot
    field = Widget()
    TOOLS._fill_widget(annot, field)
    return field
@property
def mediabox(self):
    """The MediaBox.

    For PDF pages taken from the page object via JM_mediabox(), for
    other pages the bounds of the page.
    """
    CheckParent(self)
    pdf_page = self._pdf_page(required=False)
    if pdf_page.m_internal:
        box = JM_mediabox(pdf_page.obj())
    else:
        box = mupdf.fz_bound_page(self.this)
    return Rect(box)
@property
def mediabox_size(self):
    """Point made of the mediabox's x1 and y1 coordinates."""
    box = self.mediabox
    return Point(box.x1, box.y1)
- #@property
- #def parent( self):
- # assert self._parent
- # if self._parent:
- # return self._parent
- # return Document( self.this.document())
def read_contents(self):
    """All /Contents streams concatenated to one bytes object."""
    combined = TOOLS._get_all_contents(self)
    return combined
def refresh(self):
    """Refresh page after link/annot/widget updates.

    Asks the owning document to reload this page and stores the result
    in `self.this`.
    """
    CheckParent(self)
    reloaded = self.parent.reload_page(self)
    # fixme this looks wrong.
    self.this = reloaded
@property
def rotation(self):
    """Page rotation; 0 if this is not a PDF page."""
    CheckParent(self)
    pdf_page = _as_pdf_page(self.this, required=0)
    if pdf_page.m_internal:
        return JM_page_rotation(pdf_page)
    return 0
@property
def rotation_matrix(self) -> Matrix:
    """Reflects page rotation (Matrix made from TOOLS._rotate_matrix())."""
    raw_matrix = TOOLS._rotate_matrix(self)
    return Matrix(raw_matrix)
def run(self, dw, m):
    """Run page through a device.

    Args:
        dw: DeviceWrapper; its `device` attribute receives the page.
        m: matrix-like, converted with JM_matrix_from_py().
    """
    CheckParent(self)
    transform = JM_matrix_from_py(m)
    mupdf.fz_run_page(self.this, dw.device, transform, mupdf.FzCookie())
def set_artbox(self, rect):
    """Set the ArtBox (delegates to _set_pagebox)."""
    box_name = "ArtBox"
    return self._set_pagebox(box_name, rect)
def set_bleedbox(self, rect):
    """Set the BleedBox (delegates to _set_pagebox)."""
    box_name = "BleedBox"
    return self._set_pagebox(box_name, rect)
def set_contents(self, xref):
    """Set object at 'xref' as the page's /Contents.

    Raises:
        ValueError: the document is closed or no PDF, `xref` is outside
            range(1, xref_length), or the object is no stream.
    """
    CheckParent(self)
    document = self.parent
    if document.is_closed:
        raise ValueError("document closed")
    if not document.is_pdf:
        raise ValueError("is no PDF")
    valid_xrefs = range(1, document.xref_length())
    if xref not in valid_xrefs:
        raise ValueError("bad xref")
    if not document.xref_is_stream(xref):
        raise ValueError("xref is no stream")
    reference = "%i 0 R" % xref
    document.xref_set_key(self.xref, "Contents", reference)
def set_cropbox(self, rect):
    """Set the CropBox. Will also change Page.rect."""
    box_name = "CropBox"
    return self._set_pagebox(box_name, rect)
def set_language(self, language=None):
    """Set PDF page default language.

    Args:
        language: language string. If falsy, the /Lang entry is removed
            from the page object instead.
    """
    CheckParent(self)
    pdfpage = _as_pdf_page(self.this)
    if not language:
        mupdf.pdf_dict_del(pdfpage.obj(), PDF_NAME('Lang'))
    else:
        lang = mupdf.fz_text_language_from_string(language)
        assert hasattr(mupdf, 'fz_string_from_text_language2')
        mupdf.pdf_dict_put_text_string(
            # obj is a method: it must be called to get the page object
            # (the bound method itself was passed here before)
            pdfpage.obj(),
            PDF_NAME('Lang'),
            mupdf.fz_string_from_text_language2(lang)
        )
def set_mediabox(self, rect):
    """Set the MediaBox.

    Also removes the CropBox, ArtBox, BleedBox and TrimBox entries from
    the page object.

    Raises:
        ValueError: the rectangle is empty or infinite.
    """
    CheckParent(self)
    pdf_page = self._pdf_page()
    new_box = JM_rect_from_py(rect)
    if mupdf.fz_is_empty_rect(new_box) or mupdf.fz_is_infinite_rect(new_box):
        raise ValueError(MSG_BAD_RECT)
    mupdf.pdf_dict_put_rect(pdf_page.obj(), PDF_NAME('MediaBox'), new_box)
    for other_box in ('CropBox', 'ArtBox', 'BleedBox', 'TrimBox'):
        mupdf.pdf_dict_del(pdf_page.obj(), PDF_NAME(other_box))
def set_rotation(self, rotation):
    """Set page rotation.

    The value is passed through JM_norm_rotation() and stored as the
    page object's /Rotate entry.
    """
    CheckParent(self)
    pdf_page = _as_pdf_page(self.this)
    normalized = JM_norm_rotation(rotation)
    mupdf.pdf_dict_put_int(pdf_page.obj(), PDF_NAME('Rotate'), normalized)
def set_trimbox(self, rect):
    """Set the TrimBox (delegates to _set_pagebox)."""
    box_name = "TrimBox"
    return self._set_pagebox(box_name, rect)
@property
def transformation_matrix(self):
    """Page transformation matrix.

    Returns:
        For non-PDF pages the Python form of a default FzMatrix.
        For PDF pages with a rotation that is a multiple of 360 the
        matrix delivered by pdf_page_transform(), otherwise
        Matrix(1, 0, 0, -1, 0, cropbox height).
    """
    CheckParent(self)
    ctm = mupdf.FzMatrix()
    pdf_page = self._pdf_page(required=False)
    if not pdf_page.m_internal:
        return JM_py_from_matrix(ctm)
    unit_box = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)  # fixme: original code passed mediabox=NULL.
    mupdf.pdf_page_transform(pdf_page, unit_box, ctm)
    py_ctm = JM_py_from_matrix(ctm)
    if self.rotation % 360 == 0:
        return Matrix(py_ctm)
    return Matrix(1, 0, 0, -1, 0, self.cropbox.height)
@property
def trimbox(self):
    """The TrimBox.

    Falls back to the cropbox if the page has no TrimBox. Otherwise the
    stored values are returned with the y-coordinates subtracted from
    the mediabox's y1.
    """
    stored = self._other_box("TrimBox")
    if stored is None:
        return self.cropbox
    top = self.mediabox.y1
    left, low, right, high = stored[0], stored[1], stored[2], stored[3]
    return Rect(left, top - high, right, top - low)
def widgets(self, types=None):
    """ Generator over the widgets of a page.

    Args:
        types: (list) field types to subselect from. If none,
                all fields are returned. E.g. types=[PDF_WIDGET_TYPE_TEXT]
                will only yield text fields.
    """
    # collect the xrefs of all widget annotations up front
    widget_xrefs = []
    for entry in self.annot_xrefs():
        if entry[1] == mupdf.PDF_ANNOT_WIDGET:
            widget_xrefs.append(entry[0])
    for xref in widget_xrefs:
        field = self.load_widget(xref)
        if types is not None and field.field_type not in types:
            continue  # not one of the requested field types
        yield field
def wrap_contents(self):
    """Ensure page is in a balanced graphics state.

    Prepends as many "q" and appends as many "Q" commands to the page
    contents as _count_q_balance() reports missing.
    """
    missing_push, missing_pop = self._count_q_balance()
    if missing_push > 0:
        # prepend required push commands
        TOOLS._insert_contents(self, b"q\n" * missing_push, False)
    if missing_pop > 0:
        # append required pop commands
        TOOLS._insert_contents(self, b"\nQ" * missing_pop + b"\n", True)
@property
def xref(self):
    """PDF xref number of page (looked up via the owning document)."""
    CheckParent(self)
    document = self.parent
    return document.page_xref(self.number)
# Getter-only property `rect`, implemented by `bound`.
rect = property(bound, doc="page rectangle")
- class Pixmap:
- def __init__(self, *args):
- """
- Pixmap(colorspace, irect, alpha) - empty pixmap.
- Pixmap(colorspace, src) - copy changing colorspace.
- Pixmap(src, width, height,[clip]) - scaled copy, float dimensions.
- Pixmap(src, alpha=1) - copy and add or drop alpha channel.
- Pixmap(filename) - from an image in a file.
- Pixmap(image) - from an image in memory (bytes).
- Pixmap(colorspace, width, height, samples, alpha) - from samples data.
- Pixmap(PDFdoc, xref) - from an image at xref in a PDF document.
- """
- # Cache for property `self.samples_mv`. Set here so __del_() sees it if
- # we raise.
- #
- self._samples_mv = None
- # 2024-01-16: Experimental support for a memory-view of the underlying
- # data. Doesn't seem to make much difference to Pixmap.set_pixel() so
- # not currently used.
- self._memory_view = None
-
- if 0:
- pass
- elif args_match(args,
- (Colorspace, mupdf.FzColorspace),
- (mupdf.FzRect, mupdf.FzIrect, IRect, Rect, tuple)
- ):
- # create empty pixmap with colorspace and IRect
- cs, rect = args
- alpha = 0
- pm = mupdf.fz_new_pixmap_with_bbox(cs, JM_irect_from_py(rect), mupdf.FzSeparations(0), alpha)
- self.this = pm
- elif args_match(args,
- (Colorspace, mupdf.FzColorspace),
- (mupdf.FzRect, mupdf.FzIrect, IRect, Rect, tuple),
- (int, bool)
- ):
- # create empty pixmap with colorspace and IRect
- cs, rect, alpha = args
- pm = mupdf.fz_new_pixmap_with_bbox(cs, JM_irect_from_py(rect), mupdf.FzSeparations(0), alpha)
- self.this = pm
- elif args_match(args, (Colorspace, mupdf.FzColorspace, type(None)), (Pixmap, mupdf.FzPixmap)):
- # copy pixmap, converting colorspace
- cs, spix = args
- if isinstance(cs, Colorspace):
- cs = cs.this
- elif cs is None:
- cs = mupdf.FzColorspace(None)
- if isinstance(spix, Pixmap):
- spix = spix.this
- if not mupdf.fz_pixmap_colorspace(spix).m_internal:
- raise ValueError( "source colorspace must not be None")
-
- if cs.m_internal:
- self.this = mupdf.fz_convert_pixmap(
- spix,
- cs,
- mupdf.FzColorspace(),
- mupdf.FzDefaultColorspaces(None),
- mupdf.FzColorParams(),
- 1
- )
- else:
- self.this = mupdf.fz_new_pixmap_from_alpha_channel( spix)
- if not self.this.m_internal:
- raise RuntimeError( MSG_PIX_NOALPHA)
- elif args_match(args, (Pixmap, mupdf.FzPixmap), (Pixmap, mupdf.FzPixmap)):
- # add mask to a pixmap w/o alpha channel
- spix, mpix = args
- if isinstance(spix, Pixmap):
- spix = spix.this
- if isinstance(mpix, Pixmap):
- mpix = mpix.this
- spm = spix
- mpm = mpix
- if not spix.m_internal: # intercept NULL for spix: make alpha only pix
- dst = mupdf.fz_new_pixmap_from_alpha_channel(mpm)
- if not dst.m_internal:
- raise RuntimeError( MSG_PIX_NOALPHA)
- else:
- dst = mupdf.fz_new_pixmap_from_color_and_mask(spm, mpm)
- self.this = dst
- elif (args_match(args, (Pixmap, mupdf.FzPixmap), (float, int), (float, int), None) or
- args_match(args, (Pixmap, mupdf.FzPixmap), (float, int), (float, int))):
- # create pixmap as scaled copy of another one
- if len(args) == 3:
- spix, w, h = args
- bbox = mupdf.FzIrect(mupdf.fz_infinite_irect)
- else:
- spix, w, h, clip = args
- bbox = JM_irect_from_py(clip)
-
- src_pix = spix.this if isinstance(spix, Pixmap) else spix
- if not mupdf.fz_is_infinite_irect(bbox):
- pm = mupdf.fz_scale_pixmap(src_pix, src_pix.x(), src_pix.y(), w, h, bbox)
- else:
- pm = mupdf.fz_scale_pixmap(src_pix, src_pix.x(), src_pix.y(), w, h, mupdf.FzIrect(mupdf.fz_infinite_irect))
- self.this = pm
- elif args_match(args, str, (Pixmap, mupdf.FzPixmap)) and args[0] == 'raw':
- # Special raw construction where we set .this directly.
- _, pm = args
- if isinstance(pm, Pixmap):
- pm = pm.this
- self.this = pm
- elif args_match(args, (Pixmap, mupdf.FzPixmap), (int, None)):
- # Pixmap(struct Pixmap *spix, int alpha=1)
- # copy pixmap & add / drop the alpha channel
- spix = args[0]
- alpha = args[1] if len(args) == 2 else 1
- src_pix = spix.this if isinstance(spix, Pixmap) else spix
- if not _INRANGE(alpha, 0, 1):
- raise ValueError( "bad alpha value")
- cs = mupdf.fz_pixmap_colorspace(src_pix)
- if not cs.m_internal and not alpha:
- raise ValueError( "cannot drop alpha for 'NULL' colorspace")
- seps = mupdf.FzSeparations()
- n = mupdf.fz_pixmap_colorants(src_pix)
- w = mupdf.fz_pixmap_width(src_pix)
- h = mupdf.fz_pixmap_height(src_pix)
- pm = mupdf.fz_new_pixmap(cs, w, h, seps, alpha)
- pm.m_internal.x = src_pix.m_internal.x
- pm.m_internal.y = src_pix.m_internal.y
- pm.m_internal.xres = src_pix.m_internal.xres
- pm.m_internal.yres = src_pix.m_internal.yres
- # copy samples data ------------------------------------------
- if 1:
- # We use our pixmap_copy() to get best performance.
- # test_pixmap.py:test_setalpha(): 3.9s t=0.0062
- extra.pixmap_copy( pm.m_internal, src_pix.m_internal, n)
- elif 1:
- # Use memoryview.
- # test_pixmap.py:test_setalpha(): 4.6 t=0.51
- src_view = mupdf.fz_pixmap_samples_memoryview( src_pix)
- pm_view = mupdf.fz_pixmap_samples_memoryview( pm)
- if src_pix.alpha() == pm.alpha(): # identical samples
- #memcpy(tptr, sptr, w * h * (n + alpha));
- size = w * h * (n + alpha)
- pm_view[ 0 : size] = src_view[ 0 : size]
- else:
- tptr = 0
- sptr = 0
- # This is a little faster than calling
- # pm.fz_samples_set(), but still quite slow. E.g. reduces
- # test_pixmap.py:test_setalpha() from 6.7s to 4.5s.
- #
- # t=0.53
- pm_stride = pm.stride()
- pm_n = pm.n()
- pm_alpha = pm.alpha()
- src_stride = src_pix.stride()
- src_n = src_pix.n()
- #log( '{=pm_stride pm_n src_stride src_n}')
- for y in range( h):
- for x in range( w):
- pm_i = pm_stride * y + pm_n * x
- src_i = src_stride * y + src_n * x
- pm_view[ pm_i : pm_i + n] = src_view[ src_i : src_i + n]
- if pm_alpha:
- pm_view[ pm_i + n] = 255
- else:
- # Copy individual bytes from Python. Very slow.
- # test_pixmap.py:test_setalpha(): 6.89 t=2.601
- if src_pix.alpha() == pm.alpha(): # identical samples
- #memcpy(tptr, sptr, w * h * (n + alpha));
- for i in range(w * h * (n + alpha)):
- mupdf.fz_samples_set(pm, i, mupdf.fz_samples_get(src_pix, i))
- else:
- # t=2.56
- tptr = 0
- sptr = 0
- src_pix_alpha = src_pix.alpha()
- for i in range(w * h):
- #memcpy(tptr, sptr, n);
- for j in range(n):
- mupdf.fz_samples_set(pm, tptr + j, mupdf.fz_samples_get(src_pix, sptr + j))
- tptr += n
- if pm.alpha():
- mupdf.fz_samples_set(pm, tptr, 255)
- tptr += 1
- sptr += n + src_pix_alpha
- self.this = pm
- elif args_match(args, (mupdf.FzColorspace, Colorspace), int, int, None, (int, bool)):
- # create pixmap from samples data
- cs, w, h, samples, alpha = args
- if isinstance(cs, Colorspace):
- cs = cs.this
- assert isinstance(cs, mupdf.FzColorspace)
- n = mupdf.fz_colorspace_n(cs)
- stride = (n + alpha) * w
- seps = mupdf.FzSeparations()
- pm = mupdf.fz_new_pixmap(cs, w, h, seps, alpha)
- if isinstance( samples, (bytes, bytearray)):
- #log('using mupdf.python_buffer_data()')
- samples2 = mupdf.python_buffer_data(samples)
- size = len(samples)
- else:
- res = JM_BufferFromBytes(samples)
- if not res.m_internal:
- raise ValueError( "bad samples data")
- size, c = mupdf.fz_buffer_storage(res)
- samples2 = mupdf.python_buffer_data(samples) # raw swig proxy for `const unsigned char*`.
- if stride * h != size:
- raise ValueError( f"bad samples length {w=} {h=} {alpha=} {n=} {stride=} {size=}")
- mupdf.ll_fz_pixmap_copy_raw( pm.m_internal, samples2)
- self.this = pm
- elif args_match(args, None):
- # create pixmap from filename, file object, pathlib.Path or memory
- imagedata, = args
- name = 'name'
- if hasattr(imagedata, "resolve"):
- fname = imagedata.__str__()
- if fname:
- img = mupdf.fz_new_image_from_file(fname)
- elif hasattr(imagedata, name):
- fname = imagedata.name
- if fname:
- img = mupdf.fz_new_image_from_file(fname)
- elif isinstance(imagedata, str):
- img = mupdf.fz_new_image_from_file(imagedata)
- else:
- res = JM_BufferFromBytes(imagedata)
- if not res.m_internal or not res.m_internal.len:
- raise ValueError( "bad image data")
- img = mupdf.fz_new_image_from_buffer(res)
- # Original code passed null for subarea and ctm, but that's not
- # possible with MuPDF's python bindings. The equivalent is an
- # infinite rect and identify matrix scaled by img.w() and img.h().
- pm, w, h = mupdf.fz_get_pixmap_from_image(
- img,
- mupdf.FzIrect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT),
- mupdf.FzMatrix( img.w(), 0, 0, img.h(), 0, 0),
- )
- xres, yres = mupdf.fz_image_resolution(img)
- pm.m_internal.xres = xres
- pm.m_internal.yres = yres
- self.this = pm
- elif args_match(args, (Document, mupdf.FzDocument), int):
- # Create pixmap from PDF image identified by XREF number
- doc, xref = args
- pdf = _as_pdf_document(doc)
- xreflen = mupdf.pdf_xref_len(pdf)
- if not _INRANGE(xref, 1, xreflen-1):
- raise ValueError( MSG_BAD_XREF)
- ref = mupdf.pdf_new_indirect(pdf, xref, 0)
- type_ = mupdf.pdf_dict_get(ref, PDF_NAME('Subtype'))
- if (not mupdf.pdf_name_eq(type_, PDF_NAME('Image'))
- and not mupdf.pdf_name_eq(type_, PDF_NAME('Alpha'))
- and not mupdf.pdf_name_eq(type_, PDF_NAME('Luminosity'))
- ):
- raise ValueError( MSG_IS_NO_IMAGE)
- img = mupdf.pdf_load_image(pdf, ref)
- # Original code passed null for subarea and ctm, but that's not
- # possible with MuPDF's python bindings. The equivalent is an
- # infinite rect and identify matrix scaled by img.w() and img.h().
- pix, w, h = mupdf.fz_get_pixmap_from_image(
- img,
- mupdf.FzIrect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT),
- mupdf.FzMatrix(img.w(), 0, 0, img.h(), 0, 0),
- )
- self.this = pix
- else:
- text = 'Unrecognised args for constructing Pixmap:\n'
- for arg in args:
- text += f' {type(arg)}: {arg}\n'
- raise Exception( text)
- def __len__(self):
- return self.size
def __repr__(self):
    """Return "Pixmap(<colorspace name>, <irect>, <alpha>)".

    Only exact Pixmap instances are formatted; for any other type nothing
    is returned. A pixmap without colorspace shows 'None' as the name.
    """
    if type(self) is not Pixmap:
        return
    cspace = self.colorspace
    cs_name = cspace.this.m_internal.name if cspace else 'None'
    return "Pixmap(%s, %s, %s)" % (cs_name, self.irect, self.alpha)
def _tobytes(self, format_, jpg_quality):
    '''
    Pixmap._tobytes

    Write the pixmap into a new buffer with the writer selected by
    `format_` and return JM_BinFromBuffer() of that buffer.
    Codes: 1=png, 2=pnm, 3=pam, 5=psd, 6=ps, 7=jpeg (uses `jpg_quality`);
    every other code is written as png.
    '''
    pixmap = self.this
    # Size handed to fz_new_buffer(): stride times number of rows.
    nbytes = mupdf.fz_pixmap_stride(pixmap) * pixmap.h()
    buffer_ = mupdf.fz_new_buffer(nbytes)
    output = mupdf.FzOutput(buffer_)
    if format_ == 7:
        mupdf.fz_write_pixmap_as_jpeg(output, pixmap, jpg_quality, 0)
    else:
        suffix = {1: 'png', 2: 'pnm', 3: 'pam', 5: 'psd', 6: 'ps'}.get(format_, 'png')
        # Look the writer up by name so only the requested one is resolved.
        writer = getattr(mupdf, 'fz_write_pixmap_as_' + suffix)
        writer(output, pixmap)
    output.fz_close_output()
    return JM_BinFromBuffer(buffer_)
def _writeIMG(self, filename, format_, jpg_quality):
    """Save the pixmap to `filename` with the writer selected by `format_`.

    Codes: 1=png, 2=pnm, 3=pam, 5=psd, 6=ps, 7=jpeg (uses `jpg_quality`);
    every other code is saved as png.
    """
    pixmap = self.this
    if format_ == 7:
        mupdf.fz_save_pixmap_as_jpeg(pixmap, filename, jpg_quality)
        return
    suffix = {1: 'png', 2: 'pnm', 3: 'pam', 5: 'psd', 6: 'ps'}.get(format_, 'png')
    # Look the saver up by name so only the requested one is resolved.
    saver = getattr(mupdf, 'fz_save_pixmap_as_' + suffix)
    saver(pixmap, filename)
@property
def alpha(self):
    """Indicates presence of alpha channel (result of fz_pixmap_alpha)."""
    pixmap = self.this
    return mupdf.fz_pixmap_alpha(pixmap)
def clear_with(self, value=None, bbox=None):
    """Fill all color components with same value.

    Without `value` the pixmap is cleared via fz_clear_pixmap(). With
    `value` only, the whole pixmap is filled; with `bbox` as well, only
    that rectangle is filled.
    """
    if value is None:
        mupdf.fz_clear_pixmap(self.this)
        return
    if bbox is None:
        mupdf.fz_clear_pixmap_with_value(self.this, value)
        return
    JM_clear_pixmap_rect_with_value(self.this, value, JM_irect_from_py(bbox))
def color_count(self, colors=0, clip=None):
    '''
    Return count of each color.

    With a falsy `colors` only the number of entries returned by
    JM_color_count() is given back; otherwise its full result.
    '''
    counts = JM_color_count(self.this, clip)
    return counts if colors else len(counts)
def color_topusage(self, clip=None):
    """Return most frequent color and its usage ratio.

    Returns a tuple (ratio, pixel). If no pixels were counted the result is
    (1, bytes of n times 255). Among equally frequent colors the one
    reported first by color_count() wins.
    """
    if clip is not None and self.irect in Rect(clip):
        clip = self.irect
    histogram = self.color_count(colors=True, clip=clip)
    total = sum(histogram.values())
    if not total:
        return (1, bytes([255] * self.n))
    top_pixel, top_count = max(histogram.items(), key=lambda entry: entry[1])
    return (top_count / total, top_pixel)
@property
def colorspace(self):
    """Pixmap Colorspace, or None if the wrapped colorspace is named "None"."""
    wrapped = Colorspace(mupdf.fz_pixmap_colorspace(self.this))
    return None if wrapped.name == "None" else wrapped
def copy(self, src, bbox):
    """Copy bbox from another Pixmap.

    Raises ValueError if the source colorspace test fails or if source and
    target differ in their alpha setting.
    """
    target = self.this
    source = src.this
    if not mupdf.fz_pixmap_colorspace(source):
        raise ValueError( "cannot copy pixmap with NULL colorspace")
    if target.alpha() != source.alpha():
        raise ValueError( "source and target alpha must be equal")
    area = JM_irect_from_py(bbox)
    default_cs = mupdf.FzDefaultColorspaces(None)
    mupdf.fz_copy_pixmap_rect(target, source, area, default_cs)
@property
def digest(self):
    """MD5 digest of pixmap (bytes)."""
    return bytes(mupdf.fz_md5_pixmap2(self.this))
def gamma_with(self, gamma):
    """Apply correction with some float.
    gamma=1 is a no-op.

    If the colorspace test fails, only a warning is emitted.
    """
    if mupdf.fz_pixmap_colorspace( self.this):
        mupdf.fz_gamma_pixmap( self.this, gamma)
    else:
        message_warning("colorspace invalid for function")
@property
def h(self):
    """The height."""
    pixmap = self.this
    return mupdf.fz_pixmap_height(pixmap)
def invert_irect(self, bbox=None):
    """Invert the colors inside a bbox.

    Returns False (after a warning) if the pixmap's colorspace has no
    internal object, True otherwise. An infinite rectangle inverts the
    whole pixmap.
    """
    pixmap = self.this
    if not mupdf.fz_pixmap_colorspace(pixmap).m_internal:
        message_warning("ignored for stencil pixmap")
        return False
    area = JM_irect_from_py(bbox)
    if mupdf.fz_is_infinite_irect(area):
        mupdf.fz_invert_pixmap(pixmap)
    else:
        mupdf.fz_invert_pixmap_rect(pixmap, area)
    return True
@property
def irect(self):
    """Pixmap bbox - an IRect object."""
    return JM_py_from_irect(mupdf.fz_pixmap_bbox(self.this))
@property
def is_monochrome(self):
    """Check if pixmap is monochrome."""
    pixmap = self.this
    return mupdf.fz_is_pixmap_monochrome(pixmap)
@property
def is_unicolor(self):
    '''
    Check if pixmap has only one color.

    Reads w * h pixels of n samples each and compares every pixel with
    the first one.
    '''
    pixmap = self.this
    comps = pixmap.n()
    total = pixmap.w() * pixmap.h() * comps
    reference = None
    for start in range(0, total, comps):
        current = [mupdf.fz_samples_get(pixmap, start + k) for k in range(comps)]
        if reference is None:
            # First pixel: remember it as the color to compare against.
            reference = current
        elif current != reference:
            return False
    return True
@property
def n(self):
    """The size of one pixel.

    Without `g_use_extra` this is mupdf.fz_pixmap_components(). With it, the
    first access swaps the class attribute `n` for a property that calls
    extra.pixmap_n() directly.
    """
    if g_use_extra:
        # Setting self.__class__.n gives a small reduction in overhead of
        # test_general.py:test_2093, e.g. 1.4x -> 1.3x.
        #return extra.pixmap_n(self.this)
        def n2(self):
            return extra.pixmap_n(self.this)
        # Replace this property on the instance's class; later accesses no
        # longer run the g_use_extra test above.
        self.__class__.n = property(n2)
        # Re-read the attribute so the value comes from the new property.
        return self.n
    return mupdf.fz_pixmap_components(self.this)
def pdfocr_save(self, filename, compress=1, language=None, tessdata=None):
    '''
    Save pixmap as an OCR-ed PDF page.

    `filename` may be a str (saved directly) or any other object, which is
    wrapped by JM_new_output_fileptr() and written to. `language` and
    `tessdata` are only set on the options when truthy.
    '''
    tessdata = get_tessdata(tessdata)
    options = mupdf.FzPdfocrOptions()
    options.compress = compress
    if language:
        options.language_set2( language)
    if tessdata:
        options.datadir_set2( tessdata)
    pixmap = self.this
    if isinstance(filename, str):
        mupdf.fz_save_pixmap_as_pdfocr( pixmap, filename, 0, options)
        return
    stream = JM_new_output_fileptr( filename)
    try:
        mupdf.fz_write_pixmap_as_pdfocr( stream, pixmap, options)
    finally:
        stream.fz_close_output()    # Avoid MuPDF warning.
def pdfocr_tobytes(self, compress=True, language="eng", tessdata=None):
    """Save pixmap as an OCR-ed PDF page and return it as bytes.

    Args:
        compress: (bool) compress, default 1 (True).
        language: (str) language(s) occurring on page, default "eng" (English),
            multiples like "eng+ger" for English and German.
        tessdata: (str) folder name of Tesseract's language support. If None
            we use environment variable TESSDATA_PREFIX or search for
            Tesseract installation.
    Notes:
        On failure, make sure Tesseract is installed and you have set
        <tessdata> or environment variable "TESSDATA_PREFIX" to the folder
        containing your Tesseract's language support data.
    """
    tessdata = get_tessdata(tessdata)
    from io import BytesIO
    sink = BytesIO()
    # pdfocr_save() writes into the in-memory stream instead of a file.
    self.pdfocr_save(sink, compress=compress, language=language, tessdata=tessdata)
    return sink.getvalue()
def pil_image(self):
    """Create a Pillow Image from the Pixmap.

    Mode selection: no colorspace gives "L"; one component gives "L" or
    "LA" (with alpha); three components give "RGB" or "RGBA" (with alpha);
    anything else gives "CMYK". If Pillow cannot be imported a message is
    emitted and the ImportError is re-raised.
    """
    try:
        from PIL import Image
    except ImportError:
        message("PIL/Pillow not installed")
        raise
    cspace = self.colorspace
    if not cspace:
        mode = "L"
    else:
        ncomp = cspace.n
        if ncomp == 1:
            mode = "LA" if self.alpha else "L"
        elif ncomp == 3:
            mode = "RGBA" if self.alpha else "RGB"
        else:
            mode = "CMYK"
    dimensions = (self.width, self.height)
    return Image.frombytes(mode, dimensions, self.samples)
def pil_save(self, *args, **kwargs):
    """Write to image file using Pillow.

    An intermediate PIL Image is created, and its "save" method is used
    to store the image. See Pillow documentation to learn about the
    meaning of possible positional and keyword parameters.
    Use this when other output formats are desired.
    If no "dpi" keyword is given, (xres, yres) of the pixmap is passed.
    """
    image = self.pil_image()
    if "dpi" not in kwargs:
        kwargs["dpi"] = (self.xres, self.yres)
    image.save(*args, **kwargs)
def pil_tobytes(self, *args, **kwargs):
    """Convert to an image in memory using Pillow.

    An intermediate PIL Image is created, and its "save" method is used
    to store the image into an in-memory stream whose content is returned.
    See Pillow documentation to learn about the meaning of possible
    positional or keyword parameters.
    Use this when other output formats are desired.
    If no "dpi" keyword is given, (xres, yres) of the pixmap is passed.
    """
    sink = io.BytesIO()
    image = self.pil_image()
    if "dpi" not in kwargs:
        kwargs["dpi"] = (self.xres, self.yres)
    image.save(sink, *args, **kwargs)
    return sink.getvalue()
def pixel(self, x, y):
    """Get color tuple of pixel (x, y).
    Last item is the alpha if Pixmap.alpha is true.

    With `g_use_extra` the lookup is delegated to extra.pixmap_pixel().
    Otherwise the samples are read through the `samples_mv` memoryview.

    Raises:
        ValueError: (pure-Python path) if (x, y) is outside the pixmap.
    """
    if g_use_extra:
        return extra.pixmap_pixel(self.this.m_internal, x, y)
    raw = self.this.m_internal
    if x < 0 or x >= raw.w or y < 0 or y >= raw.h:
        # Raise directly, with the same exception type and message that
        # set_pixel() uses for an out-of-range position.
        raise ValueError(MSG_PIXEL_OUTSIDE)
    n = raw.n
    # Offset of the pixel's first sample: full rows above, then n per pixel.
    i = raw.stride * y + n * x
    return tuple(self.samples_mv[i: i+n])
@property
def samples(self)->bytes:
    """Copy of the pixmap samples as a bytes object (built from `samples_mv`)."""
    return bytes(self.samples_mv)
@property
def samples_mv(self):
    '''
    Pixmap samples memoryview.
    '''
    # The memoryview is kept on the instance so that `__del__()` can
    # release it; otherwise accessing it after we have been destructed will
    # fail, possibly crashing Python; this is #4155.
    #
    cached = self._samples_mv
    if cached is None:
        cached = mupdf.fz_pixmap_samples_memoryview(self.this)
        self._samples_mv = cached
    return cached
-
- def _samples_mv_release(self):
- if self._samples_mv:
- self._samples_mv.release()
@property
def samples_ptr(self):
    """Result of mupdf.fz_pixmap_samples_int() for this pixmap."""
    pixmap = self.this
    return mupdf.fz_pixmap_samples_int(pixmap)
def save(self, filename, output=None, jpg_quality=95):
    """Output as image in format determined by filename extension.

    Args:
        filename: str, or an object with an "absolute" attribute (converted
            with str()), or an object with a "name" attribute (its name is
            used).
        output: (str) only use to overrule filename extension.
            Accepted: PNG, JPEG, JPG, PNM, PGM, PPM, PBM, PAM, PSD, PS.
        jpg_quality: passed on to the writer.
    Raises:
        ValueError: unknown format, alpha not allowed for the format, or
            colorspace not supported by the format.
    """
    valid_formats = {
        "png": 1,
        "pnm": 2,
        "pgm": 2,
        "ppm": 2,
        "pbm": 2,
        "pam": 3,
        "psd": 5,
        "ps": 6,
        "jpg": 7,
        "jpeg": 7,
    }

    if type(filename) is not str:
        if hasattr(filename, "absolute"):
            filename = str(filename)
        elif hasattr(filename, "name"):
            filename = filename.name
    if output is None:
        # Use the extension without its leading dot.
        extension = os.path.splitext(filename)[1]
        output = extension[1:]
    idx = valid_formats.get(output.lower(), None)
    if idx is None:
        raise ValueError(f"Image format {output} not in {tuple(valid_formats.keys())}")
    if self.alpha and idx in (2, 6, 7):
        raise ValueError("'%s' cannot have alpha" % output)
    if self.colorspace and self.colorspace.n > 3 and idx in (1, 2, 4):
        raise ValueError(f"unsupported colorspace for '{output}'")
    if idx == 7:
        self.set_dpi(self.xres, self.yres)
    return self._writeIMG(filename, idx, jpg_quality)
def set_alpha(self, alphavalues=None, premultiply=1, opaque=None, matte=None):
    """Set alpha channel to values contained in a byte array.
    If omitted, set alphas to 255.

    Args:
        alphavalues: (bytes) with length (width * height) or 'None'.
        premultiply: (bool, True) premultiply colors with alpha values.
        opaque: (tuple, length colorspace.n) this color receives opacity 0.
        matte: (tuple, length colorspace.n)) preblending background color.
    Raises:
        ValueError: the pixmap has no alpha channel, or fewer than
            width * height alpha values were supplied.
    """
    pix = self.this
    alpha = 0
    m = 0
    if pix.alpha() == 0:
        raise ValueError( MSG_PIX_NOALPHA)
    n = mupdf.fz_pixmap_colorants(pix)
    w = mupdf.fz_pixmap_width(pix)
    h = mupdf.fz_pixmap_height(pix)
    # Total number of samples: n color samples plus one alpha per pixel.
    balen = w * h * (n+1)
    colors = [0, 0, 0, 0] # color from `opaque`; in the fallback below, pixels matching it get alpha 0
    bgcolor = [0, 0, 0, 0] # preblending background color
    zero_out = 0
    bground = 0
    # `opaque` / `matte` are only honoured if they are a list or tuple with
    # exactly n entries; otherwise they are silently ignored.
    if opaque and isinstance(opaque, (list, tuple)) and len(opaque) == n:
        for i in range(n):
            colors[i] = opaque[i]
        zero_out = 1
    if matte and isinstance( matte, (tuple, list)) and len(matte) == n:
        for i in range(n):
            bgcolor[i] = matte[i]
        bground = 1
    data = bytes()
    data_len = 0
    if alphavalues:
        #res = JM_BufferFromBytes(alphavalues)
        #data_len, data = mupdf.fz_buffer_storage(res)
        #if data_len < w * h:
        #    THROWMSG("bad alpha values")
        # fixme: don't seem to need to create an fz_buffer - can
        # use <alphavalues> directly?
        if isinstance(alphavalues, (bytes, bytearray)):
            data = alphavalues
            data_len = len(alphavalues)
        else:
            # Only bytes and bytearray are handled here.
            assert 0, f'unexpected type for alphavalues: {type(alphavalues)}'
        if data_len < w * h:
            raise ValueError( "bad alpha values")
    if 1:
        # Use C implementation for speed.
        mupdf.Pixmap_set_alpha_helper(
                balen,
                n,
                data_len,
                zero_out,
                mupdf.python_buffer_data( data),
                pix.m_internal,
                premultiply,
                bground,
                colors,
                bgcolor,
                )
    else:
        # Pure-Python fallback; never executed because of the `if 1:` above.
        # NOTE(review): presumably mirrors what the C helper does - confirm.
        i = k = j = 0
        data_fix = 255
        while i < balen:
            alpha = data[k]
            if zero_out:
                for j in range(i, i+n):
                    if mupdf.fz_samples_get(pix, j) != colors[j - i]:
                        data_fix = 255
                        break
                    else:
                        data_fix = 0
            if data_len:
                def fz_mul255( a, b):
                    x = a * b + 128
                    x += x // 256
                    return x // 256
                if data_fix == 0:
                    mupdf.fz_samples_set(pix, i+n, 0)
                else:
                    mupdf.fz_samples_set(pix, i+n, alpha)
                if premultiply and not bground:
                    for j in range(i, i+n):
                        mupdf.fz_samples_set(pix, j, fz_mul255( mupdf.fz_samples_get(pix, j), alpha))
                elif bground:
                    for j in range( i, i+n):
                        m = bgcolor[j - i]
                        mupdf.fz_samples_set(pix, j, fz_mul255( mupdf.fz_samples_get(pix, j) - m, alpha))
            else:
                mupdf.fz_samples_set(pix, i+n, data_fix)
            # Advance to the next pixel (n colors + alpha) and alpha value.
            i += n+1
            k += 1
def tobytes(self, output="png", jpg_quality=95):
    '''
    Convert to binary image stream of desired type.

    Args:
        output: (str) format name, case-insensitive; one of png, pnm, pgm,
            ppm, pbm, pam, tga, tpic, psd, ps, jpg, jpeg.
        jpg_quality: passed on to the writer.
    Returns:
        The result of _tobytes() for the selected format code.
    Raises:
        ValueError: unknown format, alpha not allowed for the format, or
            colorspace not supported by the format.
    '''
    valid_formats = {
        "png": 1,
        "pnm": 2,
        "pgm": 2,
        "ppm": 2,
        "pbm": 2,
        "pam": 3,
        "tga": 4,
        "tpic": 4,
        "psd": 5,
        "ps": 6,
        'jpg': 7,
        'jpeg': 7,
    }
    idx = valid_formats.get(output.lower(), None)
    if idx is None:
        raise ValueError(f"Image format {output} not in {tuple(valid_formats.keys())}")
    if self.alpha and idx in (2, 6, 7):
        # f-string so that the message names the offending format.
        raise ValueError(f"'{output}' cannot have alpha")
    if self.colorspace and self.colorspace.n > 3 and idx in (1, 2, 4):
        raise ValueError(f"unsupported colorspace for '{output}'")
    if idx == 7:
        self.set_dpi(self.xres, self.yres)
    barray = self._tobytes(idx, jpg_quality)
    return barray
def set_dpi(self, xres, yres):
    """Set resolution in both dimensions."""
    raw = self.this.m_internal
    raw.xres = xres
    raw.yres = yres
def set_origin(self, x, y):
    """Set top-left coordinates."""
    raw = self.this.m_internal
    raw.x = x
    raw.y = y
def set_pixel(self, x, y, color):
    """Set color of pixel (x, y).

    With `g_use_extra` the call is delegated to extra.set_pixel().
    Otherwise the position and the first n entries of `color` are checked
    (ValueError if outside the pixmap or outside 0..255) before the samples
    are written one by one.
    """
    if g_use_extra:
        return extra.set_pixel(self.this.m_internal, x, y, color)
    pixmap = self.this
    inside = _INRANGE(x, 0, pixmap.w() - 1) and _INRANGE(y, 0, pixmap.h() - 1)
    if not inside:
        raise ValueError( MSG_PIXEL_OUTSIDE)
    ncomp = pixmap.n()
    # Validate every component before touching the pixmap.
    for k in range(ncomp):
        if not _INRANGE(color[k], 0, 255):
            raise ValueError( MSG_BAD_COLOR_SEQ)
    base = mupdf.fz_pixmap_stride( pixmap) * y + ncomp * x
    for k in range(ncomp):
        pixmap.fz_samples_set(base + k, color[k])
def set_rect(self, bbox, color):
    """Set color of all pixels in bbox.

    The first n entries of `color` must lie in 0..255 (ValueError
    otherwise). Returns the truth value of the fill helper's result.
    """
    pixmap = self.this
    components = []
    for k in range(pixmap.n()):
        value = color[k]
        if not _INRANGE(value, 0, 255):
            raise ValueError( MSG_BAD_COLOR_SEQ)
        components.append(value)
    area = JM_irect_from_py(bbox)
    return bool(JM_fill_pixmap_rect_with_color(pixmap, components, area))
def shrink(self, factor):
    """Divide width and height by 2**factor.
    E.g. factor=1 shrinks to 25% of original size (in place).

    A factor below 1 only produces a warning.
    """
    if factor < 1:
        message_warning("ignoring shrink factor < 1")
        return
    pixmap = self.this
    mupdf.fz_subsample_pixmap( pixmap, factor)
    # The samples changed, so drop the cached views of the old data.
    self._memory_view = None
    self._samples_mv_release()
@property
def size(self):
    """Pixmap size."""
    pixmap = self.this
    return mupdf.fz_pixmap_size( pixmap)
@property
def stride(self):
    """Length of one image line (width * n)."""
    pixmap = self.this
    return pixmap.stride()
def tint_with(self, black, white):
    """Tint colors with modifiers for black and white.

    Without a colorspace, or with more than 3 colorspace components, only
    a message is emitted.
    """
    cspace = self.colorspace
    if not cspace or cspace.n > 3:
        message("warning: colorspace invalid for function")
        return
    return mupdf.fz_tint_pixmap( self.this, black, white)
@property
def w(self):
    """The width."""
    pixmap = self.this
    return mupdf.fz_pixmap_width(pixmap)
-
def warp(self, quad, width, height):
    """Return pixmap from a warped quad.

    Raises ValueError if `quad.is_convex` is false.
    """
    if not quad.is_convex:
        raise ValueError("quad must be convex")
    fz_quad = JM_quad_from_py(quad)
    # Corner order handed to fz_warp_pixmap: ul, ur, lr, ll.
    corners = [fz_quad.ul, fz_quad.ur, fz_quad.lr, fz_quad.ll]
    warped = mupdf.fz_warp_pixmap( self.this, corners, width, height)
    return Pixmap( warped)
@property
def x(self):
    """x component of Pixmap origin."""
    pixmap = self.this
    return mupdf.fz_pixmap_x(pixmap)
@property
def xres(self):
    """Resolution in x direction."""
    pixmap = self.this
    return pixmap.xres()
@property
def y(self):
    """y component of Pixmap origin."""
    pixmap = self.this
    return mupdf.fz_pixmap_y(pixmap)
@property
def yres(self):
    """Resolution in y direction."""
    pixmap = self.this
    return pixmap.yres()
# Long-form aliases: `width` and `height` are the same property objects as
# `w` and `h`.
width = w
height = h
-
- def __del__(self):
- if self._samples_mv:
- self._samples_mv.release()
# Remove the existing binding of the name `Point`; the class statement that
# follows binds it again.
del Point
class Point:
    """Point in the plane with coordinates x and y.

    Acts like a sequence of length two: index 0 is x, index 1 is y.
    """

    def __abs__(self):
        """Return the Euclidean norm sqrt(x*x + y*y)."""
        return math.sqrt(self.x * self.x + self.y * self.y)

    def __add__(self, p):
        """Add a number to both coordinates, or a 2-sequence per coordinate."""
        if hasattr(p, "__float__"):
            return Point(self.x + p, self.y + p)
        if len(p) != 2:
            raise ValueError("Point: bad seq len")
        return Point(self.x + p[0], self.y + p[1])

    def __bool__(self):
        """False only if both coordinates are zero."""
        return not (max(self) == min(self) == 0)

    def __eq__(self, p):
        """Compare with a 2-sequence; anything else is unequal.

        Operands of length two whose items cannot be subtracted from floats
        (e.g. the string "ab" or a two-key dict) compare unequal instead of
        letting the arithmetic error escape from the comparison.
        """
        if not hasattr(p, "__len__"):
            return False
        if len(p) != 2:
            return False
        try:
            return not (self - p)
        except (TypeError, ValueError, KeyError, IndexError):
            return False

    def __getitem__(self, i):
        """Index into the tuple (x, y)."""
        return (self.x, self.y)[i]

    def __hash__(self):
        """Hash of the tuple (x, y)."""
        return hash(tuple(self))

    def __init__(self, *args, x=None, y=None):
        '''
        Point() - all zeros
        Point(x, y)
        Point(Point) - new copy
        Point(sequence) - from 'sequence'
        Explicit keyword args x, y override earlier settings if not None.
        '''
        if not args:
            self.x = 0.0
            self.y = 0.0
        elif len(args) > 2:
            raise ValueError("Point: bad seq len")
        elif len(args) == 2:
            self.x = float(args[0])
            self.y = float(args[1])
        elif len(args) == 1:
            l = args[0]
            if isinstance(l, (mupdf.FzPoint, mupdf.fz_point)):
                # MuPDF point objects: copy their coordinates as they are.
                self.x = l.x
                self.y = l.y
            else:
                if not hasattr(l, "__getitem__"):
                    raise ValueError("Point: bad args")
                if len(l) != 2:
                    raise ValueError("Point: bad seq len")
                self.x = float(l[0])
                self.y = float(l[1])
        else:
            raise ValueError("Point: bad seq len")
        # Keyword overrides are stored without conversion.
        if x is not None: self.x = x
        if y is not None: self.y = y

    def __len__(self):
        return 2

    def __mul__(self, m):
        """Multiply by a number, a 2-sequence or a matrix-like.

        A number scales both coordinates; a 2-sequence gives the dot
        product (a number); anything else is passed to transform() on a
        copy of this point.
        """
        if hasattr(m, "__float__"):
            return Point(self.x * m, self.y * m)
        if hasattr(m, "__getitem__") and len(m) == 2:
            # dot product
            return self.x * m[0] + self.y * m[1]
        p = Point(self)
        return p.transform(m)

    def __neg__(self):
        return Point(-self.x, -self.y)

    def __nonzero__(self):
        """Same test as __bool__."""
        return not (max(self) == min(self) == 0)

    def __pos__(self):
        """Return a copy."""
        return Point(self)

    def __repr__(self):
        return "Point" + str(tuple(self))

    def __setitem__(self, i, v):
        """Set x (index 0) or y (index 1) to float(v); IndexError otherwise."""
        v = float(v)
        if i == 0: self.x = v
        elif i == 1: self.y = v
        else:
            raise IndexError("index out of range")
        return None

    def __sub__(self, p):
        """Subtract a number from both coordinates, or a 2-sequence per coordinate."""
        if hasattr(p, "__float__"):
            return Point(self.x - p, self.y - p)
        if len(p) != 2:
            raise ValueError("Point: bad seq len")
        return Point(self.x - p[0], self.y - p[1])

    def __truediv__(self, m):
        """Divide by a number, or transform a copy by the inverse of matrix m.

        Raises ZeroDivisionError if util_invert_matrix() yields no inverse.
        """
        if hasattr(m, "__float__"):
            return Point(self.x * 1./m, self.y * 1./m)
        m1 = util_invert_matrix(m)[1]
        if not m1:
            raise ZeroDivisionError("matrix not invertible")
        p = Point(self)
        return p.transform(m1)

    @property
    def abs_unit(self):
        """Unit vector with positive coordinates."""
        s = self.x * self.x + self.y * self.y
        if s < EPSILON:
            return Point(0,0)
        s = math.sqrt(s)
        return Point(abs(self.x) / s, abs(self.y) / s)

    def distance_to(self, *args):
        """Return distance to rectangle or another point.

        args[0] is a point-like (length 2) or rect-like (length 4) object;
        the optional args[1] is the unit: "px" (default), "in", "cm" or "mm".
        """
        if not len(args) > 0:
            raise ValueError("at least one parameter must be given")

        x = args[0]
        if len(x) == 2:
            x = Point(x)
        elif len(x) == 4:
            x = Rect(x)
        else:
            raise ValueError("arg1 must be point-like or rect-like")

        if len(args) > 1:
            unit = args[1]
        else:
            unit = "px"
        # Conversion factor per unit is first / second entry.
        u = {"px": (1.,1.), "in": (1.,72.), "cm": (2.54, 72.),
             "mm": (25.4, 72.)}
        f = u[unit][0] / u[unit][1]

        if type(x) is Point:
            return abs(self - x) * f

        # from here on, x is a rectangle
        # as a safeguard, make a finite copy of it
        r = Rect(x.top_left, x.top_left)
        r = r | x.bottom_right
        if self in r:
            return 0.0
        if self.x > r.x1:
            # Right of the rectangle: nearest is a right corner or the right edge.
            if self.y >= r.y1:
                return self.distance_to(r.bottom_right, unit)
            elif self.y <= r.y0:
                return self.distance_to(r.top_right, unit)
            else:
                return (self.x - r.x1) * f
        elif r.x0 <= self.x <= r.x1:
            # Within the horizontal extent: vertical distance to an edge.
            if self.y >= r.y1:
                return (self.y - r.y1) * f
            else:
                return (r.y0 - self.y) * f
        else:
            # Left of the rectangle.
            if self.y >= r.y1:
                return self.distance_to(r.bottom_left, unit)
            elif self.y <= r.y0:
                return self.distance_to(r.top_left, unit)
            else:
                return (r.x0 - self.x) * f

    def transform(self, m):
        """Replace point by its transformation with matrix-like m."""
        if len(m) != 6:
            raise ValueError("Matrix: bad seq len")
        self.x, self.y = util_transform_point(self, m)
        return self

    @property
    def unit(self):
        """Unit vector of the point."""
        s = self.x * self.x + self.y * self.y
        if s < EPSILON:
            return Point(0,0)
        s = math.sqrt(s)
        return Point(self.x / s, self.y / s)

    __div__ = __truediv__
    norm = __abs__
class Quad:
    """Quadrilateral given by its corner points ul, ur, ll and lr.

    Sequence access follows the order (ul, ur, ll, lr).
    """

    def __abs__(self):
        """Return abs(ul - ur) * abs(ul - ll), or 0.0 for an empty quad."""
        if self.is_empty:
            return 0.0
        return abs(self.ul - self.ur) * abs(self.ul - self.ll)

    def __add__(self, q):
        """Add a number to every corner, or a 4-sequence corner by corner."""
        if hasattr(q, "__float__"):
            return Quad(self.ul + q, self.ur + q, self.ll + q, self.lr + q)
        if len(q) != 4:
            raise ValueError("Quad: bad seq len")
        return Quad(self.ul + q[0], self.ur + q[1], self.ll + q[2], self.lr + q[3])

    def __bool__(self):
        return not self.is_empty

    def __contains__(self, x):
        """Containment test for a point-like, rect-like or quad-like `x`.

        Objects without a usable length, or with a length other than 2 or 4,
        are never contained.
        """
        try:
            l = x.__len__()
        except Exception:
            if g_exceptions_verbose > 1: exception_info()
            return False
        if l == 2:
            return util_point_in_quad(x, self)
        if l != 4:
            return False
        if CheckRect(x):
            if Rect(x).is_empty:
                return True
            # Both defining corners of the rectangle must be inside.
            return util_point_in_quad(x[:2], self) and util_point_in_quad(x[2:], self)
        if CheckQuad(x):
            for i in range(4):
                if not util_point_in_quad(x[i], self):
                    return False
            return True
        return False

    def __eq__(self, quad):
        """Corner-wise comparison with a 4-sequence."""
        if not hasattr(quad, "__len__"):
            return False
        return len(quad) == 4 and (
            self.ul == quad[0] and
            self.ur == quad[1] and
            self.ll == quad[2] and
            self.lr == quad[3]
        )

    def __getitem__(self, i):
        return (self.ul, self.ur, self.ll, self.lr)[i]

    def __hash__(self):
        return hash(tuple(self))

    def __init__(self, *args, ul=None, ur=None, ll=None, lr=None):
        '''
        Quad() - all zero points
        Quad(ul, ur, ll, lr)
        Quad(quad) - new copy
        Quad(sequence) - from 'sequence'

        Explicit keyword args ul, ur, ll, lr override earlier settings if not
        None.
        '''
        if not args:
            # One Point object per corner: a single shared instance would
            # make an in-place change of one corner show up in all four.
            self.ul, self.ur, self.ll, self.lr = Point(), Point(), Point(), Point()
        elif len(args) > 4:
            raise ValueError("Quad: bad seq len")
        elif len(args) == 4:
            self.ul, self.ur, self.ll, self.lr = map(Point, args)
        elif len(args) == 1:
            l = args[0]
            if isinstance(l, mupdf.FzQuad):
                # Keep the MuPDF object and copy its corners.
                self.this = l
                self.ul, self.ur, self.ll, self.lr = Point(l.ul), Point(l.ur), Point(l.ll), Point(l.lr)
            elif not hasattr(l, "__getitem__"):
                raise ValueError("Quad: bad args")
            elif len(l) != 4:
                raise ValueError("Quad: bad seq len")
            else:
                self.ul, self.ur, self.ll, self.lr = map(Point, l)
        else:
            raise ValueError("Quad: bad args")
        if ul is not None: self.ul = Point(ul)
        if ur is not None: self.ur = Point(ur)
        if ll is not None: self.ll = Point(ll)
        if lr is not None: self.lr = Point(lr)

    def __len__(self):
        return 4

    def __mul__(self, m):
        """Return a transformed copy (see transform())."""
        q = Quad(self)
        q = q.transform(m)
        return q

    def __neg__(self):
        return Quad(-self.ul, -self.ur, -self.ll, -self.lr)

    def __nonzero__(self):
        """Same test as __bool__."""
        return not self.is_empty

    def __pos__(self):
        """Return a copy."""
        return Quad(self)

    def __repr__(self):
        return "Quad" + str(tuple(self))

    def __setitem__(self, i, v):
        """Replace corner i (0=ul, 1=ur, 2=ll, 3=lr) by Point(v)."""
        if i == 0: self.ul = Point(v)
        elif i == 1: self.ur = Point(v)
        elif i == 2: self.ll = Point(v)
        elif i == 3: self.lr = Point(v)
        else:
            raise IndexError("index out of range")
        return None

    def __sub__(self, q):
        """Subtract a number from every corner, or a 4-sequence corner by corner."""
        if hasattr(q, "__float__"):
            return Quad(self.ul - q, self.ur - q, self.ll - q, self.lr - q)
        if len(q) != 4:
            raise ValueError("Quad: bad seq len")
        return Quad(self.ul - q[0], self.ur - q[1], self.ll - q[2], self.lr - q[3])

    def __truediv__(self, m):
        """Return a copy divided by a number or transformed by the inverse of m.

        Raises ZeroDivisionError if util_invert_matrix() yields no inverse.
        """
        if hasattr(m, "__float__"):
            im = 1. / m
        else:
            im = util_invert_matrix(m)[1]
            if not im:
                raise ZeroDivisionError("Matrix not invertible")
        q = Quad(self)
        q = q.transform(im)
        return q

    @property
    def is_convex(self):
        """Check if quad is convex and not degenerate.

        Notes:
            Check that for the two diagonals, the other two corners are not
            on the same side of the diagonal.
        Returns:
            True or False.
        """
        m = planish_line(self.ul, self.lr)  # puts this diagonal on x-axis
        p1 = self.ll * m  # transform the
        p2 = self.ur * m  # other two points
        if p1.y * p2.y > 0:
            return False
        m = planish_line(self.ll, self.ur)  # puts other diagonal on x-axis
        p1 = self.lr * m  # transform the
        p2 = self.ul * m  # remaining points
        if p1.y * p2.y > 0:
            return False
        return True

    @property
    def is_empty(self):
        """Check whether all quad corners are on the same line.

        This is the case if width or height is zero.
        """
        return self.width < EPSILON or self.height < EPSILON

    @property
    def is_infinite(self):
        """Check whether this is the infinite quad."""
        return self.rect.is_infinite

    @property
    def is_rectangular(self):
        """Check if quad is rectangular.

        Notes:
            Some rotation matrix can thus transform it into a rectangle.
            This is equivalent to three corners enclose 90 degrees.
        Returns:
            True or False.
        """

        sine = util_sine_between(self.ul, self.ur, self.lr)
        if abs(sine - 1) > EPSILON:  # the sine of the angle
            return False

        sine = util_sine_between(self.ur, self.lr, self.ll)
        if abs(sine - 1) > EPSILON:
            return False

        sine = util_sine_between(self.lr, self.ll, self.ul)
        if abs(sine - 1) > EPSILON:
            return False

        return True

    def morph(self, p, m):
        """Morph the quad with matrix-like 'm' and point-like 'p'.

        Return a new quad."""
        if self.is_infinite:
            return INFINITE_QUAD()
        delta = Matrix(1, 1).pretranslate(p.x, p.y)
        q = self * ~delta * m * delta
        return q

    @property
    def rect(self):
        """Rect spanned by the minimum and maximum corner coordinates."""
        r = Rect()
        r.x0 = min(self.ul.x, self.ur.x, self.lr.x, self.ll.x)
        r.y0 = min(self.ul.y, self.ur.y, self.lr.y, self.ll.y)
        r.x1 = max(self.ul.x, self.ur.x, self.lr.x, self.ll.x)
        r.y1 = max(self.ul.y, self.ur.y, self.lr.y, self.ll.y)
        return r

    def transform(self, m):
        """Replace quad by its transformation with matrix m."""
        if hasattr(m, "__float__"):
            pass
        elif len(m) != 6:
            raise ValueError("Matrix: bad seq len")
        self.ul *= m
        self.ur *= m
        self.ll *= m
        self.lr *= m
        return self

    __div__ = __truediv__
    # Longer one of each pair of opposite edges.
    width = property(lambda self: max(abs(self.ul - self.ur), abs(self.ll - self.lr)))
    height = property(lambda self: max(abs(self.ul - self.ll), abs(self.ur - self.lr)))
class Rect:
    """Rectangle described by the float coordinates x0, y0, x1, y1.

    The object behaves like a sequence of length 4 (indexing, iteration,
    `len()`), supports arithmetic with numbers, rect-likes and
    matrix-likes, and offers set-like operators (`&` intersection,
    `|` union, `in` containment).
    """

    def __abs__(self):
        """Return the area; 0.0 for an empty or infinite rectangle."""
        if self.is_empty or self.is_infinite:
            return 0.0
        return (self.x1 - self.x0) * (self.y1 - self.y0)

    def __add__(self, p):
        """Add a number to every coordinate, or a 4-sequence item-wise."""
        if hasattr(p, "__float__"):
            return Rect(self.x0 + p, self.y0 + p, self.x1 + p, self.y1 + p)
        if len(p) != 4:
            raise ValueError("Rect: bad seq len")
        return Rect(self.x0 + p[0], self.y0 + p[1], self.x1 + p[2], self.y1 + p[3])

    def __and__(self, x):
        """Return the intersection with rect-like 'x' as a new Rect."""
        if not hasattr(x, "__len__"):
            raise ValueError("bad operand 2")
        r1 = Rect(x)
        r = Rect(self)
        return r.intersect(r1)

    def __bool__(self):
        """False only if all four coordinates are zero."""
        return not (max(self) == min(self) == 0)

    def __contains__(self, x):
        """Containment test.

        * number-like: True if equal to one of the four coordinates;
        * length 2: point-in-rectangle test;
        * length 4: True if rect-like (or quad-like) 'x' lies inside;
        * any other length: False.
        """
        if hasattr(x, "__float__"):
            return x in tuple(self)
        l = len(x)
        if l == 2:
            return util_is_point_in_rect(x, self)
        if l == 4:
            try:
                r = Rect(x)
            except Exception:
                # Not rect-like: fall back to treating 'x' as a quad.
                if g_exceptions_verbose > 1: exception_info()
                r = Quad(x).rect
            return (self.x0 <= r.x0 <= r.x1 <= self.x1 and
                    self.y0 <= r.y0 <= r.y1 <= self.y1)
        return False

    def __eq__(self, rect):
        """Item-wise equality with any sequence of length 4.

        Objects without `__len__`, of another length, or whose items
        cannot be subtracted from floats compare unequal instead of
        raising.
        """
        if not hasattr(rect, "__len__"):
            return False
        if len(rect) != 4:
            return False
        try:
            return not (self - rect)
        except (TypeError, ValueError, LookupError):
            # E.g. a 4-character string or a 4-item dict/set: `==` must
            # answer False rather than propagate an arithmetic error.
            return False

    def __getitem__(self, i):
        """Return coordinate number 'i' of (x0, y0, x1, y1)."""
        return (self.x0, self.y0, self.x1, self.y1)[i]

    def __hash__(self):
        """Hash of the coordinate tuple."""
        return hash(tuple(self))

    def __init__(self, *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
        """
        Rect() - all zeros
        Rect(x0, y0, x1, y1)
        Rect(top-left, x1, y1)
        Rect(x0, y0, bottom-right)
        Rect(top-left, bottom-right)
        Rect(Rect or IRect) - new copy
        Rect(sequence) - from 'sequence'

        Explicit keyword args p0, p1, x0, y0, x1, y1 override earlier settings
        if not None.
        """
        x0, y0, x1, y1 = util_make_rect( *args, p0=p0, p1=p1, x0=x0, y0=y0, x1=x1, y1=y1)
        self.x0 = float( x0)
        self.y0 = float( y0)
        self.x1 = float( x1)
        self.y1 = float( y1)

    def __len__(self):
        """A rectangle always has four coordinates."""
        return 4

    def __mul__(self, m):
        """Multiply by a number (item-wise) or transform by matrix-like 'm'."""
        if hasattr(m, "__float__"):
            return Rect(self.x0 * m, self.y0 * m, self.x1 * m, self.y1 * m)
        r = Rect(self)
        r = r.transform(m)
        return r

    def __neg__(self):
        """Return a new Rect with all coordinates negated."""
        return Rect(-self.x0, -self.y0, -self.x1, -self.y1)

    def __nonzero__(self):
        """Python 2 spelling of `__bool__`."""
        return not (max(self) == min(self) == 0)

    def __or__(self, x):
        """Return a new Rect extended to include point-like or rect-like 'x'."""
        if not hasattr(x, "__len__"):
            raise ValueError("bad operand 2")
        r = Rect(self)
        if len(x) == 2:
            return r.include_point(x)
        if len(x) == 4:
            return r.include_rect(x)
        raise ValueError("bad operand 2")

    def __pos__(self):
        """Return a copy."""
        return Rect(self)

    def __repr__(self):
        return "Rect" + str(tuple(self))

    def __setitem__(self, i, v):
        """Set coordinate 0..3 (x0, y0, x1, y1) to float(v)."""
        v = float(v)
        if i == 0: self.x0 = v
        elif i == 1: self.y0 = v
        elif i == 2: self.x1 = v
        elif i == 3: self.y1 = v
        else:
            raise IndexError("index out of range")
        return None

    def __sub__(self, p):
        """Subtract a number from every coordinate, or a 4-sequence item-wise."""
        if hasattr(p, "__float__"):
            return Rect(self.x0 - p, self.y0 - p, self.x1 - p, self.y1 - p)
        if len(p) != 4:
            raise ValueError("Rect: bad seq len")
        return Rect(self.x0 - p[0], self.y0 - p[1], self.x1 - p[2], self.y1 - p[3])

    def __truediv__(self, m):
        """Divide by a number (item-wise) or transform by the inverse of matrix-like 'm'.

        Raises:
            ZeroDivisionError: if `util_invert_matrix` yields no inverse.
        """
        if hasattr(m, "__float__"):
            return Rect(self.x0 * 1./m, self.y0 * 1./m, self.x1 * 1./m, self.y1 * 1./m)
        im = util_invert_matrix(m)[1]
        if not im:
            raise ZeroDivisionError(f"Matrix not invertible: {m}")
        r = Rect(self)
        r = r.transform(im)
        return r

    @property
    def bottom_left(self):
        """Bottom-left corner."""
        return Point(self.x0, self.y1)

    @property
    def bottom_right(self):
        """Bottom-right corner."""
        return Point(self.x1, self.y1)

    def contains(self, x):
        """Check if containing point-like or rect-like x."""
        return self.__contains__(x)

    @property
    def height(self):
        """y1 - y0, but never negative."""
        return max(0, self.y1 - self.y0)

    def include_point(self, p):
        """Extend to include point-like p (in place); returns self."""
        if len(p) != 2:
            raise ValueError("Point: bad seq len")
        self.x0, self.y0, self.x1, self.y1 = util_include_point_in_rect(self, p)
        return self

    def include_rect(self, r):
        """Extend to include rect-like r (in place); returns self.

        If either rectangle is infinite the result is infinite; an empty
        'r' changes nothing; an empty self is replaced by 'r'.
        """
        if len(r) != 4:
            raise ValueError("Rect: bad seq len")
        r = Rect(r)
        if r.is_infinite or self.is_infinite:
            self.x0, self.y0, self.x1, self.y1 = FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT
        elif r.is_empty:
            return self
        elif self.is_empty:
            self.x0, self.y0, self.x1, self.y1 = r.x0, r.y0, r.x1, r.y1
        else:
            self.x0, self.y0, self.x1, self.y1 = util_union_rect(self, r)
        return self

    def intersect(self, r):
        """Restrict to common rect with rect-like r (in place); returns self.

        An infinite 'r' changes nothing; an infinite self or an empty 'r'
        makes self a copy of 'r'; an empty self stays as it is.
        """
        if not len(r) == 4:
            raise ValueError("Rect: bad seq len")
        r = Rect(r)
        if r.is_infinite:
            return self
        elif self.is_infinite:
            self.x0, self.y0, self.x1, self.y1 = r.x0, r.y0, r.x1, r.y1
        elif r.is_empty:
            self.x0, self.y0, self.x1, self.y1 = r.x0, r.y0, r.x1, r.y1
        elif self.is_empty:
            return self
        else:
            self.x0, self.y0, self.x1, self.y1 = util_intersect_rect(self, r)
        return self

    def intersects(self, x):
        """Check if intersection with rectangle x is not empty.

        Always False if either rectangle is empty or infinite.
        """
        rect2 = Rect(x)
        return (1
                and not self.is_empty
                and not self.is_infinite
                and not rect2.is_empty
                and not rect2.is_infinite
                and self.x0 < rect2.x1
                and rect2.x0 < self.x1
                and self.y0 < rect2.y1
                and rect2.y0 < self.y1
                )

    @property
    def is_empty(self):
        """True if rectangle area is empty."""
        return self.x0 >= self.x1 or self.y0 >= self.y1

    @property
    def is_infinite(self):
        """True if this is the infinite rectangle."""
        return self.x0 == self.y0 == FZ_MIN_INF_RECT and self.x1 == self.y1 == FZ_MAX_INF_RECT

    @property
    def is_valid(self):
        """True if rectangle is valid (x0 <= x1 and y0 <= y1)."""
        return self.x0 <= self.x1 and self.y0 <= self.y1

    def morph(self, p, m):
        """Morph with matrix-like m and point-like p.

        Returns a new quad."""
        if self.is_infinite:
            return INFINITE_QUAD()
        return self.quad.morph(p, m)

    def norm(self):
        """Euclidean norm of the four coordinates."""
        return math.sqrt(sum([c*c for c in self]))

    def normalize(self):
        """Replace rectangle with its valid version (in place); returns self.

        Coordinates are swapped where necessary so that x0 <= x1 and
        y0 <= y1.
        """
        if self.x1 < self.x0:
            self.x0, self.x1 = self.x1, self.x0
        if self.y1 < self.y0:
            self.y0, self.y1 = self.y1, self.y0
        return self

    @property
    def quad(self):
        """Return Quad version of rectangle."""
        return Quad(self.tl, self.tr, self.bl, self.br)

    def round(self):
        """Return the IRect."""
        return IRect(util_round_rect(self))

    @property
    def top_left(self):
        """Top-left corner."""
        return Point(self.x0, self.y0)

    @property
    def top_right(self):
        """Top-right corner."""
        return Point(self.x1, self.y0)

    def torect(self, r):
        """Return matrix that converts to target rect.

        Raises:
            ValueError: if either rectangle is infinite or empty.
        """
        r = Rect(r)
        if self.is_infinite or self.is_empty or r.is_infinite or r.is_empty:
            raise ValueError("rectangles must be finite and not empty")
        # Move own top-left to the origin, scale to the target size, then
        # move the origin to the target's top-left.
        return (
                Matrix(1, 0, 0, 1, -self.x0, -self.y0)
                * Matrix(r.width / self.width, r.height / self.height)
                * Matrix(1, 0, 0, 1, r.x0, r.y0)
                )

    def transform(self, m):
        """Replace with the transformation by matrix-like m (in place); returns self."""
        if not len(m) == 6:
            raise ValueError("Matrix: bad seq len")
        self.x0, self.y0, self.x1, self.y1 = util_transform_rect(self, m)
        return self

    @property
    def width(self):
        """x1 - x0, but never negative."""
        return max(0, self.x1 - self.x0)

    # Aliases. `round` below refers to the method defined above, not the builtin.
    __div__ = __truediv__
    bl = bottom_left
    br = bottom_right
    irect = property(round)
    tl = top_left
    tr = top_right
class Story:
    """Wrapper around a MuPDF story (`mupdf.FzStory` / `mupdf.FzStoryS`)
    built from HTML text, with helpers to place, draw and write it and to
    search for a rectangle it fits into.
    """

    def __init__( self, html='', user_css=None, em=12, archive=None):
        """Create the story from 'html'.

        Args:
            html: HTML source text; it is encoded as UTF-8.
            user_css: passed through to the MuPDF story constructor.
            em: passed through to the MuPDF story constructor.
            archive: an `Archive`, or anything accepted by `Archive(...)`;
                a false value results in an `mupdf.FzArchive(None)`.
        """
        buffer_ = mupdf.fz_new_buffer_from_copied_data( html.encode('utf-8'))
        if archive and not isinstance(archive, Archive):
            archive = Archive(archive)
        arch = archive.this if archive else mupdf.FzArchive( None)
        if hasattr(mupdf, 'FzStoryS'):
            self.this = mupdf.FzStoryS( buffer_, user_css, em, arch)
        else:
            self.this = mupdf.FzStory( buffer_, user_css, em, arch)

    def add_header_ids(self):
        '''
        Look for `<h1..6>` items in `self` and adds unique `id`
        attributes if not already present.
        '''
        dom = self.body
        i = 0
        x = dom.find(None, None, None)
        while x:
            name = x.tagname
            if len(name) == 2 and name[0]=="h" and name[1] in "123456":
                attr = x.get_attribute_value("id")
                if not attr:
                    id_ = f"h_id_{i}"
                    #log(f"{name=}: setting {id_=}")
                    x.set_attribute("id", id_)
                    i += 1
            x = x.find_next(None, None, None)

    @staticmethod
    def add_pdf_links(document_or_stream, positions):
        """
        Adds links to PDF document.
        Args:
            document_or_stream:
                A PDF `Document` or raw PDF content, for example an
                `io.BytesIO` instance.
            positions:
                List of `ElementPosition`'s for `document_or_stream`,
                typically from Story.element_positions(). If two or more
                positions have the same id, the first one is used and
                later ones are ignored.
        Returns:
            `document_or_stream` if a `Document` instance, otherwise a
            new `Document` instance.
        We raise an exception if an `href` in `positions` refers to an
        internal position `#<name>` but no item in `positions` has `id =
        name`.
        """
        if isinstance(document_or_stream, Document):
            document = document_or_stream
        else:
            document = Document("pdf", document_or_stream)

        # Create dict from id to position, which we will use to find
        # link destinations. Only positions with bit 0 of `open_close`
        # set are considered.
        #
        id_to_position = dict()
        #log(f"positions: {positions}")
        for position in positions:
            #log(f"add_pdf_links(): position: {position}")
            if (position.open_close & 1) and position.id:
                #log(f"add_pdf_links(): position with id: {position}")
                if position.id in id_to_position:
                    #log(f"Ignoring duplicate positions with id={position.id!r}")
                    pass
                else:
                    id_to_position[ position.id] = position

        # Insert links for all positions that have an `href`.
        #
        for position_from in positions:

            if (position_from.open_close & 1) and position_from.href:

                #log(f"add_pdf_links(): position with href: {position}")
                link = dict()
                link['from'] = Rect(position_from.rect)

                if position_from.href.startswith("#"):
                    #`<a href="#...">...</a>` internal link.
                    target_id = position_from.href[1:]
                    try:
                        position_to = id_to_position[ target_id]
                    except Exception as e:
                        if g_exceptions_verbose > 1: exception_info()
                        raise RuntimeError(f"No destination with id={target_id}, required by position_from: {position_from}") from e
                    # Make link from `position_from`'s rect to top-left of
                    # `position_to`'s rect.
                    link["kind"] = LINK_GOTO
                    x0, y0, x1, y1 = position_to.rect
                    # This appears to work well with viewers which scroll
                    # to make destination point top-left of window.
                    link["to"] = Point(x0, y0)
                    link["page"] = position_to.page_num - 1

                else:
                    # `<a href="...">...</a>` external link.
                    if position_from.href.startswith('name:'):
                        link['kind'] = LINK_NAMED
                        link['name'] = position_from.href[5:]
                    else:
                        link['kind'] = LINK_URI
                        link['uri'] = position_from.href

                #log(f'Adding link: {position_from.page_num=} {link=}.')
                document[position_from.page_num - 1].insert_link(link)

        return document

    @property
    def body(self):
        """The `bodytag()` of the story's DOM."""
        dom = self.document()
        return dom.bodytag()

    def document( self):
        """Return the story's DOM wrapped in an `Xml` object."""
        dom = mupdf.fz_story_document( self.this)
        return Xml( dom)

    def draw( self, device, matrix=None):
        """Draw the story on 'device' using 'matrix'.

        A false 'device' is replaced by `mupdf.FzDevice(None)`.
        """
        ctm2 = JM_matrix_from_py( matrix)
        dev = device.this if device else mupdf.FzDevice( None)
        mupdf.fz_draw_story( self.this, dev, ctm2)

    def element_positions( self, function, args=None):
        '''
        Trigger a callback function to record where items have been placed.

        Args:
            function:
                A callable that can be called with exactly one positional
                argument. Plain functions, bound methods,
                `functools.partial` objects and callable instances are
                accepted. It receives an object with attributes `depth`,
                `heading`, `id`, `rect`, `text`, `open_close`, `rect_num`
                and `href`, plus one attribute per item of `args`.
            args:
                Optional dict; every key must be a string that is a valid
                identifier. Anything that is not a dict is ignored.

        Raises:
            ValueError: for an invalid key in `args`, or if `function`
                cannot be called with one argument.
        '''
        import inspect  # local import keeps this block self-contained

        if type(args) is dict:
            for k in args.keys():
                if not (type(k) is str and k.isidentifier()):
                    raise ValueError(f"invalid key '{k}'")
        else:
            args = {}

        def accepts_one_argument(fn):
            # Historical rule, kept so that everything accepted before is
            # still accepted.
            code = getattr(fn, "__code__", None)
            if code is not None and code.co_argcount == 1:
                return True
            # General rule: also covers bound methods, partials and
            # callable objects, which have no suitable `__code__`.
            try:
                inspect.signature(fn).bind(None)
            except (TypeError, ValueError):
                return False
            return True

        if not callable(function) or not accepts_one_argument(function):
            raise ValueError("callback 'function' must be a callable with exactly one argument")

        def function2( position):
            # Copy the MuPDF position into a plain Python object before
            # handing it to the user's callback.
            class Position2:
                pass
            position2 = Position2()
            position2.depth = position.depth
            position2.heading = position.heading
            position2.id = position.id
            position2.rect = JM_py_from_rect(position.rect)
            position2.text = position.text
            position2.open_close = position.open_close
            position2.rect_num = position.rectangle_num
            position2.href = position.href
            if args:
                for k, v in args.items():
                    setattr( position2, k, v)
            function( position2)
        mupdf.fz_story_positions( self.this, function2)

    def place( self, where):
        """Place the story into rect-like 'where'.

        Returns:
            `(more, filled)`: the value returned by
            `mupdf.fz_place_story` and the filled rectangle converted
            with `JM_py_from_rect`.
        """
        where = JM_rect_from_py( where)
        filled = mupdf.FzRect()
        more = mupdf.fz_place_story( self.this, where, filled)
        return more, JM_py_from_rect( filled)

    def reset( self):
        """Call `mupdf.fz_reset_story` on the story."""
        mupdf.fz_reset_story( self.this)

    def write(self, writer, rectfn, positionfn=None, pagefn=None):
        """Repeatedly place and draw the story until nothing is left.

        Args:
            writer:
                Object with `begin_page(mediabox)` and `end_page()`; if
                false, the story is drawn to no device.
            rectfn:
                Called as `rectfn(rect_num, filled)`; must return
                `(mediabox, rect, ctm)`. A true `mediabox` starts a new
                page.
            positionfn:
                Optional; called for each element position, which gets an
                additional `page_num` attribute.
            pagefn:
                Optional; called as `pagefn(page_num, mediabox, dev, after)`
                with `after=0` right after a page was begun and `after=1`
                right before it is ended.
        """
        dev = None
        page_num = 0
        rect_num = 0
        filled = Rect(0, 0, 0, 0)
        while 1:
            mediabox, rect, ctm = rectfn(rect_num, filled)
            rect_num += 1
            if mediabox:
                # new page.
                page_num += 1
            more, filled = self.place( rect)
            if positionfn:
                def positionfn2(position):
                    # We add a `.page_num` member to the
                    # `ElementPosition` instance.
                    position.page_num = page_num
                    positionfn(position)
                self.element_positions(positionfn2)
            if writer:
                if mediabox:
                    # new page.
                    if dev:
                        if pagefn:
                            pagefn(page_num, mediabox, dev, 1)
                        writer.end_page()
                    dev = writer.begin_page( mediabox)
                    if pagefn:
                        pagefn(page_num, mediabox, dev, 0)
                self.draw( dev, ctm)
                if not more:
                    if pagefn:
                        pagefn( page_num, mediabox, dev, 1)
                    writer.end_page()
            else:
                self.draw(None, ctm)
            if not more:
                break

    @staticmethod
    def write_stabilized(writer, contentfn, rectfn, user_css=None, em=12, positionfn=None, pagefn=None, archive=None, add_header_ids=True):
        """Write content that depends on its own element positions.

        `contentfn(positions)` is called repeatedly with the positions
        collected in the previous iteration until it returns the same
        content twice in a row; only that final, stable iteration is
        written to 'writer' and reported to 'positionfn'.
        """
        positions = list()
        content = None
        # Iterate until stable.
        while 1:
            content_prev = content
            content = contentfn( positions)
            stable = bool(content == content_prev)
            story = Story(content, user_css, em, archive)

            if add_header_ids:
                story.add_header_ids()

            positions = list()
            def positionfn2(position):
                #log(f"write_stabilized(): {stable=} {positionfn=} {position=}")
                positions.append(position)
                if stable and positionfn:
                    positionfn(position)
            story.write(
                    writer if stable else None,
                    rectfn,
                    positionfn2,
                    pagefn,
                    )
            if stable:
                break

    @staticmethod
    def write_stabilized_with_links(contentfn, rectfn, user_css=None, em=12, positionfn=None, pagefn=None, archive=None, add_header_ids=True):
        """Like `write_stabilized()`, writing to an in-memory PDF and then
        adding links with `add_pdf_links()`. Returns the resulting document.
        """
        #log("write_stabilized_with_links()")
        stream = io.BytesIO()
        writer = DocumentWriter(stream)
        positions = []
        def positionfn2(position):
            #log(f"write_stabilized_with_links(): {position=}")
            positions.append(position)
            if positionfn:
                positionfn(position)
        Story.write_stabilized(writer, contentfn, rectfn, user_css, em, positionfn2, pagefn, archive, add_header_ids)
        writer.close()
        stream.seek(0)
        return Story.add_pdf_links(stream, positions)

    def write_with_links(self, rectfn, positionfn=None, pagefn=None):
        """Like `write()`, writing to an in-memory PDF and then adding
        links with `add_pdf_links()`. Returns the resulting document.
        """
        #log("write_with_links()")
        stream = io.BytesIO()
        writer = DocumentWriter(stream)
        positions = []
        def positionfn2(position):
            #log(f"write_with_links(): {position=}")
            positions.append(position)
            if positionfn:
                positionfn(position)
        self.write(writer, rectfn, positionfn=positionfn2, pagefn=pagefn)
        writer.close()
        stream.seek(0)
        return Story.add_pdf_links(stream, positions)

    class FitResult:
        '''
        The result from a `Story.fit*()` method.

        Members:

        `big_enough`:
            `True` if the fit succeeded.
        `filled`:
            From the last call to `Story.place()`.
        `more`:
            `False` if the fit succeeded.
        `numcalls`:
            Number of calls made to `self.place()`.
        `parameter`:
            The successful parameter value, or the largest failing value.
        `rect`:
            The rect created from `parameter`.
        '''
        def __init__(self, big_enough=None, filled=None, more=None, numcalls=None, parameter=None, rect=None):
            self.big_enough = big_enough
            self.filled = filled
            self.more = more
            self.numcalls = numcalls
            self.parameter = parameter
            self.rect = rect

        def __repr__(self):
            return (
                    f' big_enough={self.big_enough}'
                    f' filled={self.filled}'
                    f' more={self.more}'
                    f' numcalls={self.numcalls}'
                    f' parameter={self.parameter}'
                    f' rect={self.rect}'
                    )

    def fit(self, fn, pmin=None, pmax=None, delta=0.001, verbose=False):
        '''
        Finds optimal rect that contains the story `self`.

        Returns a `Story.FitResult` instance.

        On success, the last call to `self.place()` will have been with the
        returned rectangle, so `self.draw()` can be used directly.

        Args:
            :arg fn:
                A callable taking a floating point `parameter` and returning a
                `pymupdf.Rect()`. If the rect is empty, we assume the story will
                not fit and do not call `self.place()`.
                Must guarantee that `self.place()` behaves monotonically when
                given rect `fn(parameter`) as `parameter` increases. This
                usually means that both width and height increase or stay
                unchanged as `parameter` increases.
            :arg pmin:
                Minimum parameter to consider; `None` for -infinity.
            :arg pmax:
                Maximum parameter to consider; `None` for +infinity.
            :arg delta:
                Maximum error in returned `parameter`.
            :arg verbose:
                If true we output diagnostics.
        '''
        def log(text):
            assert verbose
            message(f'fit(): {text}')

        assert isinstance(pmin, (int, float)) or pmin is None
        assert isinstance(pmax, (int, float)) or pmax is None

        class State:
            def __init__(self):
                self.pmin = pmin
                self.pmax = pmax
                self.pmin_result = None
                self.pmax_result = None
                self.result = None
                self.numcalls = 0
                # Parameter of the most recent update() call.
                self.last_p = None
                if verbose:
                    self.pmin0 = pmin
                    self.pmax0 = pmax
        state = State()

        if verbose:
            log(f'starting. {state.pmin=} {state.pmax=}.')

        self.reset()

        def ret():
            if state.pmax is not None:
                if state.last_p != state.pmax:
                    # Re-place with the winning parameter so that the
                    # story's state matches the returned result.
                    if verbose:
                        log(f'Calling update() with pmax, because was overwritten by later calls.')
                    big_enough = update(state.pmax)
                    assert big_enough
                result = state.pmax_result
            else:
                result = state.pmin_result if state.pmin_result else Story.FitResult(numcalls=state.numcalls)
            if verbose:
                log(f'finished. {state.pmin0=} {state.pmax0=} {state.pmax=}: returning {result=}')
            return result

        def update(parameter):
            '''
            Evaluates `more, _ = self.place(fn(parameter))`. If `more` is
            false, then `rect` is big enough to contain `self` and we
            set `state.pmax=parameter` and return True. Otherwise we set
            `state.pmin=parameter` and return False.
            '''
            rect = fn(parameter)
            assert isinstance(rect, Rect), f'{type(rect)=} {rect=}'
            if rect.is_empty:
                big_enough = False
                result = Story.FitResult(parameter=parameter, numcalls=state.numcalls)
                if verbose:
                    log(f'update(): not calling self.place() because rect is empty.')
            else:
                more, filled = self.place(rect)
                state.numcalls += 1
                big_enough = not more
                result = Story.FitResult(
                        filled=filled,
                        more=more,
                        numcalls=state.numcalls,
                        parameter=parameter,
                        rect=rect,
                        big_enough=big_enough,
                        )
                if verbose:
                    log(f'update(): called self.place(): {state.numcalls:>2d}: {more=} {parameter=} {rect=}.')
            if big_enough:
                state.pmax = parameter
                state.pmax_result = result
            else:
                state.pmin = parameter
                state.pmin_result = result
            state.last_p = parameter
            return big_enough

        def opposite(p, direction):
            '''
            Returns same sign as `direction`, larger or smaller than `p` if
            direction is positive or negative respectively.
            '''
            if p is None or p==0:
                return direction
            if direction * p > 0:
                return 2 * p
            return -p

        if state.pmin is None:
            # Find an initial finite pmin value.
            if verbose: log(f'finding pmin.')
            parameter = opposite(state.pmax, -1)
            while 1:
                if not update(parameter):
                    break
                parameter *= 2
        else:
            if update(state.pmin):
                if verbose: log(f'{state.pmin=} is big enough.')
                return ret()

        if state.pmax is None:
            # Find an initial finite pmax value.
            if verbose: log(f'finding pmax.')
            parameter = opposite(state.pmin, +1)
            while 1:
                if update(parameter):
                    break
                parameter *= 2
        else:
            if not update(state.pmax):
                # No solution possible.
                state.pmax = None
                if verbose: log(f'No solution possible {state.pmax=}.')
                return ret()

        # Do binary search in pmin..pmax.
        if verbose: log(f'doing binary search with {state.pmin=} {state.pmax=}.')
        while 1:
            if state.pmax - state.pmin < delta:
                return ret()
            parameter = (state.pmin + state.pmax) / 2
            update(parameter)

    def fit_scale(self, rect, scale_min=0, scale_max=None, delta=0.001, verbose=False):
        '''
        Finds smallest value `scale` in range `scale_min..scale_max` where
        `scale * rect` is large enough to contain the story `self`.
        Returns a `Story.FitResult` instance.
        :arg rect:
            rect-like `(x0, y0, x1, y1)`; its top-left corner is kept and
            its width and height are multiplied by `scale`.
        :arg scale_min:
            Minimum scale to consider; must be >= 0.
        :arg scale_max:
            Maximum scale to consider, must be >= scale_min or `None` for
            infinite.
        :arg delta:
            Maximum error in returned scale.
        :arg verbose:
            If true we output diagnostics.
        '''
        x0, y0, x1, y1 = rect
        width = x1 - x0
        height = y1 - y0
        def fn(scale):
            return Rect(x0, y0, x0 + scale*width, y0 + scale*height)
        return self.fit(fn, scale_min, scale_max, delta, verbose)

    def fit_height(self, width, height_min=0, height_max=None, origin=(0, 0), delta=0.001, verbose=False):
        '''
        Finds smallest height in range `height_min..height_max` where a rect
        with size `(width, height)` is large enough to contain the story
        `self`.
        Returns a `Story.FitResult` instance.
        :arg width:
            width of rect.
        :arg height_min:
            Minimum height to consider; must be >= 0.
        :arg height_max:
            Maximum height to consider, must be >= height_min or `None` for
            infinite.
        :arg origin:
            `(x0, y0)` of rect.
        :arg delta:
            Maximum error in returned height.
        :arg verbose:
            If true we output diagnostics.
        '''
        x0, y0 = origin
        x1 = x0 + width
        def fn(height):
            return Rect(x0, y0, x1, y0+height)
        return self.fit(fn, height_min, height_max, delta, verbose)

    def fit_width(self, height, width_min=0, width_max=None, origin=(0, 0), delta=0.001, verbose=False):
        '''
        Finds smallest width in range `width_min..width_max` where a rect with size
        `(width, height)` is large enough to contain the story `self`.
        Returns a `Story.FitResult` instance.
        :arg height:
            height of rect.
        :arg width_min:
            Minimum width to consider; must be >= 0.
        :arg width_max:
            Maximum width to consider, must be >= width_min or `None` for
            infinite.
        :arg origin:
            `(x0, y0)` of rect.
        :arg delta:
            Maximum error in returned width.
        :arg verbose:
            If true we output diagnostics.
        '''
        x0, y0 = origin
        y1 = y0 + height
        def fn(width):
            return Rect(x0, y0, x0+width, y1)
        return self.fit(fn, width_min, width_max, delta, verbose)
- class TextPage:
- def __init__(self, *args):
- if args_match(args, mupdf.FzRect):
- mediabox = args[0]
- self.this = mupdf.FzStextPage( mediabox)
- elif args_match(args, mupdf.FzStextPage):
- self.this = args[0]
- else:
- raise Exception(f'Unrecognised args: {args}')
- self.thisown = True
- self.parent = None
- def _extractText(self, format_):
- this_tpage = self.this
- res = mupdf.fz_new_buffer(1024)
- out = mupdf.FzOutput( res)
- # fixme: mupdfwrap.py thinks fz_output is not copyable, possibly
- # because there is no .refs member visible and no fz_keep_output() fn,
- # although there is an fz_drop_output(). So mupdf.fz_new_output_with_buffer()
- # doesn't convert the returned fz_output* into a mupdf.FzOutput.
- #out = mupdf.FzOutput(out)
- if format_ == 1:
- mupdf.fz_print_stext_page_as_html(out, this_tpage, 0)
- elif format_ == 3:
- mupdf.fz_print_stext_page_as_xml(out, this_tpage, 0)
- elif format_ == 4:
- mupdf.fz_print_stext_page_as_xhtml(out, this_tpage, 0)
- else:
- JM_print_stext_page_as_text(res, this_tpage)
- out.fz_close_output()
- text = JM_EscapeStrFromBuffer(res)
- return text
- def _getNewBlockList(self, page_dict, raw):
- JM_make_textpage_dict(self.this, page_dict, raw)
- def _textpage_dict(self, raw=False):
- page_dict = {"width": self.rect.width, "height": self.rect.height}
- self._getNewBlockList(page_dict, raw)
- return page_dict
- def extractBLOCKS(self):
- """Return a list with text block information."""
- if g_use_extra:
- return extra.extractBLOCKS(self.this)
- block_n = -1
- this_tpage = self.this
- tp_rect = mupdf.FzRect(this_tpage.m_internal.mediabox)
- res = mupdf.fz_new_buffer(1024)
- lines = []
- for block in this_tpage:
- block_n += 1
- blockrect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
- if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_TEXT:
- mupdf.fz_clear_buffer(res) # set text buffer to empty
- line_n = -1
- last_char = 0
- for line in block:
- line_n += 1
- linerect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
- for ch in line:
- cbbox = JM_char_bbox(line, ch)
- if (not JM_rects_overlap(tp_rect, cbbox)
- and not mupdf.fz_is_infinite_rect(tp_rect)
- ):
- continue
- JM_append_rune(res, ch.m_internal.c)
- last_char = ch.m_internal.c
- linerect = mupdf.fz_union_rect(linerect, cbbox)
- if last_char != 10 and not mupdf.fz_is_empty_rect(linerect):
- mupdf.fz_append_byte(res, 10)
- blockrect = mupdf.fz_union_rect(blockrect, linerect)
- text = JM_EscapeStrFromBuffer(res)
- elif (JM_rects_overlap(tp_rect, block.m_internal.bbox)
- or mupdf.fz_is_infinite_rect(tp_rect)
- ):
- img = block.i_image()
- cs = img.colorspace()
- text = "<image: %s, width: %d, height: %d, bpc: %d>" % (
- mupdf.fz_colorspace_name(cs),
- img.w(), img.h(), img.bpc()
- )
- blockrect = mupdf.fz_union_rect(blockrect, mupdf.FzRect(block.m_internal.bbox))
- if not mupdf.fz_is_empty_rect(blockrect):
- litem = (
- blockrect.x0,
- blockrect.y0,
- blockrect.x1,
- blockrect.y1,
- text,
- block_n,
- block.m_internal.type,
- )
- lines.append(litem)
- return lines
- def extractDICT(self, cb=None, sort=False) -> dict:
- """Return page content as a Python dict of images and text spans."""
- val = self._textpage_dict(raw=False)
- if cb is not None:
- val["width"] = cb.width
- val["height"] = cb.height
- if sort:
- blocks = val["blocks"]
- blocks.sort(key=lambda b: (b["bbox"][3], b["bbox"][0]))
- val["blocks"] = blocks
- return val
- def extractHTML(self) -> str:
- """Return page content as a HTML string."""
- return self._extractText(1)
- def extractIMGINFO(self, hashes=0):
- """Return a list with image meta information."""
- block_n = -1
- this_tpage = self.this
- rc = []
- for block in this_tpage:
- block_n += 1
- if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_TEXT:
- continue
- img = block.i_image()
- img_size = 0
- mask = img.mask()
- if mask.m_internal:
- has_mask = True
- else:
- has_mask = False
- compr_buff = mupdf.fz_compressed_image_buffer(img)
- if compr_buff.m_internal:
- img_size = compr_buff.fz_compressed_buffer_size()
- compr_buff = None
- if hashes:
- r = mupdf.FzIrect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT)
- assert mupdf.fz_is_infinite_irect(r)
- m = mupdf.FzMatrix(img.w(), 0, 0, img.h(), 0, 0)
- pix, w, h = mupdf.fz_get_pixmap_from_image(img, r, m)
- digest = mupdf.fz_md5_pixmap2(pix)
- digest = bytes(digest)
- if img_size == 0:
- img_size = img.w() * img.h() * img.n()
- cs = mupdf.FzColorspace(mupdf.ll_fz_keep_colorspace(img.m_internal.colorspace))
- block_dict = dict()
- block_dict[dictkey_number] = block_n
- block_dict[dictkey_bbox] = JM_py_from_rect(block.m_internal.bbox)
- block_dict[dictkey_matrix] = JM_py_from_matrix(block.i_transform())
- block_dict[dictkey_width] = img.w()
- block_dict[dictkey_height] = img.h()
- block_dict[dictkey_colorspace] = mupdf.fz_colorspace_n(cs)
- block_dict[dictkey_cs_name] = mupdf.fz_colorspace_name(cs)
- block_dict[dictkey_xres] = img.xres()
- block_dict[dictkey_yres] = img.yres()
- block_dict[dictkey_bpc] = img.bpc()
- block_dict[dictkey_size] = img_size
- if hashes:
- block_dict["digest"] = digest
- block_dict["has-mask"] = has_mask
- rc.append(block_dict)
- return rc
- def extractJSON(self, cb=None, sort=False) -> str:
- """Return 'extractDICT' converted to JSON format."""
- import base64
- import json
- val = self._textpage_dict(raw=False)
- class b64encode(json.JSONEncoder):
- def default(self, s):
- if type(s) in (bytes, bytearray):
- return base64.b64encode(s).decode()
- if cb is not None:
- val["width"] = cb.width
- val["height"] = cb.height
- if sort:
- blocks = val["blocks"]
- blocks.sort(key=lambda b: (b["bbox"][3], b["bbox"][0]))
- val["blocks"] = blocks
-
- val = json.dumps(val, separators=(",", ":"), cls=b64encode, indent=1)
- return val
- def extractRAWDICT(self, cb=None, sort=False) -> dict:
- """Return page content as a Python dict of images and text characters."""
- val = self._textpage_dict(raw=True)
- if cb is not None:
- val["width"] = cb.width
- val["height"] = cb.height
- if sort:
- blocks = val["blocks"]
- blocks.sort(key=lambda b: (b["bbox"][3], b["bbox"][0]))
- val["blocks"] = blocks
- return val
- def extractRAWJSON(self, cb=None, sort=False) -> str:
- """Return 'extractRAWDICT' converted to JSON format."""
- import base64
- import json
- val = self._textpage_dict(raw=True)
- class b64encode(json.JSONEncoder):
- def default(self,s):
- if type(s) in (bytes, bytearray):
- return base64.b64encode(s).decode()
- if cb is not None:
- val["width"] = cb.width
- val["height"] = cb.height
- if sort:
- blocks = val["blocks"]
- blocks.sort(key=lambda b: (b["bbox"][3], b["bbox"][0]))
- val["blocks"] = blocks
- val = json.dumps(val, separators=(",", ":"), cls=b64encode, indent=1)
- return val
- def extractSelection(self, pointa, pointb):
- a = JM_point_from_py(pointa)
- b = JM_point_from_py(pointb)
- found = mupdf.fz_copy_selection(self.this, a, b, 0)
- return found
- def extractText(self, sort=False) -> str:
- """Return simple, bare text on the page."""
- if not sort:
- return self._extractText(0)
- blocks = self.extractBLOCKS()[:]
- blocks.sort(key=lambda b: (b[3], b[0]))
- return "".join([b[4] for b in blocks])
- def extractTextbox(self, rect):
- this_tpage = self.this
- assert isinstance(this_tpage, mupdf.FzStextPage)
- area = JM_rect_from_py(rect)
- found = JM_copy_rectangle(this_tpage, area)
- rc = PyUnicode_DecodeRawUnicodeEscape(found)
- return rc
- def extractWORDS(self, delimiters=None):
- """Return a list with text word information."""
- if g_use_extra:
- return extra.extractWORDS(self.this, delimiters)
- buflen = 0
- last_char_rtl = 0
- block_n = -1
- wbbox = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY) # word bbox
- this_tpage = self.this
- tp_rect = mupdf.FzRect(this_tpage.m_internal.mediabox)
- lines = None
- buff = mupdf.fz_new_buffer(64)
- lines = []
- for block in this_tpage:
- block_n += 1
- if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
- continue
- line_n = -1
- for line in block:
- line_n += 1
- word_n = 0 # word counter per line
- mupdf.fz_clear_buffer(buff) # reset word buffer
- buflen = 0 # reset char counter
- for ch in line:
- cbbox = JM_char_bbox(line, ch)
- if (not JM_rects_overlap(tp_rect, cbbox)
- and not mupdf.fz_is_infinite_rect(tp_rect)
- ):
- continue
- word_delimiter = JM_is_word_delimiter(ch.m_internal.c, delimiters)
- this_char_rtl = JM_is_rtl_char(ch.m_internal.c)
- if word_delimiter or this_char_rtl != last_char_rtl:
- if buflen == 0 and word_delimiter:
- continue # skip delimiters at line start
- if not mupdf.fz_is_empty_rect(wbbox):
- word_n, wbbox = JM_append_word(lines, buff, wbbox, block_n, line_n, word_n)
- mupdf.fz_clear_buffer(buff)
- buflen = 0 # reset char counter
- if word_delimiter:
- continue
- # append one unicode character to the word
- JM_append_rune(buff, ch.m_internal.c)
- last_char_rtl = this_char_rtl
- buflen += 1
- # enlarge word bbox
- wbbox = mupdf.fz_union_rect(wbbox, JM_char_bbox(line, ch))
- if buflen and not mupdf.fz_is_empty_rect(wbbox):
- word_n, wbbox = JM_append_word(lines, buff, wbbox, block_n, line_n, word_n)
- buflen = 0
- return lines
- def extractXHTML(self) -> str:
- """Return page content as a XHTML string."""
- return self._extractText(4)
- def extractXML(self) -> str:
- """Return page content as a XML string."""
- return self._extractText(3)
def poolsize(self):
    """TextPage current poolsize.

    Returns:
        Whatever mupdf.fz_pool_size() reports for the pool referenced by the
        underlying text page.
    """
    # Temporary wrapper around the pool that is still owned by the text page.
    pool = mupdf.Pool(self.this.m_internal.pool)
    try:
        return mupdf.fz_pool_size(pool)
    finally:
        # Detach on every exit path (including exceptions) so that the
        # wrapper's destructor does not free the pool.
        pool.m_internal = None
@property
def rect(self):
    """Rectangle built from the text page's mediabox."""
    # Convert the low-level mediabox to a Python sequence, then wrap it.
    raw_box = self.this.m_internal.mediabox
    return Rect(JM_py_from_rect(raw_box))
def search(self, needle, hit_max=0, quads=1):
    """Locate 'needle' returning rects or quads.

    Args:
        needle: text to look for; passed on to JM_search_stext_page.
        hit_max: accepted but not referenced by this implementation.
        quads: truthy -> return Quad objects; falsy -> return rectangles,
            with overlapping rectangles on the same line merged.

    Returns:
        The (in-place modified) hit list, or the falsy search result as is.
    """
    val = JM_search_stext_page(self.this, needle)
    if not val:
        return val

    # Replace each raw hit by a Quad or by that quad's rectangle.
    for pos, raw_hit in enumerate(val):
        quad = Quad(raw_hit)
        val[pos] = quad if quads else quad.rect
    if quads:
        return val

    # Merge neighbours that share the same bottom edge and overlap.
    pos = 0
    while pos < len(val) - 1:
        first, second = val[pos], val[pos + 1]
        if first.y1 == second.y1 and not (first & second).is_empty:
            val[pos] = first | second  # joined rectangle replaces the first ...
            del val[pos + 1]  # ... and the second one disappears
        else:
            pos += 1  # nothing to merge here, move on
    return val
- extractTEXT = extractText
class TextWriter:
    """Collects text spans and later writes them to PDF pages of matching size.

    Positions passed in are multiplied by ``ictm`` before being stored and
    results are multiplied by ``ctm`` before being reported; ``ctm`` mirrors
    the y axis and shifts by the height of the rectangle given at creation.
    """

    def __init__(self, page_rect, opacity=1, color=None):
        """Stores text spans for later output on compatible PDF pages.

        Args:
            page_rect: rect-like, the rectangle of the pages to write to.
            opacity: default opacity used by write_text().
            color: default text color used by write_text().
        """
        self.this = mupdf.fz_new_text()
        self.opacity = opacity
        self.color = color
        self.rect = Rect(page_rect)
        self.ctm = Matrix(1, 0, 0, -1, 0, self.rect.height)
        self.ictm = ~self.ctm
        self.last_point = Point()
        self.last_point.__doc__ = "Position following last text insertion."
        self.text_rect = Rect()
        self.text_rect.__doc__ = "Accumulated area of text spans."
        self.used_fonts = set()
        self.thisown = True

    @property
    def _bbox(self):
        """Bounding box of the stored text as reported by fz_bound_text()."""
        val = JM_py_from_rect(mupdf.fz_bound_text(self.this, mupdf.FzStrokeState(None), mupdf.FzMatrix()))
        val = Rect(val)
        return val

    def append(self, pos, text, font=None, fontsize=11, language=None, right_to_left=0, small_caps=0):
        """Store 'text' at point 'pos' using 'font' and 'fontsize'.

        Args:
            pos: point-like start position.
            text: the string to store.
            font: Font object; Font("helv") is used when None.
            fontsize: font size.
            language: passed to fz_text_language_from_string().
            right_to_left: if truthy, the text is passed through clean_rtl()
                and then reversed character by character before storing.
            small_caps: if non-zero, JM_show_string_cs() is used instead of
                fz_show_string().
        Returns:
            Tuple (text_rect, last_point) after the insertion.
        Raises:
            ValueError: if the font is not writable.
        """
        pos = Point(pos) * self.ictm
        if font is None:
            font = Font("helv")
        if not font.is_writable:
            if 0:
                # Disabled diagnostics, kept for debugging sessions.
                log(f'{font.this.m_internal.name=}')
                log(f'{font.this.m_internal.t3matrix=}')
                log(f'{font.this.m_internal.bbox=}')
                log(f'{font.this.m_internal.glyph_count=}')
                log(f'{font.this.m_internal.use_glyph_bbox=}')
                log(f'{font.this.m_internal.width_count=}')
                log(f'{font.this.m_internal.width_default=}')
                log(f'{font.this.m_internal.has_digest=}')
            # The f-prefix was missing here, so the placeholder text itself was logged.
            log(f'Unsupported font {font.name=}')
            if mupdf_cppyy:
                import cppyy
                log(f'Unsupported font {cppyy.gbl.mupdf_font_name(font.this.m_internal)=}')
            raise ValueError("Unsupported font '%s'." % font.name)
        if right_to_left:
            text = self.clean_rtl(text)
            text = "".join(reversed(text))
            right_to_left = 0  # already handled above, do not let MuPDF do it again

        lang = mupdf.fz_text_language_from_string(language)
        p = JM_point_from_py(pos)
        trm = mupdf.fz_make_matrix(fontsize, 0, 0, fontsize, p.x, p.y)
        markup_dir = 0
        wmode = 0
        if small_caps == 0:
            trm = mupdf.fz_show_string(self.this, font.this, trm, text, wmode, right_to_left, markup_dir, lang)
        else:
            trm = JM_show_string_cs(self.this, font.this, trm, text, wmode, right_to_left, markup_dir, lang)
        val = JM_py_from_matrix(trm)

        # The last two matrix entries are the position after the inserted text.
        self.last_point = Point(val[-2:]) * self.ctm
        self.text_rect = self._bbox * self.ctm
        val = self.text_rect, self.last_point
        if font.flags["mono"] == 1:
            # remembered so write_text() can call repair_mono_font() for it
            self.used_fonts.add(font)
        return val

    def appendv(self, pos, text, font=None, fontsize=11, language=None, small_caps=False):
        """Store 'text' one character per line, advancing downwards from 'pos'.

        Each character is stored via append(); after each one the y coordinate
        advances by 1.2 * fontsize.

        Args:
            pos: point-like start position. It is copied, so the caller's
                object is not modified and plain sequences are accepted.
            text: the string to store.
            font, fontsize, language, small_caps: passed on to append().
        Returns:
            Tuple (text_rect, last_point) after the last insertion.
        """
        lheight = fontsize * 1.2
        cursor = Point(pos)  # private copy: never mutate the caller's point
        for c in text:
            self.append(cursor, c, font=font, fontsize=fontsize,
                    language=language, small_caps=small_caps)
            cursor.y += lheight
        return self.text_rect, self.last_point

    def clean_rtl(self, text):
        """Revert the sequence of Latin text parts.

        Text with right-to-left writing direction (Arabic, Hebrew) often
        contains Latin parts, which are written in left-to-right: numbers, names,
        etc. For output as PDF text we need *everything* in right-to-left.
        E.g. an input like "<arabic> ABCDE FG HIJ <arabic> KL <arabic>" will be
        converted to "<arabic> JIH GF EDCBA <arabic> LK <arabic>". The Arabic
        parts remain untouched.

        A word counts as Latin here if it has at least two characters and all
        of its code points are <= 255.

        Args:
            text: str
        Returns:
            Massaged string.
        """
        if not text:
            return text
        # split into words at space boundaries
        words = text.split(" ")
        idx = []
        for i in range(len(words)):
            w = words[i]
            # revert character sequence for Latin only words
            if not (len(w) < 2 or max([ord(c) for c in w]) > 255):
                words[i] = "".join(reversed(w))
                idx.append(i)  # stored index of Latin word

        def revert_run(run):
            # Reverse the order of a run of adjacent Latin words in place.
            if len(run) > 1:  # at least two consecutives?
                words[run[0] : run[-1] + 1] = reversed(words[run[0] : run[-1] + 1])

        # adjacent Latin words must revert their sequence, too
        idx2 = []  # store indices of adjacent Latin words
        for i in range(len(idx)):
            if idx2 == []:  # empty yet?
                idx2.append(idx[i])  # store Latin word number
            elif idx[i] > idx2[-1] + 1:  # large gap to last?
                revert_run(idx2)
                idx2 = [idx[i]]  # re-initialize
            elif idx[i] == idx2[-1] + 1:  # new adjacent Latin word
                idx2.append(idx[i])
        # A run that reaches the end of the text has no following gap to
        # trigger its reversal inside the loop, so handle it here.
        revert_run(idx2)

        text = " ".join(words)
        return text

    def write_text(self, page, color=None, opacity=-1, overlay=1, morph=None, matrix=None, render_mode=0, oc=0):
        """Write the text to a PDF page having the TextWriter's page size.

        Args:
            page: a PDF page having same size.
            color: override text color.
            opacity: override transparency.
            overlay: put in foreground or background.
            morph: tuple(Point, Matrix), apply a matrix with a fixpoint.
            matrix: Matrix to be used instead of 'morph' argument.
            render_mode: (int) PDF render mode operator 'Tr'.
            oc: passed to page._get_optional_content(); if that returns a
                value, the output is wrapped in a "/OC ... BDC" / "EMC" pair.
        Raises:
            ValueError: page rectangle differs from the writer's rectangle,
                'morph' is malformed, or both 'morph' and 'matrix' are given.
        """
        CheckParent(page)
        if abs(self.rect - page.rect) > 1e-3:
            raise ValueError("incompatible page rect")
        if morph is not None:
            if (type(morph) not in (tuple, list)
                    or type(morph[0]) is not Point
                    or type(morph[1]) is not Matrix
                    ):
                raise ValueError("morph must be (Point, Matrix) or None")
        if matrix is not None and morph is not None:
            raise ValueError("only one of matrix, morph is allowed")
        # Non-numeric or -1 means: use the writer's own default.
        if getattr(opacity, "__float__", None) is None or opacity == -1:
            opacity = self.opacity
        if color is None:
            color = self.color

        # Render the stored text into a scratch resources dict / content buffer.
        pdfpage = page._pdf_page()
        alpha = 1
        if opacity >= 0 and opacity < 1:
            alpha = opacity
        ncol = 1
        dev_color = [0, 0, 0, 0]
        if color:
            ncol, dev_color = JM_color_FromSequence(color)
        if ncol == 3:
            colorspace = mupdf.fz_device_rgb()
        elif ncol == 4:
            colorspace = mupdf.fz_device_cmyk()
        else:
            colorspace = mupdf.fz_device_gray()

        resources = mupdf.pdf_new_dict(pdfpage.doc(), 5)
        contents = mupdf.fz_new_buffer(1024)
        dev = mupdf.pdf_new_pdf_device(pdfpage.doc(), mupdf.FzMatrix(), resources, contents)
        mupdf.fz_fill_text(
                dev,
                self.this,
                mupdf.FzMatrix(),
                colorspace,
                dev_color,
                alpha,
                mupdf.FzColorParams(mupdf.fz_default_color_params),
                )
        mupdf.fz_close_device(dev)

        # copy generated resources into the one of the page
        max_nums = JM_merge_resources(pdfpage, resources)
        content = JM_EscapeStrFromBuffer(contents)
        max_alp, max_font = max_nums
        old_cont_lines = content.splitlines()

        optcont = page._get_optional_content(oc)
        if optcont is not None:
            bdc = "/OC /%s BDC" % optcont
            emc = "EMC"
        else:
            bdc = emc = ""

        new_cont_lines = ["q"]
        if bdc:
            new_cont_lines.append(bdc)

        cb = page.cropbox_position
        if page.rotation in (90, 270):
            delta = page.rect.height - page.rect.width
        else:
            delta = 0
        mb = page.mediabox
        if bool(cb) or mb.y0 != 0 or delta != 0:
            new_cont_lines.append(f"1 0 0 1 {_format_g((cb.x, cb.y + mb.y0 - delta))} cm")

        if morph:
            # Build the matrix that applies morph[1] around the fixpoint morph[0].
            p = morph[0] * self.ictm
            delta = Matrix(1, 1).pretranslate(p.x, p.y)
            matrix = ~delta * morph[1] * delta
        if morph or matrix:
            new_cont_lines.append(_format_g(JM_TUPLE(matrix)) + " cm")

        for line in old_cont_lines:
            if line.endswith(" cm"):
                continue  # generated matrices are dropped
            if line == "BT":
                new_cont_lines.append(line)
                new_cont_lines.append("%i Tr" % render_mode)
                continue
            if line.endswith(" gs"):
                # renumber the graphics state name by the offset from the merge
                alp = int(line.split()[0][4:]) + max_alp
                line = "/Alp%i gs" % alp
            elif line.endswith(" Tf"):
                temp = line.split()
                fsize = float(temp[1])
                if render_mode != 0:
                    w = fsize * 0.05
                else:
                    w = 1
                new_cont_lines.append(_format_g(w) + " w")
                # renumber the font name by the offset from the merge
                font = int(temp[0][2:]) + max_font
                line = " ".join(["/F%i" % font] + temp[1:])
            elif line.endswith(" rg"):
                # also emit the upper-case variant of each fill color operator
                new_cont_lines.append(line.replace("rg", "RG"))
            elif line.endswith(" g"):
                new_cont_lines.append(line.replace(" g", " G"))
            elif line.endswith(" k"):
                new_cont_lines.append(line.replace(" k", " K"))
            new_cont_lines.append(line)

        if emc:
            new_cont_lines.append(emc)
        new_cont_lines.append("Q\n")
        content = "\n".join(new_cont_lines).encode("utf-8")
        TOOLS._insert_contents(page, content, overlay=overlay)
        val = None
        for font in self.used_fonts:
            repair_mono_font(page, font)
        return val
class IRect:
    """
    IRect() - all zeros
    IRect(x0, y0, x1, y1) - 4 coordinates
    IRect(top-left, x1, y1) - point and 2 coordinates
    IRect(x0, y0, bottom-right) - 2 coordinates and point
    IRect(top-left, bottom-right) - 2 points
    IRect(sequ) - new from sequence or rect-like
    """

    # Arithmetic operators reuse the Rect implementation and round the result.

    def __add__(self, p):
        total = Rect.__add__(self, p)
        return total.round()

    def __and__(self, x):
        common = Rect.__and__(self, x)
        return common.round()

    def __contains__(self, x):
        return Rect.__contains__(self, x)

    def __eq__(self, r):
        """Equal to any sequence of length 4 holding the same coordinates."""
        if not hasattr(r, "__len__"):
            return False
        return (
            len(r) == 4
            and self.x0 == r[0]
            and self.y0 == r[1]
            and self.x1 == r[2]
            and self.y1 == r[3]
        )

    def __getitem__(self, i):
        coords = (self.x0, self.y0, self.x1, self.y1)
        return coords[i]

    def __hash__(self):
        return hash(tuple(self))

    def __init__(self, *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
        # util_make_irect resolves all supported argument forms to 4 values.
        coords = util_make_irect(*args, p0=p0, p1=p1, x0=x0, y0=y0, x1=x1, y1=y1)
        self.x0, self.y0, self.x1, self.y1 = coords

    def __len__(self):
        return 4

    def __mul__(self, m):
        product = Rect.__mul__(self, m)
        return product.round()

    def __neg__(self):
        return IRect(*[-c for c in (self.x0, self.y0, self.x1, self.y1)])

    def __or__(self, x):
        joined = Rect.__or__(self, x)
        return joined.round()

    def __pos__(self):
        return IRect(self)

    def __repr__(self):
        return f"IRect{tuple(self)}"

    def __setitem__(self, i, v):
        """Set coordinate number i (0..3) to int(v)."""
        value = int(v)
        if i == 0:
            self.x0 = value
        elif i == 1:
            self.y0 = value
        elif i == 2:
            self.x1 = value
        elif i == 3:
            self.y1 = value
        else:
            raise IndexError("index out of range")

    def __sub__(self, p):
        difference = Rect.__sub__(self, p)
        return difference.round()

    def __truediv__(self, m):
        quotient = Rect.__truediv__(self, m)
        return quotient.round()

    @property
    def bottom_left(self):
        """Bottom-left corner."""
        return Point(self.x0, self.y1)

    @property
    def bottom_right(self):
        """Bottom-right corner."""
        return Point(self.x1, self.y1)

    @property
    def height(self):
        """Vertical extent, never negative."""
        extent = self.y1 - self.y0
        return max(0, extent)

    def contains(self, x):
        """Check if x is in the rectangle."""
        return self.__contains__(x)

    def include_point(self, p):
        """Extend rectangle to include point p."""
        # Computed on a Rect copy; the integer version of that is returned.
        return self.rect.include_point(p).irect

    def include_rect(self, r):
        """Extend rectangle to include rectangle r."""
        # Computed on a Rect copy; the integer version of that is returned.
        return self.rect.include_rect(r).irect

    def intersect(self, r):
        """Restrict rectangle to intersection with rectangle r."""
        clipped = Rect.intersect(self, r)
        return clipped.round()

    def intersects(self, x):
        return Rect.intersects(self, x)

    @property
    def is_empty(self):
        """True if rectangle area is empty."""
        no_width = self.x0 >= self.x1
        return no_width or self.y0 >= self.y1

    @property
    def is_infinite(self):
        """True if rectangle is infinite."""
        return (
            self.x0 == self.y0 == FZ_MIN_INF_RECT
            and self.x1 == self.y1 == FZ_MAX_INF_RECT
        )

    @property
    def is_valid(self):
        """True if rectangle is valid."""
        x_ordered = self.x0 <= self.x1
        return x_ordered and self.y0 <= self.y1

    def morph(self, p, m):
        """Morph with matrix-like m and point-like p.

        Returns a new quad."""
        return INFINITE_QUAD() if self.is_infinite else self.quad.morph(p, m)

    def norm(self):
        """Square root of the sum of the squared coordinates."""
        return math.sqrt(sum(c * c for c in self))

    def normalize(self):
        """Replace rectangle with its valid version."""
        left, right = self.x0, self.x1
        if right < left:
            self.x0, self.x1 = right, left
        top, bottom = self.y0, self.y1
        if bottom < top:
            self.y0, self.y1 = bottom, top
        return self

    @property
    def quad(self):
        """Return Quad version of rectangle."""
        return Quad(self.tl, self.tr, self.bl, self.br)

    @property
    def rect(self):
        """Rect version of this rectangle."""
        return Rect(self)

    @property
    def top_left(self):
        """Top-left corner."""
        return Point(self.x0, self.y0)

    @property
    def top_right(self):
        """Top-right corner."""
        return Point(self.x1, self.y0)

    def torect(self, r):
        """Return matrix that converts to target rect."""
        r = Rect(r)
        if self.is_infinite or self.is_empty or r.is_infinite or r.is_empty:
            raise ValueError("rectangles must be finite and not empty")
        # move own top-left to the origin, scale to the target size, then
        # move to the target's top-left
        to_origin = Matrix(1, 0, 0, 1, -self.x0, -self.y0)
        scale = Matrix(r.width / self.width, r.height / self.height)
        to_target = Matrix(1, 0, 0, 1, r.x0, r.y0)
        return to_origin * scale * to_target

    def transform(self, m):
        moved = Rect.transform(self, m)
        return moved.round()

    @property
    def width(self):
        """Horizontal extent, never negative."""
        extent = self.x1 - self.x0
        return max(0, extent)

    # short aliases for the corner properties
    br = bottom_right
    bl = bottom_left
    tl = top_left
    tr = top_right
- # Data
- #
# Copy the simple PDF_* and UCDN_SCRIPT_* values from the mupdf module into
# this module's namespace.
_self = sys.modules[__name__]

# Iterating mupdf.__dict__ directly is used on purpose: the alternative,
# inspect.getmembers(mupdf), was measured to be slow due to importing
# inspect, e.g. 0.019 instead of 0.004.
for _name, _value in mupdf.__dict__.items():
    if not _name.startswith(('PDF_', 'UCDN_SCRIPT_')):
        continue
    if _name.startswith('PDF_ENUM_NAME_'):
        # Not a simple enum.
        continue
    setattr(_self, _name, _value)

# This is a macro so not preserved in mupdf C++/Python bindings.
#
PDF_SIGNATURE_DEFAULT_APPEARANCE = (
    0
    | mupdf.PDF_SIGNATURE_SHOW_LABELS
    | mupdf.PDF_SIGNATURE_SHOW_DN
    | mupdf.PDF_SIGNATURE_SHOW_DATE
    | mupdf.PDF_SIGNATURE_SHOW_TEXT_NAME
    | mupdf.PDF_SIGNATURE_SHOW_GRAPHIC_NAME
    | mupdf.PDF_SIGNATURE_SHOW_LOGO
)

assert mupdf.UCDN_EAST_ASIAN_H == 1

# Flake8 incorrectly fails next two lines because we've dynamically added
# items to self.
assert PDF_TX_FIELD_IS_MULTILINE == mupdf.PDF_TX_FIELD_IS_MULTILINE  # noqa: F821
assert UCDN_SCRIPT_ADLAM == mupdf.UCDN_SCRIPT_ADLAM  # noqa: F821

# The helper names are not meant to stay in the module namespace.
del _self, _name, _value
- AnyType = typing.Any
# Names of the 14 standard fonts.
Base14_fontnames = (
    "Courier", "Courier-Oblique", "Courier-Bold", "Courier-BoldOblique",
    "Helvetica", "Helvetica-Oblique", "Helvetica-Bold", "Helvetica-BoldOblique",
    "Times-Roman", "Times-Italic", "Times-Bold", "Times-BoldItalic",
    "Symbol",
    "ZapfDingbats",
)

# Lookup table: lower-cased full name -> full name, followed by the
# four-letter abbreviations -> full name.
Base14_fontdict = {}
for f in Base14_fontnames:
    Base14_fontdict[f.lower()] = f
Base14_fontdict.update(
    helv="Helvetica",
    heit="Helvetica-Oblique",
    hebo="Helvetica-Bold",
    hebi="Helvetica-BoldOblique",
    cour="Courier",
    coit="Courier-Oblique",
    cobo="Courier-Bold",
    cobi="Courier-BoldOblique",
    tiro="Times-Roman",
    tibo="Times-Bold",
    tiit="Times-Italic",
    tibi="Times-BoldItalic",
    symb="Symbol",
    zadb="ZapfDingbats",
)
- EPSILON = 1e-5
- FLT_EPSILON = 1e-5
- # largest 32bit integers surviving C float conversion roundtrips
- # used by MuPDF to define infinite rectangles
- FZ_MIN_INF_RECT = -0x80000000
- FZ_MAX_INF_RECT = 0x7fffff80
- JM_annot_id_stem = "fitz"
- JM_mupdf_warnings_store = []
- JM_mupdf_show_errors = 1
- JM_mupdf_show_warnings = 0
- # ------------------------------------------------------------------------------
- # Image recompression constants
- # ------------------------------------------------------------------------------
- FZ_RECOMPRESS_NEVER = mupdf.FZ_RECOMPRESS_NEVER
- FZ_RECOMPRESS_SAME = mupdf.FZ_RECOMPRESS_SAME
- FZ_RECOMPRESS_LOSSLESS = mupdf.FZ_RECOMPRESS_LOSSLESS
- FZ_RECOMPRESS_JPEG = mupdf.FZ_RECOMPRESS_JPEG
- FZ_RECOMPRESS_J2K = mupdf.FZ_RECOMPRESS_J2K
- FZ_RECOMPRESS_FAX = mupdf.FZ_RECOMPRESS_FAX
- FZ_SUBSAMPLE_AVERAGE = mupdf.FZ_SUBSAMPLE_AVERAGE
- FZ_SUBSAMPLE_BICUBIC = mupdf.FZ_SUBSAMPLE_BICUBIC
- # ------------------------------------------------------------------------------
- # Various PDF Optional Content Flags
- # ------------------------------------------------------------------------------
- PDF_OC_ON = 0
- PDF_OC_TOGGLE = 1
- PDF_OC_OFF = 2
- # ------------------------------------------------------------------------------
- # link kinds and link flags
- # ------------------------------------------------------------------------------
- LINK_NONE = 0
- LINK_GOTO = 1
- LINK_URI = 2
- LINK_LAUNCH = 3
- LINK_NAMED = 4
- LINK_GOTOR = 5
- LINK_FLAG_L_VALID = 1
- LINK_FLAG_T_VALID = 2
- LINK_FLAG_R_VALID = 4
- LINK_FLAG_B_VALID = 8
- LINK_FLAG_FIT_H = 16
- LINK_FLAG_FIT_V = 32
- LINK_FLAG_R_IS_ZOOM = 64
- SigFlag_SignaturesExist = 1
- SigFlag_AppendOnly = 2
- STAMP_Approved = 0
- STAMP_AsIs = 1
- STAMP_Confidential = 2
- STAMP_Departmental = 3
- STAMP_Experimental = 4
- STAMP_Expired = 5
- STAMP_Final = 6
- STAMP_ForComment = 7
- STAMP_ForPublicRelease = 8
- STAMP_NotApproved = 9
- STAMP_NotForPublicRelease = 10
- STAMP_Sold = 11
- STAMP_TopSecret = 12
- STAMP_Draft = 13
- TEXT_ALIGN_LEFT = 0
- TEXT_ALIGN_CENTER = 1
- TEXT_ALIGN_RIGHT = 2
- TEXT_ALIGN_JUSTIFY = 3
- TEXT_FONT_SUPERSCRIPT = 1
- TEXT_FONT_ITALIC = 2
- TEXT_FONT_SERIFED = 4
- TEXT_FONT_MONOSPACED = 8
- TEXT_FONT_BOLD = 16
- TEXT_OUTPUT_TEXT = 0
- TEXT_OUTPUT_HTML = 1
- TEXT_OUTPUT_JSON = 2
- TEXT_OUTPUT_XML = 3
- TEXT_OUTPUT_XHTML = 4
- TEXT_PRESERVE_LIGATURES = mupdf.FZ_STEXT_PRESERVE_LIGATURES
- TEXT_PRESERVE_WHITESPACE = mupdf.FZ_STEXT_PRESERVE_WHITESPACE
- TEXT_PRESERVE_IMAGES = mupdf.FZ_STEXT_PRESERVE_IMAGES
- TEXT_INHIBIT_SPACES = mupdf.FZ_STEXT_INHIBIT_SPACES
- TEXT_DEHYPHENATE = mupdf.FZ_STEXT_DEHYPHENATE
- TEXT_PRESERVE_SPANS = mupdf.FZ_STEXT_PRESERVE_SPANS
- TEXT_MEDIABOX_CLIP = mupdf.FZ_STEXT_MEDIABOX_CLIP
- TEXT_USE_CID_FOR_UNKNOWN_UNICODE = mupdf.FZ_STEXT_USE_CID_FOR_UNKNOWN_UNICODE
- TEXT_COLLECT_STRUCTURE = mupdf.FZ_STEXT_COLLECT_STRUCTURE
- TEXT_ACCURATE_BBOXES = mupdf.FZ_STEXT_ACCURATE_BBOXES
- TEXT_COLLECT_VECTORS = mupdf.FZ_STEXT_COLLECT_VECTORS
- TEXT_IGNORE_ACTUALTEXT = mupdf.FZ_STEXT_IGNORE_ACTUALTEXT
- TEXT_SEGMENT = mupdf.FZ_STEXT_SEGMENT
- if mupdf_version_tuple >= (1, 26):
- TEXT_PARAGRAPH_BREAK = mupdf.FZ_STEXT_PARAGRAPH_BREAK
- TEXT_TABLE_HUNT = mupdf.FZ_STEXT_TABLE_HUNT
- TEXT_COLLECT_STYLES = mupdf.FZ_STEXT_COLLECT_STYLES
- TEXT_USE_GID_FOR_UNKNOWN_UNICODE = mupdf.FZ_STEXT_USE_GID_FOR_UNKNOWN_UNICODE
- TEXT_CLIP_RECT = mupdf.FZ_STEXT_CLIP_RECT
- TEXT_ACCURATE_ASCENDERS = mupdf.FZ_STEXT_ACCURATE_ASCENDERS
- TEXT_ACCURATE_SIDE_BEARINGS = mupdf.FZ_STEXT_ACCURATE_SIDE_BEARINGS
- # 2025-05-07: Non-standard names preserved for backwards compatibility.
- TEXT_STEXT_SEGMENT = TEXT_SEGMENT
- TEXT_CID_FOR_UNKNOWN_UNICODE = TEXT_USE_CID_FOR_UNKNOWN_UNICODE
- TEXTFLAGS_WORDS = (0
- | TEXT_PRESERVE_LIGATURES
- | TEXT_PRESERVE_WHITESPACE
- | TEXT_MEDIABOX_CLIP
- | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
- )
- TEXTFLAGS_BLOCKS = (0
- | TEXT_PRESERVE_LIGATURES
- | TEXT_PRESERVE_WHITESPACE
- | TEXT_MEDIABOX_CLIP
- | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
- )
- TEXTFLAGS_DICT = (0
- | TEXT_PRESERVE_LIGATURES
- | TEXT_PRESERVE_WHITESPACE
- | TEXT_MEDIABOX_CLIP
- | TEXT_PRESERVE_IMAGES
- | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
- )
- TEXTFLAGS_RAWDICT = TEXTFLAGS_DICT
- TEXTFLAGS_SEARCH = (0
- | TEXT_PRESERVE_WHITESPACE
- | TEXT_MEDIABOX_CLIP
- | TEXT_DEHYPHENATE
- | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
- )
- TEXTFLAGS_HTML = (0
- | TEXT_PRESERVE_LIGATURES
- | TEXT_PRESERVE_WHITESPACE
- | TEXT_MEDIABOX_CLIP
- | TEXT_PRESERVE_IMAGES
- | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
- )
- TEXTFLAGS_XHTML = (0
- | TEXT_PRESERVE_LIGATURES
- | TEXT_PRESERVE_WHITESPACE
- | TEXT_MEDIABOX_CLIP
- | TEXT_PRESERVE_IMAGES
- | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
- )
- TEXTFLAGS_XML = (0
- | TEXT_PRESERVE_LIGATURES
- | TEXT_PRESERVE_WHITESPACE
- | TEXT_MEDIABOX_CLIP
- | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
- )
- TEXTFLAGS_TEXT = (0
- | TEXT_PRESERVE_LIGATURES
- | TEXT_PRESERVE_WHITESPACE
- | TEXT_MEDIABOX_CLIP
- | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
- )
- # Simple text encoding options
- TEXT_ENCODING_LATIN = 0
- TEXT_ENCODING_GREEK = 1
- TEXT_ENCODING_CYRILLIC = 2
- TOOLS_JM_UNIQUE_ID = 0
- # colorspace identifiers
- CS_RGB = 1
- CS_GRAY = 2
- CS_CMYK = 3
# PDF Blend Modes
# Each value is the blend mode name as written in a PDF. These names are
# case-sensitive, so every constant must spell its name exactly.
PDF_BM_Color = "Color"
PDF_BM_ColorBurn = "ColorBurn"
PDF_BM_ColorDodge = "ColorDodge"
PDF_BM_Darken = "Darken"
PDF_BM_Difference = "Difference"
PDF_BM_Exclusion = "Exclusion"
PDF_BM_HardLight = "HardLight"
PDF_BM_Hue = "Hue"
PDF_BM_Lighten = "Lighten"
PDF_BM_Luminosity = "Luminosity"
PDF_BM_Multiply = "Multiply"
PDF_BM_Normal = "Normal"
PDF_BM_Overlay = "Overlay"
PDF_BM_Saturation = "Saturation"
PDF_BM_Screen = "Screen"
PDF_BM_SoftLight = "SoftLight"  # was "Softlight", which is not a valid blend mode name
# Templates for PDF link annotation source text. Each entry is a callable
# that returns the object source of a '/Subtype/Link' annotation with a
# zero-width border ('/BS<</W 0>>'). In every template the last argument is
# inserted between the brackets of '/Rect[...]'.
annot_skel = {
    # 'GoTo' action with an explicit destination array: 'a' is used as the
    # object number of an indirect reference, (b, c, d) are formatted as the
    # three '/XYZ' values.
    "goto1": lambda a, b, c, d, e: f"<</A<</S/GoTo/D[{a} 0 R/XYZ {_format_g((b, c, d))}]>>/Rect[{e}]/BS<</W 0>>/Subtype/Link>>",
    # 'GoTo' action where 'a' is inserted verbatim as the '/D' value.
    "goto2": lambda a, b: f"<</A<</S/GoTo/D{a}>>/Rect[{b}]/BS<</W 0>>/Subtype/Link>>",
    # 'GoToR' action with explicit destination array; 'e' and 'f' become the
    # '/F' and '/UF' strings of the file specification.
    "gotor1": lambda a, b, c, d, e, f, g: f"<</A<</S/GoToR/D[{a} /XYZ {_format_g((b, c, d))}]/F<</F({e})/UF({f})/Type/Filespec>>>>/Rect[{g}]/BS<</W 0>>/Subtype/Link>>",
    # 'GoToR' action where 'a' is inserted verbatim as '/D' and 'b' as the '/F' string.
    "gotor2": lambda a, b, c: f"<</A<</S/GoToR/D{a}/F({b})>>/Rect[{c}]/BS<</W 0>>/Subtype/Link>>",
    # 'Launch' action; 'a' and 'b' become the '/F' and '/UF' strings.
    "launch": lambda a, b, c: f"<</A<</S/Launch/F<</F({a})/UF({b})/Type/Filespec>>>>/Rect[{c}]/BS<</W 0>>/Subtype/Link>>",
    # 'URI' action; 'a' is the URI string.
    "uri": lambda a, b: f"<</A<</S/URI/URI({a})>>/Rect[{b}]/BS<</W 0>>/Subtype/Link>>",
    # 'GoTo' action whose '/D' value is the string 'a'.
    "named": lambda a, b: f"<</A<</S/GoTo/D({a})/Type/Action>>/Rect[{b}]/BS<</W 0>>/Subtype/Link>>",
}
class FileDataError(RuntimeError):
    """Raised for documents with file structure issues."""
class FileNotFoundError(RuntimeError):
    """Raised if file does not exist."""
    # Note: within this module the name hides Python's builtin exception of
    # the same name; this class derives from RuntimeError.
class EmptyFileError(FileDataError):
    """Raised when creating documents from zero-length data."""
- # propagate exception class to C-level code
- #_set_FileDataError(FileDataError)
-
- csRGB = Colorspace(CS_RGB)
- csGRAY = Colorspace(CS_GRAY)
- csCMYK = Colorspace(CS_CMYK)
- # These don't appear to be visible in classic, but are used
- # internally.
- #
- dictkey_align = "align"
- dictkey_asc = "ascender"
- dictkey_bidi = "bidi"
- dictkey_bbox = "bbox"
- dictkey_blocks = "blocks"
- dictkey_bpc = "bpc"
- dictkey_c = "c"
- dictkey_chars = "chars"
- dictkey_color = "color"
- dictkey_colorspace = "colorspace"
- dictkey_content = "content"
- dictkey_creationDate = "creationDate"
- dictkey_cs_name = "cs-name"
- dictkey_da = "da"
- dictkey_dashes = "dashes"
- dictkey_descr = "description"
- dictkey_desc = "descender"
- dictkey_dir = "dir"
- dictkey_effect = "effect"
- dictkey_ext = "ext"
- dictkey_filename = "filename"
- dictkey_fill = "fill"
- dictkey_flags = "flags"
- dictkey_char_flags = "char_flags"
- dictkey_font = "font"
- dictkey_glyph = "glyph"
- dictkey_height = "height"
- dictkey_id = "id"
- dictkey_image = "image"
- dictkey_items = "items"
- dictkey_length = "length"
- dictkey_lines = "lines"
- dictkey_matrix = "transform"
- dictkey_modDate = "modDate"
- dictkey_name = "name"
- dictkey_number = "number"
- dictkey_origin = "origin"
- dictkey_rect = "rect"
- dictkey_size = "size"
- dictkey_smask = "smask"
- dictkey_spans = "spans"
- dictkey_stroke = "stroke"
- dictkey_style = "style"
- dictkey_subject = "subject"
- dictkey_text = "text"
- dictkey_title = "title"
- dictkey_type = "type"
- dictkey_ufilename = "ufilename"
- dictkey_width = "width"
- dictkey_wmode = "wmode"
- dictkey_xref = "xref"
- dictkey_xres = "xres"
- dictkey_yres = "yres"
# Optional extra fonts: use the descriptors of package 'pymupdf_fonts' when
# it can be imported, otherwise fall back to an empty table.
try:
    from pymupdf_fonts import fontdescriptors, fontbuffers
except ImportError:
    fitz_fontdescriptors = {}
else:
    fitz_fontdescriptors = fontdescriptors.copy()
    # attach to every descriptor the matching entry of 'fontbuffers'
    for k in fitz_fontdescriptors:
        fitz_fontdescriptors[k]["loader"] = fontbuffers[k]
    del fontdescriptors, fontbuffers
- symbol_glyphs = ( # Glyph list for the built-in font 'Symbol'
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (32, 0.25),
- (33, 0.333),
- (34, 0.713),
- (35, 0.5),
- (36, 0.549),
- (37, 0.833),
- (38, 0.778),
- (39, 0.439),
- (40, 0.333),
- (41, 0.333),
- (42, 0.5),
- (43, 0.549),
- (44, 0.25),
- (45, 0.549),
- (46, 0.25),
- (47, 0.278),
- (48, 0.5),
- (49, 0.5),
- (50, 0.5),
- (51, 0.5),
- (52, 0.5),
- (53, 0.5),
- (54, 0.5),
- (55, 0.5),
- (56, 0.5),
- (57, 0.5),
- (58, 0.278),
- (59, 0.278),
- (60, 0.549),
- (61, 0.549),
- (62, 0.549),
- (63, 0.444),
- (64, 0.549),
- (65, 0.722),
- (66, 0.667),
- (67, 0.722),
- (68, 0.612),
- (69, 0.611),
- (70, 0.763),
- (71, 0.603),
- (72, 0.722),
- (73, 0.333),
- (74, 0.631),
- (75, 0.722),
- (76, 0.686),
- (77, 0.889),
- (78, 0.722),
- (79, 0.722),
- (80, 0.768),
- (81, 0.741),
- (82, 0.556),
- (83, 0.592),
- (84, 0.611),
- (85, 0.69),
- (86, 0.439),
- (87, 0.768),
- (88, 0.645),
- (89, 0.795),
- (90, 0.611),
- (91, 0.333),
- (92, 0.863),
- (93, 0.333),
- (94, 0.658),
- (95, 0.5),
- (96, 0.5),
- (97, 0.631),
- (98, 0.549),
- (99, 0.549),
- (100, 0.494),
- (101, 0.439),
- (102, 0.521),
- (103, 0.411),
- (104, 0.603),
- (105, 0.329),
- (106, 0.603),
- (107, 0.549),
- (108, 0.549),
- (109, 0.576),
- (110, 0.521),
- (111, 0.549),
- (112, 0.549),
- (113, 0.521),
- (114, 0.549),
- (115, 0.603),
- (116, 0.439),
- (117, 0.576),
- (118, 0.713),
- (119, 0.686),
- (120, 0.493),
- (121, 0.686),
- (122, 0.494),
- (123, 0.48),
- (124, 0.2),
- (125, 0.48),
- (126, 0.549),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (183, 0.46),
- (160, 0.25),
- (161, 0.62),
- (162, 0.247),
- (163, 0.549),
- (164, 0.167),
- (165, 0.713),
- (166, 0.5),
- (167, 0.753),
- (168, 0.753),
- (169, 0.753),
- (170, 0.753),
- (171, 1.042),
- (172, 0.713),
- (173, 0.603),
- (174, 0.987),
- (175, 0.603),
- (176, 0.4),
- (177, 0.549),
- (178, 0.411),
- (179, 0.549),
- (180, 0.549),
- (181, 0.576),
- (182, 0.494),
- (183, 0.46),
- (184, 0.549),
- (185, 0.549),
- (186, 0.549),
- (187, 0.549),
- (188, 1),
- (189, 0.603),
- (190, 1),
- (191, 0.658),
- (192, 0.823),
- (193, 0.686),
- (194, 0.795),
- (195, 0.987),
- (196, 0.768),
- (197, 0.768),
- (198, 0.823),
- (199, 0.768),
- (200, 0.768),
- (201, 0.713),
- (202, 0.713),
- (203, 0.713),
- (204, 0.713),
- (205, 0.713),
- (206, 0.713),
- (207, 0.713),
- (208, 0.768),
- (209, 0.713),
- (210, 0.79),
- (211, 0.79),
- (212, 0.89),
- (213, 0.823),
- (214, 0.549),
- (215, 0.549),
- (216, 0.713),
- (217, 0.603),
- (218, 0.603),
- (219, 1.042),
- (220, 0.987),
- (221, 0.603),
- (222, 0.987),
- (223, 0.603),
- (224, 0.494),
- (225, 0.329),
- (226, 0.79),
- (227, 0.79),
- (228, 0.786),
- (229, 0.713),
- (230, 0.384),
- (231, 0.384),
- (232, 0.384),
- (233, 0.384),
- (234, 0.384),
- (235, 0.384),
- (236, 0.494),
- (237, 0.494),
- (238, 0.494),
- (239, 0.494),
- (183, 0.46),
- (241, 0.329),
- (242, 0.274),
- (243, 0.686),
- (244, 0.686),
- (245, 0.686),
- (246, 0.384),
- (247, 0.549),
- (248, 0.384),
- (249, 0.384),
- (250, 0.384),
- (251, 0.384),
- (252, 0.494),
- (253, 0.494),
- (254, 0.494),
- (183, 0.46),
- )
- zapf_glyphs = ( # Glyph list for the built-in font 'ZapfDingbats'
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (32, 0.278),
- (33, 0.974),
- (34, 0.961),
- (35, 0.974),
- (36, 0.98),
- (37, 0.719),
- (38, 0.789),
- (39, 0.79),
- (40, 0.791),
- (41, 0.69),
- (42, 0.96),
- (43, 0.939),
- (44, 0.549),
- (45, 0.855),
- (46, 0.911),
- (47, 0.933),
- (48, 0.911),
- (49, 0.945),
- (50, 0.974),
- (51, 0.755),
- (52, 0.846),
- (53, 0.762),
- (54, 0.761),
- (55, 0.571),
- (56, 0.677),
- (57, 0.763),
- (58, 0.76),
- (59, 0.759),
- (60, 0.754),
- (61, 0.494),
- (62, 0.552),
- (63, 0.537),
- (64, 0.577),
- (65, 0.692),
- (66, 0.786),
- (67, 0.788),
- (68, 0.788),
- (69, 0.79),
- (70, 0.793),
- (71, 0.794),
- (72, 0.816),
- (73, 0.823),
- (74, 0.789),
- (75, 0.841),
- (76, 0.823),
- (77, 0.833),
- (78, 0.816),
- (79, 0.831),
- (80, 0.923),
- (81, 0.744),
- (82, 0.723),
- (83, 0.749),
- (84, 0.79),
- (85, 0.792),
- (86, 0.695),
- (87, 0.776),
- (88, 0.768),
- (89, 0.792),
- (90, 0.759),
- (91, 0.707),
- (92, 0.708),
- (93, 0.682),
- (94, 0.701),
- (95, 0.826),
- (96, 0.815),
- (97, 0.789),
- (98, 0.789),
- (99, 0.707),
- (100, 0.687),
- (101, 0.696),
- (102, 0.689),
- (103, 0.786),
- (104, 0.787),
- (105, 0.713),
- (106, 0.791),
- (107, 0.785),
- (108, 0.791),
- (109, 0.873),
- (110, 0.761),
- (111, 0.762),
- (112, 0.762),
- (113, 0.759),
- (114, 0.759),
- (115, 0.892),
- (116, 0.892),
- (117, 0.788),
- (118, 0.784),
- (119, 0.438),
- (120, 0.138),
- (121, 0.277),
- (122, 0.415),
- (123, 0.392),
- (124, 0.392),
- (125, 0.668),
- (126, 0.668),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (183, 0.788),
- (161, 0.732),
- (162, 0.544),
- (163, 0.544),
- (164, 0.91),
- (165, 0.667),
- (166, 0.76),
- (167, 0.76),
- (168, 0.776),
- (169, 0.595),
- (170, 0.694),
- (171, 0.626),
- (172, 0.788),
- (173, 0.788),
- (174, 0.788),
- (175, 0.788),
- (176, 0.788),
- (177, 0.788),
- (178, 0.788),
- (179, 0.788),
- (180, 0.788),
- (181, 0.788),
- (182, 0.788),
- (183, 0.788),
- (184, 0.788),
- (185, 0.788),
- (186, 0.788),
- (187, 0.788),
- (188, 0.788),
- (189, 0.788),
- (190, 0.788),
- (191, 0.788),
- (192, 0.788),
- (193, 0.788),
- (194, 0.788),
- (195, 0.788),
- (196, 0.788),
- (197, 0.788),
- (198, 0.788),
- (199, 0.788),
- (200, 0.788),
- (201, 0.788),
- (202, 0.788),
- (203, 0.788),
- (204, 0.788),
- (205, 0.788),
- (206, 0.788),
- (207, 0.788),
- (208, 0.788),
- (209, 0.788),
- (210, 0.788),
- (211, 0.788),
- (212, 0.894),
- (213, 0.838),
- (214, 1.016),
- (215, 0.458),
- (216, 0.748),
- (217, 0.924),
- (218, 0.748),
- (219, 0.918),
- (220, 0.927),
- (221, 0.928),
- (222, 0.928),
- (223, 0.834),
- (224, 0.873),
- (225, 0.828),
- (226, 0.924),
- (227, 0.924),
- (228, 0.917),
- (229, 0.93),
- (230, 0.931),
- (231, 0.463),
- (232, 0.883),
- (233, 0.836),
- (234, 0.836),
- (235, 0.867),
- (236, 0.867),
- (237, 0.696),
- (238, 0.696),
- (239, 0.874),
- (183, 0.788),
- (241, 0.874),
- (242, 0.76),
- (243, 0.946),
- (244, 0.771),
- (245, 0.865),
- (246, 0.771),
- (247, 0.888),
- (248, 0.967),
- (249, 0.888),
- (250, 0.831),
- (251, 0.873),
- (252, 0.927),
- (253, 0.97),
- (183, 0.788),
- (183, 0.788),
- )
- # Functions
- #
- def _read_samples( pixmap, offset, n):
- # fixme: need to be able to get a sample in one call, as a Python
- # bytes or similar.
- ret = []
- if not pixmap.samples():
- # mupdf.fz_samples_get() gives a segv if pixmap->samples is null.
- return ret
- for i in range( n):
- ret.append( mupdf.fz_samples_get( pixmap, offset + i))
- return bytes( ret)
- def _INRANGE(v, low, high):
- return low <= v and v <= high
def _remove_dest_range(pdf, numbers):
    '''
    On every page whose number is not in `numbers`, delete those /Link
    annotations whose destination resolves to a page number in `numbers`.

    Only links with a direct /Dest entry or a /GoTo action are considered.
    '''
    for page_no in range(mupdf.pdf_count_pages(pdf)):
        if page_no in numbers:
            continue
        page_obj = mupdf.pdf_lookup_page_obj(pdf, page_no)
        annots = mupdf.pdf_dict_get(page_obj, PDF_NAME('Annots'))
        if not annots.m_internal:
            continue
        # Walk backwards so deleting an entry does not shift pending indices.
        for idx in reversed(range(mupdf.pdf_array_len(annots))):
            link = mupdf.pdf_array_get(annots, idx)
            subtype = mupdf.pdf_dict_get(link, PDF_NAME('Subtype'))
            if not mupdf.pdf_name_eq(subtype, PDF_NAME('Link')):
                continue
            action = mupdf.pdf_dict_get(link, PDF_NAME('A'))
            dest = mupdf.pdf_dict_get(link, PDF_NAME('Dest'))
            if action.m_internal:
                action_type = mupdf.pdf_dict_get(action, PDF_NAME('S'))
                if not mupdf.pdf_name_eq(action_type, PDF_NAME('GoTo')):
                    continue
                dest = mupdf.pdf_dict_get(action, PDF_NAME('D'))
            target_pno = -1
            if mupdf.pdf_is_array(dest):
                target = mupdf.pdf_array_get(dest, 0)
                target_pno = mupdf.pdf_lookup_page_number(pdf, target)
            elif mupdf.pdf_is_string(dest):
                location, _, _ = mupdf.fz_resolve_link(pdf.super(), mupdf.pdf_to_text_string(dest))
                target_pno = location.page
            if target_pno < 0:  # page number lookup did not work
                continue
            if target_pno in numbers:
                mupdf.pdf_array_delete(annots, idx)
def ASSERT_PDF(cond):
    '''
    Check that `cond` is a `mupdf.PdfPage` or `mupdf.PdfDocument` wrapping a
    non-null underlying object; raise `Exception(MSG_IS_NO_PDF)` otherwise.
    '''
    assert isinstance(cond, (mupdf.PdfPage, mupdf.PdfDocument)), f'{type(cond)=} {cond=}'
    if cond.m_internal:
        return
    raise Exception(MSG_IS_NO_PDF)
def EMPTY_IRECT():
    '''
    Return a new `IRect` with the maximum bound as top-left corner and the
    minimum bound as bottom-right corner.
    '''
    top_left = (FZ_MAX_INF_RECT, FZ_MAX_INF_RECT)
    bottom_right = (FZ_MIN_INF_RECT, FZ_MIN_INF_RECT)
    return IRect(*top_left, *bottom_right)
def EMPTY_QUAD():
    '''
    Return the `.quad` of a new `EMPTY_RECT()`.
    '''
    rect = EMPTY_RECT()
    return rect.quad
def EMPTY_RECT():
    '''
    Return a new `Rect` with the maximum bound as top-left corner and the
    minimum bound as bottom-right corner.
    '''
    top_left = (FZ_MAX_INF_RECT, FZ_MAX_INF_RECT)
    bottom_right = (FZ_MIN_INF_RECT, FZ_MIN_INF_RECT)
    return Rect(*top_left, *bottom_right)
def ENSURE_OPERATION(pdf):
    '''
    Raise an `Exception` unless `JM_have_operation(pdf)` is true.
    '''
    if JM_have_operation(pdf):
        return
    raise Exception("No journalling operation started")
def INFINITE_IRECT():
    '''
    Return a new `IRect` spanning from the minimum to the maximum bound in
    both directions.
    '''
    top_left = (FZ_MIN_INF_RECT, FZ_MIN_INF_RECT)
    bottom_right = (FZ_MAX_INF_RECT, FZ_MAX_INF_RECT)
    return IRect(*top_left, *bottom_right)
def INFINITE_QUAD():
    '''
    Return the `.quad` of a new `INFINITE_RECT()`.
    '''
    rect = INFINITE_RECT()
    return rect.quad
def INFINITE_RECT():
    '''
    Return a new `Rect` spanning from the minimum to the maximum bound in
    both directions.
    '''
    top_left = (FZ_MIN_INF_RECT, FZ_MIN_INF_RECT)
    bottom_right = (FZ_MAX_INF_RECT, FZ_MAX_INF_RECT)
    return Rect(*top_left, *bottom_right)
def JM_BinFromBuffer(buffer_):
    '''
    Return the result of `mupdf.fz_buffer_extract_copy()` for the given
    `mupdf.FzBuffer` (a copy of the buffer content as a Python object).
    '''
    assert isinstance(buffer_, mupdf.FzBuffer)
    return mupdf.fz_buffer_extract_copy(buffer_)
def JM_EscapeStrFromStr(c):
    '''
    Convert a `str` (typically from SWIG) into a `str` in which every byte of
    its UTF-8 / surrogateescape encoding becomes one character with the same
    code point.

    Returns '' if `c` is None.
    '''
    # `c` is typically from SWIG which will have converted a `const char*` from
    # C into a Python `str` using `PyUnicode_DecodeUTF8(carray, static_cast<
    # Py_ssize_t >(size), "surrogateescape")`. This gives us a Python `str`
    # with some characters encoded as a \0xdcXY sequence, where `XY` are hex
    # digits for an invalid byte in the original `const char*`.
    #
    # This is actually a reasonable way of representing arbitrary
    # strings from C, but we want to mimic what PyMuPDF does. It uses
    # `PyUnicode_DecodeRawUnicodeEscape(c, (Py_ssize_t) strlen(c), "replace")`
    # which gives a string containing actual unicode characters for any invalid
    # bytes.
    #
    # We mimic this by converting the `str` to a `bytes` with 'surrogateescape'
    # to recognise \0xdcXY sequences, then map every byte to the character
    # with the same code point. Decoding as latin-1 does exactly that in a
    # single pass, replacing the former per-byte `chr()` string concatenation.
    #
    if c is None:
        return ''
    assert isinstance(c, str), f'{type(c)=}'
    raw = c.encode('utf8', 'surrogateescape')
    return raw.decode('latin-1')
def JM_BufferFromBytes(stream):
    '''
    Make an fz_buffer from a bytes / bytearray object, or from any object
    offering a `getvalue()` method (e.g. io.BytesIO). If `getvalue()` returns
    text, it is encoded as utf8 first.

    Any other argument yields a default-constructed `mupdf.FzBuffer`.
    Raises `Exception` if `getvalue()` returns neither text nor bytes.
    '''
    if isinstance(stream, (bytes, bytearray)):
        return mupdf.fz_new_buffer_from_copied_data(stream)
    if not hasattr(stream, 'getvalue'):
        return mupdf.FzBuffer()
    data = stream.getvalue()
    if isinstance(data, str):
        data = data.encode('utf-8')
    if not isinstance(data, (bytes, bytearray)):
        raise Exception(f'.getvalue() returned unexpected type: {type(data)}')
    return mupdf.fz_new_buffer_from_copied_data(data)
def JM_FLOAT_ITEM(obj, idx):
    '''
    Return `obj[idx]` converted to float, or None if `obj` does not pass
    `PySequence_Check()`.
    '''
    if PySequence_Check(obj):
        return float(obj[idx])
    return None
def JM_INT_ITEM(obj, idx):
    '''
    Fetch the number at position `idx` of sequence `obj`.

    Returns `(0, value)` if `idx < len(obj)` and the item is an int or float,
    otherwise `(1, None)`.
    '''
    failure = (1, None)
    if not idx < len(obj):
        return failure
    item = obj[idx]
    if not isinstance(item, (int, float)):
        return failure
    return 0, item
def JM_pixmap_from_page(doc, page, ctm, cs, alpha, annots, clip):
    '''
    Render `page` into a new pixmap by running it through a draw device,
    taking the document's output intent and the page's separations into
    account.

    ctm: matrix applied to the page rectangle and used by the draw device.
    cs: requested colorspace; replaced by the output intent colorspace if
        that exists and has the same number of components.
    alpha: whether the pixmap gets an alpha channel.
    annots: if true run the full page, otherwise only the page contents.
    clip: area intersected with the page rectangle before transformation.
    '''
    SPOTS_NONE = 0
    SPOTS_OVERPRINT_SIM = 1
    SPOTS_FULL = 2

    FZ_ENABLE_SPOT_RENDERING = True # fixme: this is a build-time setting in MuPDF's config.h.
    spots = SPOTS_OVERPRINT_SIM if FZ_ENABLE_SPOT_RENDERING else SPOTS_NONE

    seps = None
    colorspace = cs

    matrix = JM_matrix_from_py(ctm)
    page_rect = mupdf.fz_bound_page(page)
    clip_rect = JM_rect_from_py(clip)
    page_rect = mupdf.fz_intersect_rect(page_rect, clip_rect)  # no-op if clip is not given
    page_rect = mupdf.fz_transform_rect(page_rect, matrix)
    bbox = mupdf.fz_round_rect(page_rect)

    # Output intent of the document: if present and compatible, use it
    # instead of the `cs` parameter.
    oi = mupdf.fz_document_output_intent(doc)
    if oi.m_internal and mupdf.fz_colorspace_n(oi) == mupdf.fz_colorspace_n(cs):
        colorspace = mupdf.fz_keep_colorspace(oi)

    # check if spots rendering is available and if so use separations
    if spots != SPOTS_NONE:
        seps = mupdf.fz_page_separations(page)
        if seps.m_internal:
            count = mupdf.fz_count_separations(seps)
            if spots == SPOTS_FULL:
                behavior = mupdf.FZ_SEPARATION_SPOT
            else:
                behavior = mupdf.FZ_SEPARATION_COMPOSITE
            for sep_no in range(count):
                mupdf.fz_set_separation_behavior(seps, sep_no, behavior)
        elif mupdf.fz_page_uses_overprint(page):
            # This page uses overprint, so we need an empty
            # sep object to force the overprint simulation on.
            seps = mupdf.fz_new_separations(0)
        elif oi.m_internal and mupdf.fz_colorspace_n(oi) != mupdf.fz_colorspace_n(colorspace):
            # We have an output intent, and it's incompatible
            # with the colorspace our device needs. Force the
            # overprint simulation on, because this ensures that
            # we 'simulate' the output intent too.
            seps = mupdf.fz_new_separations(0)

    pix = mupdf.fz_new_pixmap_with_bbox(colorspace, bbox, seps, alpha)
    if alpha:
        mupdf.fz_clear_pixmap(pix)
    else:
        mupdf.fz_clear_pixmap_with_value(pix, 0xFF)

    dev = mupdf.fz_new_draw_device(matrix, pix)
    run = mupdf.fz_run_page if annots else mupdf.fz_run_page_contents
    run(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
    mupdf.fz_close_device(dev)
    return pix
def JM_StrAsChar(x):
    '''
    Return `x` unchanged.
    '''
    # fixme: should encode, but swig doesn't pass bytes to C as const char*.
    #return x.encode('utf8')
    return x
def JM_TUPLE(o: typing.Sequence) -> tuple:
    '''
    Return the items of `o` as a tuple, each rounded to 5 decimals; items
    with an absolute value below 1e-4 become 0.
    '''
    return tuple(round(value, 5) if abs(value) >= 1e-4 else 0 for value in o)
def JM_TUPLE3(o: typing.Sequence) -> tuple:
    '''
    Return the items of `o` as a tuple, each rounded to 3 decimals; items
    with an absolute value below 1e-3 become 0.
    '''
    return tuple(round(value, 3) if abs(value) >= 1e-3 else 0 for value in o)
def JM_UnicodeFromStr(s):
    '''
    Return `s` as a `str`: None becomes '', bytes are decoded as utf8, and a
    `str` is returned unchanged. Any other type fails the assertion.
    '''
    if s is None:
        return ''
    text = s.decode('utf8') if isinstance(s, bytes) else s
    assert isinstance(text, str), f'{type(text)=} {text=}'
    return text
def JM_add_annot_id(annot, stem):
    '''
    Add a unique /NM key to an annotation or widget.

    The name is `<JM_annot_id_stem>-<stem><number>`, using the smallest
    number (counting from 0) for which the name is not yet contained in the
    id list of the annotation's page.
    '''
    assert isinstance(annot, mupdf.PdfAnnot)
    page = _pdf_annot_page(annot)
    annot_obj = mupdf.pdf_annot_obj(annot)
    existing = JM_get_annot_id_list(page)
    counter = 0
    candidate = f'{JM_annot_id_stem}-{stem}{counter}'
    while candidate in existing:
        counter += 1
        candidate = f'{JM_annot_id_stem}-{stem}{counter}'
    response = JM_StrAsChar(candidate)
    mupdf.pdf_dict_puts(
            annot_obj,
            "NM",
            mupdf.pdf_new_string(response, len(response)),
            )
    page.doc().m_internal.resynth_required = 0
def JM_add_oc_object(pdf, ref, xref):
    '''
    Store an indirect reference to object `xref` under key /OC of dictionary
    `ref`.

    The referenced object must be a dictionary whose /Type is /OCG or /OCMD;
    otherwise `RAISEPY(MSG_BAD_OC_REF, PyExc_ValueError)` is invoked.
    '''
    indobj = mupdf.pdf_new_indirect(pdf, xref, 0)
    if not mupdf.pdf_is_dict(indobj):
        RAISEPY(MSG_BAD_OC_REF, PyExc_ValueError)
    type_ = mupdf.pdf_dict_get(indobj, PDF_NAME('Type'))
    is_oc_type = any(
            mupdf.pdf_objcmp(type_, PDF_NAME(type_name)) == 0
            for type_name in ('OCG', 'OCMD')
            )
    if is_oc_type:
        mupdf.pdf_dict_put(ref, PDF_NAME('OC'), indobj)
    else:
        RAISEPY(MSG_BAD_OC_REF, PyExc_ValueError)
def JM_annot_border(annot_obj):
    '''
    Collect the border properties of an annotation object.

    Returns a dict with the keys `dictkey_width`, `dictkey_dashes` (tuple),
    `dictkey_style` and 'clouds'. Width and clouds are -1 and style is None
    when not present. Values found in /BS take precedence over /Border for
    the width; dash values from both entries are appended to the same list.
    '''
    def _int_items(array):
        # All items of a PDF array converted to Python ints.
        return [
                mupdf.pdf_to_int(mupdf.pdf_array_get(array, k))
                for k in range(mupdf.pdf_array_len(array))
                ]

    dashes = []
    style = None
    width = -1
    clouds = -1

    border = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Border'))
    if mupdf.pdf_is_array(border):
        width = mupdf.pdf_to_real(mupdf.pdf_array_get(border, 2))
        if mupdf.pdf_array_len(border) == 4:
            dashes.extend(_int_items(mupdf.pdf_array_get(border, 3)))

    bs_o = mupdf.pdf_dict_get(annot_obj, PDF_NAME('BS'))
    if bs_o.m_internal:
        width = mupdf.pdf_to_real(mupdf.pdf_dict_get(bs_o, PDF_NAME('W')))
        style = mupdf.pdf_to_name(mupdf.pdf_dict_get(bs_o, PDF_NAME('S')))
        if style == '':
            style = None
        bs_dashes = mupdf.pdf_dict_get(bs_o, PDF_NAME('D'))
        if bs_dashes.m_internal:
            dashes.extend(_int_items(bs_dashes))

    effect = mupdf.pdf_dict_get(annot_obj, PDF_NAME('BE'))
    if effect.m_internal:
        clouds = mupdf.pdf_to_int(mupdf.pdf_dict_get(effect, PDF_NAME('I')))

    return {
            dictkey_width: width,
            dictkey_dashes: tuple(dashes),
            dictkey_style: style,
            'clouds': clouds,
            }
def JM_annot_colors(annot_obj):
    '''
    Return the stroke and fill color components of an annotation object.

    The result is a dict with key `dictkey_stroke` (components of /C) and
    key `dictkey_fill` (components of /IC). A list is empty if the respective
    entry is not an array.
    '''
    def _components(array):
        # Float components of a PDF array, or [] if it is no array.
        if not mupdf.pdf_is_array(array):
            return []
        return [
                mupdf.pdf_to_real(mupdf.pdf_array_get(array, k))
                for k in range(mupdf.pdf_array_len(array))
                ]

    colors = {}
    colors[dictkey_stroke] = _components(
            mupdf.pdf_dict_get(annot_obj, mupdf.PDF_ENUM_NAME_C)
            )
    colors[dictkey_fill] = _components(
            mupdf.pdf_dict_gets(annot_obj, "IC")
            )
    return colors
def JM_annot_set_border( border, doc, annot_obj):
    '''
    Replace the border properties of an annotation object.

    border: dict that may contain the keys `dictkey_width`, `dictkey_dashes`,
        `dictkey_style` and 'clouds'. A missing, None or negative width /
        clouds value and a missing (None) dashes / style value mean "keep
        the current value", which is taken from `JM_annot_border()`.
    doc: PDF document, used to create the dashes array.
    annot_obj: the annotation's PDF object. Its /BS, /BE and /Border entries
        are deleted and /BS (and /BE if clouds > 0) are written anew.
    '''
    assert isinstance(border, dict)
    dashlen = 0
    # A missing key yields None, which cannot be compared with 0: normalise
    # width and clouds to -1 ("not given") in that case.
    nwidth = border.get( dictkey_width)     # new width
    if nwidth is None:
        nwidth = -1
    ndashes = border.get( dictkey_dashes)   # new dashes
    nstyle = border.get( dictkey_style)     # new style
    nclouds = border.get( 'clouds')         # new clouds value
    if nclouds is None:
        nclouds = -1

    # get old border properties
    oborder = JM_annot_border( annot_obj)

    # delete border-related entries
    mupdf.pdf_dict_del( annot_obj, PDF_NAME('BS'))
    mupdf.pdf_dict_del( annot_obj, PDF_NAME('BE'))
    mupdf.pdf_dict_del( annot_obj, PDF_NAME('Border'))

    # populate border items: keep old values for any omitted new ones
    if nwidth < 0:
        nwidth = oborder.get( dictkey_width)    # no new width: keep current
    if ndashes is None:
        ndashes = oborder.get( dictkey_dashes)  # no new dashes: keep old
    if nstyle is None:
        nstyle = oborder.get( dictkey_style)    # no new style: keep old
    if nclouds < 0:
        nclouds = oborder.get( "clouds", -1)    # no new clouds: keep old

    if isinstance( ndashes, tuple) and len( ndashes) > 0:
        dashlen = len( ndashes)
        darr = mupdf.pdf_new_array( doc, dashlen)
        for d in ndashes:
            mupdf.pdf_array_push_int( darr, d)
        mupdf.pdf_dict_putl( annot_obj, darr, PDF_NAME('BS'), PDF_NAME('D'))

    mupdf.pdf_dict_putl(
            annot_obj,
            mupdf.pdf_new_real( nwidth),
            PDF_NAME('BS'),
            PDF_NAME('W'),
            )

    # Existing dashes force the "dashed" style.
    if dashlen == 0:
        obj = JM_get_border_style( nstyle)
    else:
        obj = PDF_NAME('D')
    mupdf.pdf_dict_putl( annot_obj, obj, PDF_NAME('BS'), PDF_NAME('S'))

    if nclouds > 0:
        mupdf.pdf_dict_put_dict( annot_obj, PDF_NAME('BE'), 2)
        obj = mupdf.pdf_dict_get( annot_obj, PDF_NAME('BE'))
        mupdf.pdf_dict_put( obj, PDF_NAME('S'), PDF_NAME('C'))
        mupdf.pdf_dict_put_int( obj, PDF_NAME('I'), nclouds)
def make_escape(ch):
    '''
    Return the text representation of code point `ch`.

    Printable ASCII (32..127) and line feed are returned as the character
    itself. The backslash, surrogates and all other code points are returned
    as a `\\uXXXX` or `\\UXXXXXXXX` escape sequence; orphaned surrogates are
    represented by the escape for U+FFFD.
    '''
    if ch == 92:
        return "\\u005c"
    if ch == 10 or 32 <= ch <= 127:
        return chr(ch)
    if 0xd800 <= ch <= 0xdfff:  # orphaned surrogate
        return "\\ufffd"
    template = "\\u%04x" if ch <= 0xffff else "\\U%08x"
    return template % ch
def JM_append_rune(buff, ch):
    """
    Append code point `ch` to fz_buffer `buff`, in the representation
    produced by `make_escape()` (unicode escape format for non-ascii runes).
    """
    escaped = make_escape(ch)
    mupdf.fz_append_string(buff, escaped)
def JM_append_word(lines, buff, wbbox, block_n, line_n, word_n):
    '''
    Wordlist output: append one word item to `lines`.

    The item is the tuple (x0, y0, x1, y1, text, block_n, line_n, word_n)
    where the coordinates come from `wbbox` and the text from `buff`.

    Returns the next word number and a new empty `mupdf.FzRect`.
    '''
    text = JM_EscapeStrFromBuffer(buff)
    lines.append((wbbox.x0, wbbox.y0, wbbox.x1, wbbox.y1, text, block_n, line_n, word_n))
    next_word_n = word_n + 1    # word counter
    return next_word_n, mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
def JM_add_layer_config( pdf, name, creator, ON):
    '''
    Add an optional content configuration to /OCProperties/Configs.

    name: stored as /Name of the configuration.
    creator: stored as /Creator unless None.
    ON: sequence of xref numbers. Those which are numbers and contained in
        the /OCGs array are put into the configuration's /ON array. The
        /BaseState is /OFF.
    '''
    ocp = JM_ensure_ocproperties( pdf)
    configs = mupdf.pdf_dict_get( ocp, PDF_NAME('Configs'))
    if not mupdf.pdf_is_array( configs):
        configs = mupdf.pdf_dict_put_array( ocp, PDF_NAME('Configs'), 1)

    config = mupdf.pdf_new_dict( pdf, 5)
    mupdf.pdf_dict_put_text_string( config, PDF_NAME('Name'), name)
    if creator is not None:
        mupdf.pdf_dict_put_text_string( config, PDF_NAME('Creator'), creator)
    mupdf.pdf_dict_put( config, PDF_NAME('BaseState'), PDF_NAME('OFF'))
    on_array = mupdf.pdf_dict_put_array( config, PDF_NAME('ON'), 5)

    if ON:
        ocgs = mupdf.pdf_dict_get( ocp, PDF_NAME('OCGs'))
        for pos in range( len( ON)):
            error, xref = JM_INT_ITEM( ON, pos)
            if error == 1:
                continue    # not a number: ignore this item
            ind = mupdf.pdf_new_indirect( pdf, xref, 0)
            if mupdf.pdf_array_contains( ocgs, ind):
                mupdf.pdf_array_push( on_array, ind)

    mupdf.pdf_array_push( configs, config)
def JM_char_bbox(line, ch):
    '''
    Return the rectangle of the character's quad (see `JM_char_quad()`).

    If the line's wmode is set and the rectangle is less high than the
    character's size, its top is moved up so that the height equals the size.
    '''
    rect = mupdf.fz_rect_from_quad(JM_char_quad(line, ch))
    if line.m_internal.wmode:
        size = ch.m_internal.size
        if rect.y1 < rect.y0 + size:
            rect.y0 = rect.y1 - size
    return rect
def JM_char_font_flags(font, line, ch):
    '''
    Return the sum of the TEXT_FONT_* flags applying to `font`, plus the
    result of `detect_super_script(line, ch)` if both `line` and `ch` are
    given.
    '''
    flags = detect_super_script(line, ch) if (line and ch) else 0
    flags += mupdf.fz_font_is_italic(font) * TEXT_FONT_ITALIC
    flags += mupdf.fz_font_is_serif(font) * TEXT_FONT_SERIFED
    flags += mupdf.fz_font_is_monospaced(font) * TEXT_FONT_MONOSPACED
    flags += mupdf.fz_font_is_bold(font) * TEXT_FONT_BOLD
    return flags
def JM_char_quad(line, ch):
    '''
    Return the quad of character `ch` in `line`, re-computed if the font's
    ascender / descender values make no sense.

    The character's own quad is returned unchanged if quad corrections are
    skipped, for vertical write mode, or if ascender minus descender is at
    least 1 and small glyph heights are not requested.
    '''
    if g_use_extra:
        # This reduces time taken to extract text from PyMuPDF.pdf from 20s to
        # 15s.
        return mupdf.FzQuad(extra.JM_char_quad( line.m_internal, ch.m_internal))

    assert isinstance(line, mupdf.FzStextLine)
    assert isinstance(ch, mupdf.FzStextChar)
    # no special handling requested / never touch vertical write mode
    if _globals.skip_quad_corrections or line.m_internal.wmode:
        return ch.quad

    font = mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font))
    ascender = JM_font_ascender(font)
    descender = JM_font_descender(font)
    font_size = ch.m_internal.size
    height = ascender - descender + FLT_EPSILON
    if height >= 1 and _globals.small_glyph_heights == 0:   # no problem
        return mupdf.FzQuad(ch.m_internal.quad)

    font_size = ch.m_internal.size
    font_bbox = mupdf.fz_font_bbox(font)
    # Initial value only; replaced by the glyph advance before it is used.
    advance = font_bbox.x1 - font_bbox.x0
    if ascender < 1e-3:     # probably Tesseract glyphless font
        descender = -0.1
        ascender = 0.9
        height = 1.0

    if _globals.small_glyph_heights or height < 1:
        descender = descender / height
        ascender = ascender / height
    height = ascender - descender
    ascender = ascender * font_size / height
    descender = descender * font_size / height

    # Re-compute quad with the adjusted ascender / descender values:
    # Move ch->origin to (0,0) and de-rotate quad, then adjust the corners,
    # re-rotate and move back to ch->origin location.
    cosine = line.m_internal.dir.x
    sine = line.m_internal.dir.y
    derotate = mupdf.fz_make_matrix(cosine, -sine, sine, cosine, 0, 0)
    rotate = mupdf.fz_make_matrix(cosine, sine, -sine, cosine, 0, 0)
    if cosine == -1:    # left-right flip
        derotate.d = 1
        rotate.d = 1
    origin = ch.m_internal.origin
    to_zero = mupdf.fz_make_matrix(1, 0, 0, 1, -origin.x, -origin.y)
    from_zero = mupdf.fz_make_matrix(1, 0, 0, 1, origin.x, origin.y)

    quad = mupdf.fz_transform_quad(mupdf.FzQuad(ch.m_internal.quad), to_zero)   # move origin to (0,0)
    quad = mupdf.fz_transform_quad(quad, derotate)  # de-rotate corners

    # adjust vertical coordinates
    if cosine == 1 and quad.ul.y > 0:   # up-down flip
        top, bottom = ascender, descender
    else:
        top, bottom = -ascender, -descender
    quad.ul.y = top
    quad.ur.y = top
    quad.ll.y = bottom
    quad.lr.y = bottom

    # adjust horizontal coordinates that are too crazy:
    # (1) left x must be >= 0
    # (2) if bbox width is 0, lookup char advance in font.
    if quad.ll.x < 0:
        quad.ll.x = 0
        quad.ul.x = 0

    char_width = quad.lr.x - quad.ll.x
    if char_width < FLT_EPSILON:
        glyph = mupdf.fz_encode_character( font, ch.m_internal.c)
        if glyph:
            advance = mupdf.fz_advance_glyph( font, glyph, line.m_internal.wmode)
            quad.lr.x = quad.ll.x + advance * font_size
            quad.ur.x = quad.lr.x

    quad = mupdf.fz_transform_quad(quad, rotate)    # rotate back
    quad = mupdf.fz_transform_quad(quad, from_zero) # translate back
    return quad
def JM_choice_options(annot):
    '''
    Return the list of choices of a list box or combo box.

    Each list item is either a text string or, for /Opt entries that are
    arrays of length 2, a tuple of two text strings. Returns None if the
    widget has no options.
    '''
    annot_obj = mupdf.pdf_annot_obj( annot.this)

    count = len( mupdf.pdf_choice_widget_options2( annot, 0))
    if count == 0:
        return  # wrong widget type

    optarr = mupdf.pdf_dict_get( annot_obj, PDF_NAME('Opt'))
    choices = []
    for pos in range( count):
        option = mupdf.pdf_array_get( optarr, pos)
        if mupdf.pdf_array_len( option) == 2:
            first = mupdf.pdf_to_text_string( mupdf.pdf_array_get( option, 0))
            second = mupdf.pdf_to_text_string( mupdf.pdf_array_get( option, 1))
            choices.append( (first, second))
        else:
            choices.append( mupdf.pdf_to_text_string( option))
    return choices
def JM_clear_pixmap_rect_with_value(dest, value, b):
    '''
    Clear a pixmap rectangle - my version also supports non-alpha pixmaps.

    dest: the pixmap to modify.
    value: sample value to write. For 4-component colorspaces (CMYK) the
        first three components are set to 0 and the fourth to 255 - value.
    b: rectangle to clear; it is intersected with the pixmap's bbox.

    An existing alpha sample is set to 255.
    Returns 0 if the intersection is empty (nothing changed), else 1.
    '''
    b = mupdf.fz_intersect_irect(b, mupdf.fz_pixmap_bbox(dest))
    width = b.x1 - b.x0
    height = b.y1 - b.y0
    if width <= 0 or height <= 0:
        return 0

    stride = dest.stride()
    n = dest.n()
    has_alpha = dest.alpha()
    row_start = stride * (b.y0 - dest.y()) + n * (b.x0 - dest.x())

    # CMYK needs special handling (and potentially any other subtractive colorspaces)
    if mupdf.fz_colorspace_n(dest.colorspace()) == 4:
        value = 255 - value
        # Exactly `height` rows: the former 'break when y == 0, then
        # decrement' loop processed one row too many.
        for _row in range(height):
            s = row_start
            for _x in range(width):
                for sample in (0, 0, 0, value):
                    mupdf.fz_samples_set(dest, s, sample)
                    s += 1
                if has_alpha:
                    mupdf.fz_samples_set(dest, s, 255)
                    s += 1
            row_start += stride
        return 1

    for _row in range(height):
        s = row_start
        for _x in range(width):
            for _k in range(n - 1):
                mupdf.fz_samples_set(dest, s, value)
                s += 1
            # The last sample of a pixel is either alpha or a color component.
            mupdf.fz_samples_set(dest, s, 255 if has_alpha else value)
            s += 1
        row_start += stride
    return 1
def JM_color_FromSequence(color):
    '''
    Validate a color specification.

    color: a single number, or a list / tuple with 0, 1, 3 or 4 numbers.

    Returns `(n, components)` where `n` is the number of components and
    `components` is a new sequence of the same kind as the input (a list for
    a single number) in which every value outside [0, 1] is replaced by 1.
    Returns `(-1, [])` for any other argument.

    The argument is never modified; tuples with out-of-range components are
    handled like lists instead of failing on item assignment.
    '''
    if isinstance(color, (int, float)):    # maybe just a single float
        color = [color]

    if not isinstance( color, (list, tuple)):
        return -1, []

    if len(color) not in (0, 1, 3, 4):
        return -1, []

    clamped = [1 if (c < 0 or c > 1) else c for c in color]
    ret = tuple(clamped) if isinstance(color, tuple) else clamped
    return len(ret), ret
def JM_color_count( pm, clip):
    '''
    Count the pixels per color within a pixmap area.

    pm: the pixmap.
    clip: area to look at; it is rounded and intersected with the pixmap's
        bbox.

    Returns a dict which maps each pixel value (`bytes` of length `pm.n()`)
    to the number of its occurrences. The dict is empty if the area is empty.
    '''
    if g_use_extra:
        return extra.ll_JM_color_count(pm.m_internal, clip)

    rc = dict()
    irect = mupdf.fz_pixmap_bbox( pm)
    irect = mupdf.fz_intersect_irect(irect, mupdf.fz_round_rect(JM_rect_from_py(clip)))
    # Check for an empty area before reading any sample: the offset computed
    # from an empty rectangle need not point into the sample data.
    if mupdf.fz_is_empty_irect(irect):
        return rc
    stride = pm.stride()
    width = irect.x1 - irect.x0
    height = irect.y1 - irect.y0
    n = pm.n()
    substride = width * n
    s = stride * (irect.y0 - pm.y()) + (irect.x0 - pm.x()) * n
    oldpix = _read_samples( pm, s, n)
    cnt = 0
    # Count runs of equal pixels; a run is added to the result when it ends.
    for i in range( height):
        for j in range( 0, substride, n):
            newpix = _read_samples( pm, s + j, n)
            if newpix != oldpix:
                rc[ oldpix] = rc.get( oldpix, 0) + cnt
                cnt = 1
                oldpix = newpix
            else:
                cnt += 1
        s += stride
    # store the last run
    rc[ oldpix] = rc.get( oldpix, 0) + cnt
    return rc
def JM_compress_buffer(inbuffer):
    '''
    Return a new buffer with the deflated content of `inbuffer` (using
    FZ_DEFLATE_BEST), or None if no compressed data was produced.
    '''
    deflated = mupdf.fz_new_deflated_data_from_buffer(
            inbuffer,
            mupdf.FZ_DEFLATE_BEST,
            )
    data, compressed_length = deflated
    #log( '{=data compressed_length}')
    if compressed_length == 0 if data else True:
        return None
    result = mupdf.FzBuffer(mupdf.fz_new_buffer_from_data(data, compressed_length))
    mupdf.fz_resize_buffer(result, compressed_length)
    return result
def JM_copy_rectangle(page, area):
    '''
    Return the text of all characters whose bbox overlaps `area`.

    page: iterable of text blocks; only blocks of type FZ_STEXT_BLOCK_TEXT
        are looked at.

    Characters are converted via `make_escape()`. A line feed is written
    between the output of lines that contributed at least one character.
    '''
    out = io.StringIO()
    pending_newline = False
    for block in page:
        if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
            continue
        for line in block:
            line_has_output = False
            for ch in line:
                if not JM_rects_overlap(area, JM_char_bbox(line, ch)):
                    continue
                line_has_output = True
                if pending_newline:
                    out.write("\n")
                    pending_newline = False
                out.write(make_escape(ch.m_internal.c))
            if line_has_output:
                pending_newline = True
    return out.getvalue()
def JM_convert_to_pdf(doc, fp, tp, rotate):
    '''
    Convert any MuPDF document to a PDF.

    doc: the document to convert.
    fp, tp: first and last page number; pages are taken in descending order
        if fp > tp.
    rotate: page rotation, normalized via `JM_norm_rotation()`.

    Returns a bytes object containing the PDF.
    '''
    pdfout = mupdf.PdfDocument()
    if fp > tp:
        step, lowest, highest = -1, tp, fp  # count backwards
    else:
        step, lowest, highest = 1, fp, tp
    rot = JM_norm_rotation(rotate)

    # interpret & write document pages as PDF pages
    pno = fp
    while _INRANGE(pno, lowest, highest):
        page = mupdf.fz_load_page(doc, pno)
        mediabox = mupdf.fz_bound_page(page)
        dev, resources, contents = mupdf.pdf_page_write(pdfout, mediabox)
        mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
        mupdf.fz_close_device(dev)
        dev = None  # drop the device reference before adding the page
        page_obj = mupdf.pdf_add_page(pdfout, mediabox, rot, resources, contents)
        mupdf.pdf_insert_page(pdfout, -1, page_obj)
        pno += step

    # PDF created - now write it to a memory buffer
    # prepare write options structure
    opts = mupdf.PdfWriteOptions()
    opts.do_garbage = 4
    opts.do_compress = 1
    opts.do_compress_images = 1
    opts.do_compress_fonts = 1
    opts.do_sanitize = 1
    opts.do_incremental = 0
    opts.do_ascii = 0
    opts.do_decompress = 0
    opts.do_linear = 0
    opts.do_clean = 1
    opts.do_pretty = 0

    target = mupdf.fz_new_buffer(8192)
    out = mupdf.FzOutput(target)
    mupdf.pdf_write_document(pdfout, out, opts)
    out.fz_close_output()
    pdf_bytes = mupdf.fz_buffer_extract_copy(target)
    assert isinstance(pdf_bytes, bytes)
    return pdf_bytes
- # Copied from MuPDF v1.14
- # Create widget
def JM_create_widget(doc, page, type, fieldname):
    '''
    Create a new widget annotation of field type `type` on `page`, name it
    `fieldname` and append it to the document's /AcroForm/Fields array
    (which is created if missing).

    For signature widgets the /AcroForm/SigFlags are extended by
    SigFlag_SignaturesExist | SigFlag_AppendOnly.

    If anything fails, the annotation is deleted again, the previous SigFlags
    are restored for signature widgets, and the exception is re-raised.
    Returns the new annotation.
    '''
    def _put_sigflags(flags):
        # Store `flags` as /Root/AcroForm/SigFlags of the trailer.
        mupdf.pdf_dict_putl(
                mupdf.pdf_trailer(doc),
                mupdf.pdf_new_int(flags),
                PDF_NAME('Root'),
                PDF_NAME('AcroForm'),
                PDF_NAME('SigFlags'),
                )

    old_sigflags = mupdf.pdf_to_int(mupdf.pdf_dict_getp(mupdf.pdf_trailer(doc), "Root/AcroForm/SigFlags"))
    #log( '*** JM_create_widget()')
    #log( f'{mupdf.pdf_create_annot_raw=}')
    #log( f'{page=}')
    #log( f'{mupdf.PDF_ANNOT_WIDGET=}')
    annot = mupdf.pdf_create_annot_raw(page, mupdf.PDF_ANNOT_WIDGET)
    annot_obj = mupdf.pdf_annot_obj(annot)
    try:
        JM_set_field_type(doc, annot_obj, type)
        mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('T'), fieldname)

        if type == mupdf.PDF_WIDGET_TYPE_SIGNATURE:
            _put_sigflags(old_sigflags | (SigFlag_SignaturesExist | SigFlag_AppendOnly))

        # pdf_create_annot will have linked the new widget into the page's
        # annot array. We also need it linked into the document's form
        fields = mupdf.pdf_dict_getp(mupdf.pdf_trailer(doc), "Root/AcroForm/Fields")
        if not fields.m_internal:
            fields = mupdf.pdf_new_array(doc, 1)
            mupdf.pdf_dict_putl(
                    mupdf.pdf_trailer(doc),
                    fields,
                    PDF_NAME('Root'),
                    PDF_NAME('AcroForm'),
                    PDF_NAME('Fields'),
                    )
        mupdf.pdf_array_push(fields, annot_obj)  # Cleanup relies on this statement being last
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        mupdf.pdf_delete_annot(page, annot)
        if type == mupdf.PDF_WIDGET_TYPE_SIGNATURE:
            _put_sigflags(old_sigflags)
        raise
    return annot
def JM_cropbox(page_obj):
    '''
    Return a PDF page's CropBox.

    The (inheritable) /CropBox is replaced by the mediabox if it is infinite
    or empty. The y-coordinates of the result are measured from the
    mediabox's y1, i.e. y0 = mediabox.y1 - cropbox.y1 and
    y1 = mediabox.y1 - cropbox.y0.
    '''
    if g_use_extra:
        return extra.JM_cropbox(page_obj)

    mediabox = JM_mediabox(page_obj)
    cropbox_obj = mupdf.pdf_dict_get_inheritable(page_obj, PDF_NAME('CropBox'))
    cropbox = mupdf.pdf_to_rect(cropbox_obj)
    if mupdf.fz_is_infinite_rect(cropbox) or mupdf.fz_is_empty_rect(cropbox):
        cropbox = mediabox
    # Compute both values first: cropbox may be the same object as mediabox.
    new_y0, new_y1 = mediabox.y1 - cropbox.y1, mediabox.y1 - cropbox.y0
    cropbox.y0 = new_y0
    cropbox.y1 = new_y1
    return cropbox
def JM_cropbox_size(page_obj):
    '''
    Return width and height of the page's CropBox (see `JM_cropbox()`) as a
    point made by `mupdf.fz_make_point()`.
    '''
    box = JM_cropbox(page_obj)
    width = abs(box.x1 - box.x0)
    height = abs(box.y1 - box.y0)
    return mupdf.fz_make_point(width, height)
def JM_derotate_page_matrix(page):
    '''
    Return the inverse of the matrix delivered by
    `JM_rotate_page_matrix(page)`.
    '''
    rotation = JM_rotate_page_matrix(page)
    return mupdf.fz_invert_matrix(rotation)
def JM_embed_file(
        pdf,
        buf,
        filename,
        ufilename,
        desc,
        compress,
        ):
    '''
    Embed a new file in a PDF (not only /EmbeddedFiles entries).

    Creates a /Filespec dictionary with the entries /CI, /EF, /F, /UF and
    /Desc, and a stream object (referenced by /EF/F) which receives the
    content of `buf` via `JM_update_stream()`. The buffer size is stored as
    /DL, /Length and /Params/Size of the stream object.

    Returns the file specification dictionary.
    '''
    filespec = mupdf.pdf_new_dict(pdf, 6)
    mupdf.pdf_dict_put_dict(filespec, PDF_NAME('CI'), 4)
    ef = mupdf.pdf_dict_put_dict(filespec, PDF_NAME('EF'), 4)
    for key, text in (('F', filename), ('UF', ufilename), ('Desc', desc)):
        mupdf.pdf_dict_put_text_string(filespec, PDF_NAME(key), text)
    mupdf.pdf_dict_put(filespec, PDF_NAME('Type'), PDF_NAME('Filespec'))

    # Create the stream with placeholder content, then put the real data in.
    placeholder = b' '
    stream = mupdf.pdf_add_stream(
            pdf,
            #mupdf.fz_fz_new_buffer_from_copied_data(bs),
            mupdf.fz_new_buffer_from_copied_data(placeholder),
            mupdf.PdfObj(),
            0,
            )
    mupdf.pdf_dict_put(ef, PDF_NAME('F'), stream)
    JM_update_stream(pdf, stream, buf, compress)

    size, _ = mupdf.fz_buffer_storage(buf)
    mupdf.pdf_dict_put_int(stream, PDF_NAME('DL'), size)
    mupdf.pdf_dict_put_int(stream, PDF_NAME('Length'), size)
    params = mupdf.pdf_dict_put_dict(stream, PDF_NAME('Params'), 4)
    mupdf.pdf_dict_put_int(params, PDF_NAME('Size'), size)
    return filespec
def JM_embedded_clean(pdf):
    '''
    Perform some cleaning of the PDF catalog:
    (1) remove an empty /Collection entry
    (2) set /PageMode to /UseAttachments if /Names/EmbeddedFiles/Names exists
    '''
    root = mupdf.pdf_dict_get( mupdf.pdf_trailer( pdf), PDF_NAME('Root'))

    # remove any empty /Collection entry
    collection = mupdf.pdf_dict_get(root, PDF_NAME('Collection'))
    if collection.m_internal and mupdf.pdf_dict_len(collection) == 0:
        mupdf.pdf_dict_del(root, PDF_NAME('Collection'))

    embedded_names = mupdf.pdf_dict_getl(
            root,
            PDF_NAME('Names'),
            PDF_NAME('EmbeddedFiles'),
            PDF_NAME('Names'),
            )
    if not embedded_names.m_internal:
        return
    mupdf.pdf_dict_put_name(root, PDF_NAME('PageMode'), "UseAttachments")
def JM_EscapeStrFromBuffer(buff):
    '''
    Return the content of `buff` decoded via
    `PyUnicode_DecodeRawUnicodeEscape(..., errors='replace')`, or '' if the
    buffer wraps no underlying object.
    '''
    if buff.m_internal:
        raw = mupdf.fz_buffer_extract_copy(buff)
        return PyUnicode_DecodeRawUnicodeEscape(raw, errors='replace')
    return ''
def JM_ensure_identity(pdf):
    '''
    Make sure the PDF trailer has an /ID entry.

    If missing, an array containing the same string twice is stored; the
    string is made from 16 values delivered by `mupdf.fz_memrnd2()`.
    '''
    trailer_id = mupdf.pdf_dict_get( mupdf.pdf_trailer( pdf), PDF_NAME('ID'))
    if trailer_id.m_internal:
        return
    # Need to convert raw bytes into a str to send to
    # mupdf.pdf_new_string(). chr() seems to work for this.
    rnd = ''.join( map( chr, mupdf.fz_memrnd2( 16)))
    trailer_id = mupdf.pdf_dict_put_array( mupdf.pdf_trailer( pdf), PDF_NAME('ID'), 2)
    for _ in range( 2):
        mupdf.pdf_array_push( trailer_id, mupdf.pdf_new_string( rnd, len( rnd)))
def JM_ensure_ocproperties(pdf):
    '''
    Ensure that the catalog has an /OCProperties dictionary and return it.

    A missing dictionary is created with an empty /OCGs array and a /D
    dictionary containing the empty arrays /ON, /OFF, /Order and /RBGroups.
    '''
    root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    ocp = mupdf.pdf_dict_get(root, PDF_NAME('OCProperties'))
    if ocp.m_internal:
        return ocp
    ocp = mupdf.pdf_dict_put_dict(root, PDF_NAME('OCProperties'), 2)
    mupdf.pdf_dict_put_array(ocp, PDF_NAME('OCGs'), 0)
    default_config = mupdf.pdf_dict_put_dict(ocp, PDF_NAME('D'), 5)
    for key in ('ON', 'OFF', 'Order', 'RBGroups'):
        mupdf.pdf_dict_put_array(default_config, PDF_NAME(key), 0)
    return ocp
def JM_expand_fname(name):
    '''
    Map a font name to one of the abbreviations "Cour", "TiRo", "Symb",
    "ZaDb" or "Helv".

    The decision is based on the first two characters of `name` (the first
    one in either case); "Helv" is returned for an empty name and for
    anything else.
    '''
    if not name:
        return "Helv"
    abbreviations = (
            (("Co", "co"), "Cour"),
            (("Ti", "ti"), "TiRo"),
            (("Sy", "sy"), "Symb"),
            (("Za", "za"), "ZaDb"),
            )
    for prefixes, short_name in abbreviations:
        if name.startswith(prefixes):
            return short_name
    return "Helv"
def JM_field_type_text(wtype):
    '''
    Return the name of widget type `wtype` (one of the
    mupdf.PDF_WIDGET_TYPE_* values), or "unknown".
    '''
    type_names = (
            (mupdf.PDF_WIDGET_TYPE_BUTTON, "Button"),
            (mupdf.PDF_WIDGET_TYPE_CHECKBOX, "CheckBox"),
            (mupdf.PDF_WIDGET_TYPE_RADIOBUTTON, "RadioButton"),
            (mupdf.PDF_WIDGET_TYPE_TEXT, "Text"),
            (mupdf.PDF_WIDGET_TYPE_LISTBOX, "ListBox"),
            (mupdf.PDF_WIDGET_TYPE_COMBOBOX, "ComboBox"),
            (mupdf.PDF_WIDGET_TYPE_SIGNATURE, "Signature"),
            )
    for type_value, text in type_names:
        if wtype == type_value:
            return text
    return "unknown"
def JM_fill_pixmap_rect_with_color(dest, col, b):
    '''
    Fill a pixmap rectangle with a color tuple.

    dest: the pixmap to modify.
    col: sequence of sample values; the first `dest.n()` items are written
        to every pixel.
    b: rectangle to fill; it is intersected with the pixmap's bbox.

    Returns 0 if the intersection is empty (nothing changed), else 1.
    '''
    assert isinstance(dest, mupdf.FzPixmap)
    b = mupdf.fz_intersect_irect(b, mupdf.fz_pixmap_bbox( dest))
    width = b.x1 - b.x0
    height = b.y1 - b.y0
    if width <= 0 or height <= 0:
        return 0
    stride = dest.stride()
    row_start = stride * (b.y0 - dest.y()) + dest.n() * (b.x0 - dest.x())
    for _row in range(height):
        pos = row_start
        for _x in range(width):
            for component in range( dest.n()):
                mupdf.fz_samples_set(dest, pos, col[component])
                pos += 1
        row_start += stride
    return 1
def JM_find_annot_irt(annot):
    '''
    Return the first annotation whose /IRT key ("In Response To") points to
    annot. Used to remove the response chain of a given annotation.

    Returns None if the page has no such annotation.
    '''
    assert isinstance(annot, mupdf.PdfAnnot)
    target_obj = mupdf.pdf_annot_obj(annot)
    page = _pdf_annot_page(annot)
    # loop thru MuPDF's internal annots array
    candidate = mupdf.pdf_first_annot(page)
    while True:
        assert isinstance(candidate, mupdf.PdfAnnot)
        if not candidate.m_internal:
            return None
        irt = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(candidate), 'IRT')
        if irt.m_internal and not mupdf.pdf_objcmp(irt, target_obj):
            return candidate
        candidate = mupdf.pdf_next_annot(candidate)
def JM_font_ascender(font):
    '''
    Own version of the font ascender: the fixed value 0.8 if quad
    corrections are skipped, otherwise the value of
    `mupdf.fz_font_ascender()`.
    '''
    assert isinstance(font, mupdf.FzFont)
    return 0.8 if _globals.skip_quad_corrections else mupdf.fz_font_ascender(font)
def JM_font_descender(font):
    '''
    Own version of the font descender: the fixed value -0.2 if quad
    corrections are skipped, otherwise the value of
    `mupdf.fz_font_descender()`.
    '''
    assert isinstance(font, mupdf.FzFont)
    return -0.2 if _globals.skip_quad_corrections else mupdf.fz_font_descender(font)
def JM_is_word_delimiter(ch, delimiters):
    """Check if code point `ch` is a word delimiting character.

    This is true for code points up to 32, for 160, for 0x202a .. 0x202e and
    for every character contained in `delimiters` (which may be empty or
    None).
    """
    # covers any whitespace plus unicodes that switch between
    # right-to-left and left-to-right languages
    is_standard = ch <= 32 or ch == 160 or 0x202a <= ch <= 0x202e
    if is_standard:
        return True
    if not delimiters:  # no extra delimiters provided
        return False
    char = chr(ch)
    return any(d == char for d in delimiters)
-
def JM_is_rtl_char(ch):
    '''
    Return True if code point 'ch' lies in the range 0x590..0x900
    (both ends included), else False.
    '''
    return 0x590 <= ch <= 0x900
def JM_font_name(font):
    '''
    Return the name of a font. A subset prefix (six characters followed by
    '+') is stripped unless _globals.subset_fontnames is set.
    '''
    assert isinstance(font, mupdf.FzFont)
    full_name = mupdf.fz_font_name(font)
    if _globals.subset_fontnames:
        return full_name
    # only a '+' at index 6 marks a subset prefix
    if full_name.find('+') != 6:
        return full_name
    return full_name[7:]
def JM_gather_fonts(pdf, dict_, fontlist, stream_xref):
    '''
    Append one tuple per font found in the font resource dictionary 'dict_'
    to the Python list 'fontlist':
    (xref, extension, subtype, name, reference name, encoding, stream_xref).
    Entries that are not dictionaries are skipped with a warning.
    Always returns 1.
    '''
    for idx in range(mupdf.pdf_dict_len(dict_)):
        refname = mupdf.pdf_dict_get_key(dict_, idx)
        fontdict = mupdf.pdf_dict_get_val(dict_, idx)
        if not mupdf.pdf_is_dict(fontdict):
            mupdf.fz_warn( f"'{mupdf.pdf_to_name(refname)}' is no font dict ({mupdf.pdf_to_num(fontdict)} 0 R)")
            continue

        subtype = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_Subtype)
        basefont = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_BaseFont)
        # fall back to /Name if /BaseFont is missing or null
        basefont_usable = basefont.m_internal and not mupdf.pdf_is_null(basefont)
        if basefont_usable:
            name = basefont
        else:
            name = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_Name)

        encoding = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_Encoding)
        if mupdf.pdf_is_dict(encoding):
            encoding = mupdf.pdf_dict_get(encoding, mupdf.PDF_ENUM_NAME_BaseEncoding)

        xref = mupdf.pdf_to_num(fontdict)
        ext = JM_get_fontextension(pdf, xref) if xref else "n/a"
        fontlist.append((
                xref,
                ext,
                mupdf.pdf_to_name(subtype),
                JM_EscapeStrFromStr(mupdf.pdf_to_name(name)),
                mupdf.pdf_to_name(refname),
                mupdf.pdf_to_name(encoding),
                stream_xref,
                ))
    return 1
def JM_gather_forms(doc, dict_: mupdf.PdfObj, imagelist, stream_xref: int):
    '''
    Append one tuple per /Form xobject found in dictionary 'dict_' to the
    Python list 'imagelist': (xref, reference name, stream_xref, bbox).
    The bbox is /BBox transformed by /Matrix; without /BBox it is the
    infinite rectangle. Always returns 1.
    '''
    assert isinstance(doc, mupdf.PdfDocument)
    for idx in range(mupdf.pdf_dict_len(dict_)):
        refname = mupdf.pdf_dict_get_key(dict_, idx)
        formdict = mupdf.pdf_dict_get_val(dict_, idx)
        if not mupdf.pdf_is_dict(formdict):
            mupdf.fz_warn( f"'{mupdf.pdf_to_name(refname)}' is no form dict ({mupdf.pdf_to_num(formdict)} 0 R)")
            continue

        subtype = mupdf.pdf_dict_get(formdict, PDF_NAME('Subtype'))
        if not mupdf.pdf_name_eq(subtype, PDF_NAME('Form')):
            continue

        bbox_obj = mupdf.pdf_dict_get(formdict, PDF_NAME('BBox'))
        matrix_obj = mupdf.pdf_dict_get(formdict, PDF_NAME('Matrix'))
        mat = mupdf.pdf_to_matrix(matrix_obj) if matrix_obj.m_internal else mupdf.FzMatrix()
        if bbox_obj.m_internal:
            bbox = mupdf.fz_transform_rect(mupdf.pdf_to_rect(bbox_obj), mat)
        else:
            bbox = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)

        xref = mupdf.pdf_to_num(formdict)
        imagelist.append((
                xref,
                mupdf.pdf_to_name(refname),
                stream_xref,
                JM_py_from_rect(bbox),
                ))
    return 1
def JM_gather_images(doc: mupdf.PdfDocument, dict_: mupdf.PdfObj, imagelist, stream_xref: int):
    '''
    Append one tuple per image xobject found in dictionary 'dict_' to the
    Python list 'imagelist':
    (xref, smask xref, width, height, bpc, colorspace, alternate colorspace,
    reference name, filter, stream_xref).
    Always returns 1.
    '''
    for idx in range(mupdf.pdf_dict_len(dict_)):
        refname = mupdf.pdf_dict_get_key(dict_, idx)
        imagedict = mupdf.pdf_dict_get_val(dict_, idx)
        if not mupdf.pdf_is_dict(imagedict):
            mupdf.fz_warn(f"'{mupdf.pdf_to_name(refname)}' is no image dict ({mupdf.pdf_to_num(imagedict)} 0 R)")
            continue

        subtype = mupdf.pdf_dict_get(imagedict, PDF_NAME('Subtype'))
        if not mupdf.pdf_name_eq(subtype, PDF_NAME('Image')):
            continue

        xref = mupdf.pdf_to_num(imagedict)

        # xref of the /SMask (or /Mask) object, 0 if there is none
        smask = mupdf.pdf_dict_geta(imagedict, PDF_NAME('SMask'), PDF_NAME('Mask'))
        smask_xref = mupdf.pdf_to_num(smask) if smask.m_internal else 0

        # of a filter array, only the first entry is reported
        filter_ = mupdf.pdf_dict_geta(imagedict, PDF_NAME('Filter'), PDF_NAME('F'))
        if mupdf.pdf_is_array(filter_):
            filter_ = mupdf.pdf_array_get(filter_, 0)

        altcs = mupdf.PdfObj(0)
        cs = mupdf.pdf_dict_geta(imagedict, PDF_NAME('ColorSpace'), PDF_NAME('CS'))
        if mupdf.pdf_is_array(cs):
            cs_array = cs
            cs = mupdf.pdf_array_get(cs_array, 0)
            needs_alternate = (
                    mupdf.pdf_name_eq(cs, PDF_NAME('DeviceN'))
                    or mupdf.pdf_name_eq(cs, PDF_NAME('Separation'))
                    )
            if needs_alternate:
                altcs = mupdf.pdf_array_get(cs_array, 2)
                if mupdf.pdf_is_array(altcs):
                    altcs = mupdf.pdf_array_get(altcs, 0)

        width = mupdf.pdf_dict_geta(imagedict, PDF_NAME('Width'), PDF_NAME('W'))
        height = mupdf.pdf_dict_geta(imagedict, PDF_NAME('Height'), PDF_NAME('H'))
        bpc = mupdf.pdf_dict_geta(imagedict, PDF_NAME('BitsPerComponent'), PDF_NAME('BPC'))

        imagelist.append((
                xref,
                smask_xref,
                mupdf.pdf_to_int(width),
                mupdf.pdf_to_int(height),
                mupdf.pdf_to_int(bpc),
                JM_EscapeStrFromStr(mupdf.pdf_to_name(cs)),
                JM_EscapeStrFromStr(mupdf.pdf_to_name(altcs)),
                JM_EscapeStrFromStr(mupdf.pdf_to_name(refname)),
                JM_EscapeStrFromStr(mupdf.pdf_to_name(filter_)),
                stream_xref,
                ))
    return 1
def JM_get_annot_by_xref(page, xref):
    '''
    Return the annotation of 'page' whose object number equals 'xref'.
    Raises Exception if the page has no such annotation.
    '''
    assert isinstance(page, mupdf.PdfPage)
    # walk MuPDF's internal annotation list of the page
    annot = mupdf.pdf_first_annot(page)
    while annot.m_internal:
        if xref == mupdf.pdf_to_num(mupdf.pdf_annot_obj(annot)):
            return annot
        annot = mupdf.pdf_next_annot(annot)
    raise Exception("xref %d is not an annot of this page" % xref)
def JM_get_annot_by_name(page, name):
    '''
    Return the annotation of 'page' whose /NM entry equals 'name'.
    Returns None for an empty name; raises Exception if no annotation of
    the page carries that name.
    '''
    assert isinstance(page, mupdf.PdfPage)
    if not name:
        return None
    # walk MuPDF's internal annotation list of the page
    annot = mupdf.pdf_first_annot(page)
    while annot.m_internal:
        nm_obj = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "NM")
        annot_name, _length = mupdf.pdf_to_string(nm_obj)
        if name == annot_name:
            return annot
        annot = mupdf.pdf_next_annot(annot)
    raise Exception("'%s' is not an annot of this page" % name)
def JM_get_annot_id_list(page):
    '''
    Return the list of /NM values of all entries in the page's /Annots
    array that have one. The list is empty if the page has no /Annots.
    '''
    annots = mupdf.pdf_dict_get(page.obj(), mupdf.PDF_ENUM_NAME_Annots)
    if not annots.m_internal:
        return []
    ids = []
    for idx in range(mupdf.pdf_array_len(annots)):
        nm_obj = mupdf.pdf_dict_gets(mupdf.pdf_array_get(annots, idx), "NM")
        if nm_obj.m_internal:
            ids.append(mupdf.pdf_to_text_string(nm_obj))
    return ids
def JM_get_annot_xref_list( page_obj):
    '''
    Return a list of (xref, annotation type, /NM id) tuples for the entries
    of a page object's /Annots array. Entries without /Subtype or with an
    unknown annotation type are left out.
    '''
    if g_use_extra:
        # delegate to the 'extra' implementation
        return extra.JM_get_annot_xref_list( page_obj)

    result = []
    annots = mupdf.pdf_dict_get(page_obj, PDF_NAME('Annots'))
    for idx in range(mupdf.pdf_array_len(annots)):
        annot_obj = mupdf.pdf_array_get(annots, idx)
        xref = mupdf.pdf_to_num(annot_obj)
        subtype = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Subtype'))
        if not subtype.m_internal:
            continue    # subtype is required
        annot_type = mupdf.pdf_annot_type_from_string(mupdf.pdf_to_name(subtype))
        if annot_type == mupdf.PDF_ANNOT_UNKNOWN:
            continue    # only accept valid annot types
        nm_obj = mupdf.pdf_dict_gets(annot_obj, "NM")
        result.append((xref, annot_type, mupdf.pdf_to_text_string(nm_obj)))
    return result
def JM_get_annot_xref_list2(page):
    '''
    Like JM_get_annot_xref_list(), but takes a page whose _pdf_page()
    method yields the PDF page. Returns an empty list if that PDF page has
    no internal object.
    '''
    pdf_page = page._pdf_page(required=False)
    if pdf_page.m_internal:
        return JM_get_annot_xref_list(pdf_page.obj())
    return list()
def JM_get_border_style(style):
    '''
    Map a Python str to the pdf_obj "border style" name. Only the first
    letter counts (either case): b, d, i, u or s. None and anything else
    yield the /S name.
    '''
    default = mupdf.PDF_ENUM_NAME_S
    if style is None:
        return default
    by_initial = (
            (("b", "B"), "PDF_ENUM_NAME_B"),
            (("d", "D"), "PDF_ENUM_NAME_D"),
            (("i", "I"), "PDF_ENUM_NAME_I"),
            (("u", "U"), "PDF_ENUM_NAME_U"),
            )
    for initials, attr in by_initial:
        if style.startswith(initials):
            return getattr(mupdf, attr)
    return default
def JM_get_font(
        fontname,
        fontfile,
        fontbuffer,
        script,
        lang,
        ordering,
        is_bold,
        is_italic,
        is_serif,
        embed,
        ):
    '''
    Return a fz_font from a number of parameters.

    The first applicable source wins, in this order: font file, font
    buffer, CJK ordering (> -1), font name (Base-14, then MuPDF builtin),
    Noto font for script / lang, and finally MuPDF's fallback font.

    Raises RuntimeError(MSG_FONT_FAILED) if the chosen source yields a font
    without internal object.
    '''
    def fertig(font):
        # common exit ("fertig" = "done"): validate font, apply embedding
        if not font.m_internal:
            raise RuntimeError(MSG_FONT_FAILED)
        # if font allows this, set embedding
        if not font.m_internal.flags.never_embed:
            mupdf.fz_set_font_embedding(font, embed)
        return font

    index = 0
    if fontfile:
        return fertig(mupdf.fz_new_font_from_file(None, fontfile, index, 0))
    if fontbuffer:
        res = JM_BufferFromBytes(fontbuffer)
        return fertig(mupdf.fz_new_font_from_buffer(None, res, index, 0))
    if ordering > -1:
        return fertig(mupdf.fz_new_cjk_font(ordering))
    if fontname:
        # Base-14 or a MuPDF builtin font
        font = mupdf.fz_new_base14_font(fontname)
        if font.m_internal:
            return fertig(font)
        return fertig(mupdf.fz_new_builtin_font(fontname, is_bold, is_italic))

    # Check for NOTO font
    data, size, index = mupdf.fz_lookup_noto_font(script, lang)
    if data:
        font = mupdf.fz_new_font_from_memory(None, data, size, index, 0)
        if font.m_internal:
            return fertig(font)
    # No Noto data (or font creation failed): use the fallback font. The
    # previous code dereferenced 'font' here while it still was None.
    return fertig(mupdf.fz_load_fallback_font(script, lang, is_serif, is_bold, is_italic))
-
def JM_get_fontbuffer(doc, xref):
    '''
    Return the contents of a font file, identified by xref.

    The font's FontDescriptor is looked up (via /DescendantFonts if
    present). The stream is taken from /FontFile, /FontFile2 or /FontFile3,
    a later key overriding an earlier one.

    Returns None if xref < 1, if there is no FontDescriptor, if the
    /FontFile3 subtype is present but not a name, or if no font file entry
    exists. Problems are reported via message().
    '''
    if xref < 1:
        return
    o = mupdf.pdf_load_object(doc, xref)
    desft = mupdf.pdf_dict_get(o, PDF_NAME('DescendantFonts'))
    if desft.m_internal:
        obj = mupdf.pdf_resolve_indirect(mupdf.pdf_array_get(desft, 0))
        obj = mupdf.pdf_dict_get(obj, PDF_NAME('FontDescriptor'))
    else:
        obj = mupdf.pdf_dict_get(o, PDF_NAME('FontDescriptor'))
    if not obj.m_internal:
        message("invalid font - FontDescriptor missing")
        return
    o = obj     # we have the FontDescriptor

    stream = None
    obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile'))
    if obj.m_internal:
        stream = obj    # ext = "pfa"

    obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile2'))
    if obj.m_internal:
        stream = obj    # ext = "ttf"

    obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile3'))
    if obj.m_internal:
        stream = obj
        obj = mupdf.pdf_dict_get(obj, PDF_NAME('Subtype'))
        if obj.m_internal and not mupdf.pdf_is_name(obj):
            message("invalid font descriptor subtype")
            return
        # Known subtypes: Type1C ("cff"), CIDFontType0C ("cid"),
        # OpenType ("otf"). Anything else only triggers a warning.
        known_subtype = (
                mupdf.pdf_name_eq(obj, PDF_NAME('Type1C'))
                or mupdf.pdf_name_eq(obj, PDF_NAME('CIDFontType0C'))
                or mupdf.pdf_name_eq(obj, PDF_NAME('OpenType'))
                )
        if not known_subtype:
            # Was a plain string with an unexpanded '{...}' placeholder.
            message(f'warning: unhandled font type {mupdf.pdf_to_name(obj)!r}')

    if not stream:
        message('warning: unhandled font type')
        return
    return mupdf.pdf_load_stream(stream)
def JM_get_resource_properties(ref):
    '''
    Return the items of Resources/Properties (used for Marked Content) as a
    list of (name, xref) tuples, or an empty tuple if there are none.
    Argument may be e.g. a page object or a Form XObject.
    '''
    properties = mupdf.pdf_dict_getl(ref, PDF_NAME('Resources'), PDF_NAME('Properties'))
    if not properties.m_internal:
        return ()
    count = mupdf.pdf_dict_len(properties)
    if count < 1:
        return ()
    items = []
    for idx in range(count):
        key_obj = mupdf.pdf_dict_get_key(properties, idx)
        val_obj = mupdf.pdf_dict_get_val(properties, idx)
        items.append((mupdf.pdf_to_name(key_obj), mupdf.pdf_to_num(val_obj)))
    return items
def JM_get_widget_by_xref( page, xref):
    '''
    Return the widget of 'page' whose object number equals 'xref', wrapped
    in an Annot. Raises Exception if the page has no such widget.
    '''
    widget = mupdf.pdf_first_widget(page)
    while widget.m_internal:
        if xref == mupdf.pdf_to_num(mupdf.pdf_annot_obj(widget)):
            return Annot(widget)
        widget = mupdf.pdf_next_widget(widget)
    raise Exception( f"xref {xref} is not a widget of this page")
def JM_get_widget_properties(annot, Widget):
    '''
    Populate a Python Widget object with the values from a PDF form field.
    Called by "Page.first_widget" and "Widget.next".

    annot: wrapper whose '.this' attribute is passed to the MuPDF widget /
        annotation functions.
    Widget: object receiving the attributes; its _parse_da() method is
        called after "_text_da" has been set.
    '''
    #log( '{type(annot)=}')
    annot_obj = mupdf.pdf_annot_obj(annot.this)
    #log( 'Have called mupdf.pdf_annot_obj()')
    page = _pdf_annot_page(annot.this)
    pdf = page.doc()
    tw = annot
    def SETATTR(key, value):
        # set attribute 'key' of Widget
        setattr(Widget, key, value)
    def SETATTR_DROP(mod, key, value):
        # Original C code for this function deletes if PyObject* is NULL. We
        # don't have a representation for that in Python - e.g. None is not
        # represented by NULL.
        setattr(mod, key, value)

    #log( '=== + mupdf.pdf_widget_type(tw)')
    field_type = mupdf.pdf_widget_type(tw.this)
    #log( '=== - mupdf.pdf_widget_type(tw)')
    Widget.field_type = field_type
    # "is_signed" is True / False for signature fields, None for all others
    if field_type == mupdf.PDF_WIDGET_TYPE_SIGNATURE:
        if mupdf.pdf_signature_is_signed(pdf, annot_obj):
            SETATTR("is_signed", True)
        else:
            SETATTR("is_signed",False)
    else:
        SETATTR("is_signed", None)
    SETATTR_DROP(Widget, "border_style", JM_UnicodeFromStr(mupdf.pdf_field_border_style(annot_obj)))
    SETATTR_DROP(Widget, "field_type_string", JM_UnicodeFromStr(JM_field_type_text(field_type)))

    field_name = mupdf.pdf_load_field_name(annot_obj)
    SETATTR_DROP(Widget, "field_name", field_name)

    def pdf_dict_get_inheritable_nonempty_label(node, key):
        '''
        This is a modified version of MuPDF's pdf_dict_get_inheritable(), with
        some changes:
        * Returns string from pdf_to_text_string() or None if not found.
        * Recurses to parent if current node exists but with empty string
          value.
        '''
        # 'slow' trails 'node' up the /Parent chain; meeting it again means
        # the chain is cyclic.
        slow = node
        halfbeat = 11   # Don't start moving slow pointer for a while.
        while 1:
            if not node.m_internal:
                return
            val = mupdf.pdf_dict_get(node, key)
            if val.m_internal:
                label = mupdf.pdf_to_text_string(val)
                if label:
                    return label
            node = mupdf.pdf_dict_get(node, PDF_NAME('Parent'))
            if node.m_internal == slow.m_internal:
                raise Exception("cycle in resources")
            halfbeat -= 1
            if halfbeat == 0:
                slow = mupdf.pdf_dict_get(slow, PDF_NAME('Parent'))
                halfbeat = 2

    # In order to address #3950, we use our modified pdf_dict_get_inheritable()
    # to ignore empty-string child values.
    label = pdf_dict_get_inheritable_nonempty_label(annot_obj, PDF_NAME('TU'))
    if label is not None:
        SETATTR_DROP(Widget, "field_label", label)

    # Radio buttons: the value is the /AS name if present and non-empty,
    # otherwise (and for all other field types) the field value.
    fvalue = None
    if field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON:
        obj = mupdf.pdf_dict_get( annot_obj, PDF_NAME('Parent'))     # owning RB group
        if obj.m_internal:
            SETATTR_DROP(Widget, "rb_parent", mupdf.pdf_to_num( obj))
        obj = mupdf.pdf_dict_get(annot_obj, PDF_NAME('AS'))
        if obj.m_internal:
            fvalue = mupdf.pdf_to_name(obj)
    if not fvalue:
        fvalue = mupdf.pdf_field_value(annot_obj)
    SETATTR_DROP(Widget, "field_value", JM_UnicodeFromStr(fvalue))

    SETATTR_DROP(Widget, "field_display", mupdf.pdf_field_display(annot_obj))

    # a border width reading as 0 is replaced by 1
    border_width = mupdf.pdf_to_real(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('BS'), PDF_NAME('W')))
    if border_width == 0:
        border_width = 1
    SETATTR_DROP(Widget, "border_width", border_width)

    # "border_dashes" is only set if /BS/D is an array
    obj = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('BS'), PDF_NAME('D'))
    if mupdf.pdf_is_array(obj):
        n = mupdf.pdf_array_len(obj)
        d = [0] * n
        for i in range(n):
            d[i] = mupdf.pdf_to_int(mupdf.pdf_array_get(obj, i))
        SETATTR_DROP(Widget, "border_dashes", d)

    SETATTR_DROP(Widget, "text_maxlen", mupdf.pdf_text_widget_max_len(tw.this))
    SETATTR_DROP(Widget, "text_format", mupdf.pdf_text_widget_format(tw.this))

    # "fill_color" is only set if /MK/BG is an array
    obj = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('MK'), PDF_NAME('BG'))
    if mupdf.pdf_is_array(obj):
        n = mupdf.pdf_array_len(obj)
        col = [0] * n
        for i in range(n):
            col[i] = mupdf.pdf_to_real(mupdf.pdf_array_get(obj, i))
        SETATTR_DROP(Widget, "fill_color", col)

    # "border_color" is only set if /MK/BC is an array
    obj = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('MK'), PDF_NAME('BC'))
    if mupdf.pdf_is_array(obj):
        n = mupdf.pdf_array_len(obj)
        col = [0] * n
        for i in range(n):
            col[i] = mupdf.pdf_to_real(mupdf.pdf_array_get(obj, i))
        SETATTR_DROP(Widget, "border_color", col)

    SETATTR_DROP(Widget, "choice_values", JM_choice_options(annot))

    da = mupdf.pdf_to_text_string(mupdf.pdf_dict_get_inheritable(annot_obj, PDF_NAME('DA')))
    SETATTR_DROP(Widget, "_text_da", JM_UnicodeFromStr(da))

    # "button_caption" is only set if /MK/CA exists
    obj = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('MK'), PDF_NAME('CA'))
    if obj.m_internal:
        SETATTR_DROP(Widget, "button_caption", JM_UnicodeFromStr(mupdf.pdf_to_text_string(obj)))

    SETATTR_DROP(Widget, "field_flags", mupdf.pdf_field_flags(annot_obj))

    # call Py method to reconstruct text color, font name, size
    Widget._parse_da()

    # extract JavaScript action texts
    s = mupdf.pdf_dict_get(annot_obj, PDF_NAME('A'))
    ss = JM_get_script(s)
    SETATTR_DROP(Widget, "script", ss)

    SETATTR_DROP(Widget, "script_stroke",
            JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), PDF_NAME('K')))
            )

    SETATTR_DROP(Widget, "script_format",
            JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), PDF_NAME('F')))
            )

    SETATTR_DROP(Widget, "script_change",
            JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), PDF_NAME('V')))
            )

    SETATTR_DROP(Widget, "script_calc",
            JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), PDF_NAME('C')))
            )

    # /Bl and /Fo are looked up via freshly created name objects
    SETATTR_DROP(Widget, "script_blur",
            JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Bl')))
            )

    SETATTR_DROP(Widget, "script_focus",
            JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Fo')))
            )
def JM_get_fontextension(doc, xref):
    '''
    Return the file extension of a font file, identified by xref.

    Possible results: "pfa" (/FontFile), "ttf" (/FontFile2), and for
    /FontFile3 depending on its /Subtype "cff" (Type1C), "cid"
    (CIDFontType0C) or "otf" (OpenType). In all other cases - xref < 1, no
    FontDescriptor, no font file entry, invalid or unhandled subtype -
    "n/a" is returned.
    '''
    if xref < 1:
        return "n/a"
    o = mupdf.pdf_load_object(doc, xref)
    desft = mupdf.pdf_dict_get(o, PDF_NAME('DescendantFonts'))
    if desft.m_internal:
        obj = mupdf.pdf_resolve_indirect(mupdf.pdf_array_get(desft, 0))
        obj = mupdf.pdf_dict_get(obj, PDF_NAME('FontDescriptor'))
    else:
        obj = mupdf.pdf_dict_get(o, PDF_NAME('FontDescriptor'))
    if not obj.m_internal:
        return "n/a"    # this is a base-14 font
    o = obj     # we have the FontDescriptor

    if mupdf.pdf_dict_get(o, PDF_NAME('FontFile')).m_internal:
        return "pfa"
    if mupdf.pdf_dict_get(o, PDF_NAME('FontFile2')).m_internal:
        return "ttf"

    obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile3'))
    if obj.m_internal:
        obj = mupdf.pdf_dict_get(obj, PDF_NAME('Subtype'))
        if obj.m_internal and not mupdf.pdf_is_name(obj):
            message("invalid font descriptor subtype")
            return "n/a"
        if mupdf.pdf_name_eq(obj, PDF_NAME('Type1C')):
            return "cff"
        if mupdf.pdf_name_eq(obj, PDF_NAME('CIDFontType0C')):
            return "cid"
        if mupdf.pdf_name_eq(obj, PDF_NAME('OpenType')):
            return "otf"
        # Format the text here: message() is called with one ready-made
        # string everywhere else, not with printf-style arguments.
        message(f"unhandled font type '{mupdf.pdf_to_name(obj)}'")
    return "n/a"
def JM_get_ocg_arrays_imp(arr):
    '''
    Helper for reading OCG arrays of an OC configuration: return the object
    numbers of the entries of PDF array 'arr' as a list without
    duplicates, in order of first occurrence. If 'arr' is not an array,
    the list is empty.
    '''
    xrefs = list()
    if not mupdf.pdf_is_array(arr):
        return xrefs
    for idx in range(mupdf.pdf_array_len(arr)):
        xref = mupdf.pdf_to_num(mupdf.pdf_array_get(arr, idx))
        if xref not in xrefs:
            xrefs.append(xref)
    return xrefs
def JM_get_ocg_arrays(conf):
    '''
    Get OCG arrays from OC configuration 'conf'.
    Returns a dict with the keys "on", "off", "locked" (lists of object
    numbers), "rbgroups" (list of such lists) and "basestate" (name).
    A key is only present if its value is non-empty respectively exists.
    '''
    rc = dict()
    for out_key, pdf_key in (("on", 'ON'), ("off", 'OFF'), ('locked', 'Locked')):
        xrefs = JM_get_ocg_arrays_imp(mupdf.pdf_dict_get(conf, PDF_NAME(pdf_key)))
        if xrefs:
            rc[out_key] = xrefs

    groups = list()
    rbgroups = mupdf.pdf_dict_get(conf, PDF_NAME('RBGroups'))
    if mupdf.pdf_is_array(rbgroups):
        for idx in range(mupdf.pdf_array_len(rbgroups)):
            groups.append(JM_get_ocg_arrays_imp(mupdf.pdf_array_get(rbgroups, idx)))
    if groups:
        rc["rbgroups"] = groups

    basestate = mupdf.pdf_dict_get(conf, PDF_NAME('BaseState'))
    if basestate.m_internal:
        rc["basestate"] = mupdf.pdf_to_name(basestate)
    return rc
def JM_get_page_labels(liste, nums):
    '''
    Walk PDF array 'nums' in (key, value) pairs and append one tuple
    (key as int, value object source as str) per pair to list 'liste'.
    '''
    for pos in range(0, mupdf.pdf_array_len(nums), 2):
        key = mupdf.pdf_resolve_indirect(mupdf.pdf_array_get(nums, pos))
        pno = mupdf.pdf_to_int(key)
        val = mupdf.pdf_resolve_indirect(mupdf.pdf_array_get(nums, pos + 1))
        res = JM_object_to_buffer(val, 1, 0)
        raw = mupdf.fz_buffer_extract(res)
        assert isinstance(raw, bytes)
        liste.append((pno, raw.decode('utf-8')))
def JM_get_script(key):
    '''
    JavaScript extractor.
    'key' is a PDF action dictionary. If its /S entry is '/JavaScript' and
    its /JS entry is a string or a stream, the script source is returned.
    In all other cases, and for an empty script, the result is None.
    '''
    if not key.m_internal:
        return None
    action_type = mupdf.pdf_to_name(mupdf.pdf_dict_get(key, PDF_NAME('S')))
    if action_type != "JavaScript":
        return None
    js = mupdf.pdf_dict_get(key, PDF_NAME('JS'))
    if not js.m_internal:
        return None

    if mupdf.pdf_is_string(js):
        script = JM_UnicodeFromStr(mupdf.pdf_to_text_string(js))
    elif mupdf.pdf_is_stream(js):
        script = JM_EscapeStrFromBuffer(mupdf.pdf_load_stream(js))
    else:
        return None
    # do not return an empty script
    return script if script else None
def JM_have_operation(pdf):
    '''
    Ensure valid journalling state: return 0 if the document has a journal
    but pdf_undoredo_step(pdf, 0) yields a false value, else 1.
    '''
    journal = pdf.m_internal.journal
    missing_operation = bool(journal) and not mupdf.pdf_undoredo_step(pdf, 0)
    return 0 if missing_operation else 1
def JM_image_extension(type_):
    '''
    Return the extension string for a MuPDF image type, "n/a" if the type
    is not one of the listed ones.
    '''
    # (name of mupdf constant, extension); constants are looked up lazily
    # and compared in this order
    known = (
            ("FZ_IMAGE_FAX", "fax"),
            ("FZ_IMAGE_RAW", "raw"),
            ("FZ_IMAGE_FLATE", "flate"),
            ("FZ_IMAGE_LZW", "lzw"),
            ("FZ_IMAGE_RLD", "rld"),
            ("FZ_IMAGE_BMP", "bmp"),
            ("FZ_IMAGE_GIF", "gif"),
            ("FZ_IMAGE_JBIG2", "jb2"),
            ("FZ_IMAGE_JPEG", "jpeg"),
            ("FZ_IMAGE_JPX", "jpx"),
            ("FZ_IMAGE_JXR", "jxr"),
            ("FZ_IMAGE_PNG", "png"),
            ("FZ_IMAGE_PNM", "pnm"),
            ("FZ_IMAGE_TIFF", "tiff"),
            #("FZ_IMAGE_PSD", "psd"),
            )
    for const_name, ext in known:
        if type_ == getattr(mupdf, const_name):
            return ext
    return "n/a"
# fixme: need to avoid using a global for this.
# Module-level collector of (name, quad) tuples: JM_image_filter() appends to
# it, JM_image_reporter() sets it to an empty list before and after filtering
# a page.
g_img_info = None
def JM_image_filter(opaque, ctm, name, image):
    '''
    Record one image: transform the unit rectangle's quad by 'ctm' and then
    by the global g_img_info_matrix, and append (name, quad) to the global
    list g_img_info. 'opaque' and 'image' are not used.
    '''
    assert isinstance(ctm, mupdf.FzMatrix)
    unit_rect = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
    quad = mupdf.fz_quad_from_rect(unit_rect)
    for matrix in (ctm, g_img_info_matrix):
        quad = mupdf.fz_transform_quad(quad, matrix)
    g_img_info.append((name, JM_py_from_quad(quad)))
def JM_image_profile( imagedata, keep_image):
    '''
    Return basic properties of an image provided as bytes or bytearray.
    The function creates an fz_image and, if 'keep_image' is true, stores
    it in the result as well.

    Returns None if no data is given, if there are less than 8 bytes or if
    the image format is not recognized; else a dict of image properties.
    '''
    if not imagedata:
        return None     # nothing given

    size = len(imagedata)
    if size < 8:
        message( "bad image data")
        return None

    img_type = mupdf.fz_recognize_image_format(imagedata)
    if img_type == mupdf.FZ_IMAGE_UNKNOWN:
        return None

    # a kept image gets its own copy of the data, else the data is shared
    if keep_image:
        make_buffer = mupdf.fz_new_buffer_from_copied_data
    else:
        make_buffer = mupdf.fz_new_buffer_from_shared_data
    res = make_buffer(imagedata, size)

    image = mupdf.fz_new_image_from_buffer(res)
    ctm = mupdf.fz_image_orientation_matrix(image)
    xres, yres = mupdf.fz_image_resolution(image)
    orientation = mupdf.fz_image_orientation(image)
    cs_name = mupdf.fz_colorspace_name(image.colorspace())

    result = {
            dictkey_width: image.w(),
            dictkey_height: image.h(),
            "orientation": orientation,
            dictkey_matrix: JM_py_from_matrix(ctm),
            dictkey_xres: xres,
            dictkey_yres: yres,
            dictkey_colorspace: image.n(),
            dictkey_bpc: image.bpc(),
            dictkey_ext: JM_image_extension(img_type),
            dictkey_cs_name: cs_name,
            }
    if keep_image:
        result[dictkey_image] = image
    return result
def JM_image_reporter(page):
    '''
    Run the page's contents through a sanitize filter whose image callback
    is JM_image_filter(), and return the collected (name, quad) entries as
    a tuple.

    Uses the module globals g_img_info_matrix and g_img_info to pass data
    to / from the callback.
    '''
    doc = page.doc()
    global g_img_info_matrix
    # filled in by pdf_page_transform(); read by JM_image_filter()
    g_img_info_matrix = mupdf.FzMatrix()
    mediabox = mupdf.FzRect()
    mupdf.pdf_page_transform(page, mediabox, g_img_info_matrix)

    class SanitizeFilterOptions(mupdf.PdfSanitizeFilterOptions2):
        # Sanitize options whose image_filter() forwards to JM_image_filter()
        def __init__(self):
            super().__init__()
            self.use_virtual_image_filter()
        def image_filter(self, ctx, ctm, name, image, scissor):
            JM_image_filter(None, mupdf.FzMatrix(ctm), name, image)

    sanitize_filter_options = SanitizeFilterOptions()

    filter_options = _make_PdfFilterOptions(
            instance_forms=1,
            ascii=1,
            no_update=1,
            sanitize=1,
            sopts=sanitize_filter_options,
            )
    global g_img_info
    # start with an empty collector, run the filter, then reset it again
    g_img_info = []
    mupdf.pdf_filter_page_contents( doc, page, filter_options)
    rc = tuple(g_img_info)
    g_img_info = []
    return rc
def JM_fitz_config():
    '''
    Return a dict describing the configuration: one bool per FZ_ENABLE_* /
    FZ_PLOTTERS_* value of mupdf, JM_MEMORY as "py-memory", and for each
    TOFU* name whether mupdf does NOT have an attribute of that name.
    '''
    def lacks(attr):
        # True if mupdf has no attribute of this name
        return not hasattr(mupdf, attr)

    def enabled(attr):
        return bool(getattr(mupdf, attr))

    return {
            "base14": lacks('TOFU_BASE14'),
            "cbz": enabled('FZ_ENABLE_CBZ'),
            "epub": enabled('FZ_ENABLE_EPUB'),
            "html": enabled('FZ_ENABLE_HTML'),
            "icc": enabled('FZ_ENABLE_ICC'),
            "img": enabled('FZ_ENABLE_IMG'),
            "jpx": enabled('FZ_ENABLE_JPX'),
            "js": enabled('FZ_ENABLE_JS'),
            "pdf": enabled('FZ_ENABLE_PDF'),
            "plotter-cmyk": enabled('FZ_PLOTTERS_CMYK'),
            "plotter-g": enabled('FZ_PLOTTERS_G'),
            "plotter-n": enabled('FZ_PLOTTERS_N'),
            "plotter-rgb": enabled('FZ_PLOTTERS_RGB'),
            "py-memory": bool(JM_MEMORY),
            "svg": enabled('FZ_ENABLE_SVG'),
            "tofu": lacks('TOFU'),
            "tofu-cjk": lacks('TOFU_CJK'),
            "tofu-cjk-ext": lacks('TOFU_CJK_EXT'),
            "tofu-cjk-lang": lacks('TOFU_CJK_LANG'),
            "tofu-emoji": lacks('TOFU_EMOJI'),
            "tofu-historic": lacks('TOFU_HISTORIC'),
            "tofu-sil": lacks('TOFU_SIL'),
            "tofu-symbol": lacks('TOFU_SYMBOL'),
            "xps": enabled('FZ_ENABLE_XPS'),
            }
def JM_insert_contents(pdf, pageref, newcont, overlay):
    '''
    Insert a buffer as a new separate /Contents object of a page.
    1. Create a new stream object from buffer 'newcont'.
    2. If /Contents already is an array, append (overlay) or prepend the
       new object.
    3. Else create a new array holding the old content object (if any) and
       the new object - in this order for overlay, reversed otherwise -
       and store it as the page's /Contents.
    Returns the xref of the new stream object.
    '''
    contents = mupdf.pdf_dict_get(pageref, PDF_NAME('Contents'))
    newconts = mupdf.pdf_add_stream(pdf, newcont, mupdf.PdfObj(), 0)
    xref = mupdf.pdf_to_num(newconts)

    if mupdf.pdf_is_array(contents):
        if overlay:     # append new object
            mupdf.pdf_array_push(contents, newconts)
        else:           # prepend new object
            mupdf.pdf_array_insert(contents, newconts, 0)
        return xref

    carr = mupdf.pdf_new_array(pdf, 5)
    old = [contents] if contents.m_internal else []
    ordered = old + [newconts] if overlay else [newconts] + old
    for item in ordered:
        mupdf.pdf_array_push(carr, item)
    mupdf.pdf_dict_put(pageref, PDF_NAME('Contents'), carr)
    return xref
def JM_insert_font(pdf, bfname, fontfile, fontbuffer, set_simple, idx, wmode, serif, encoding, ordering):
    '''
    Insert a font in a PDF.

    The font source is chosen in this order: CJK font (if ordering > -1 and
    the lookup yields data), PDF Base-14 font (if 'bfname' is given and the
    lookup yields data), else font file 'fontfile' or font buffer
    'fontbuffer'.

    Returns a list [xref, info] where info is a dict with the keys "name",
    "type", "ext", "simple", "ordering", "ascender" and "descender".
    '''
    font = None
    res = None
    data = None
    ixref = 0
    index = 0
    simple = 0
    value=None
    name=None
    subt=None
    exto = None

    ENSURE_OPERATION(pdf)
    # check for CJK font
    if ordering > -1:
        data, size, index = mupdf.fz_lookup_cjk_font(ordering)
    if data:
        font = mupdf.fz_new_font_from_memory(None, data, size, index, 0)
        font_obj = mupdf.pdf_add_cjk_font(pdf, font, ordering, wmode, serif)
        exto = "n/a"
        simple = 0
        #goto weiter;
    else:
        # check for PDF Base-14 font
        if bfname:
            data, size = mupdf.fz_lookup_base14_font(bfname)
        if data:
            font = mupdf.fz_new_font_from_memory(bfname, data, size, 0, 0)
            font_obj = mupdf.pdf_add_simple_font(pdf, font, encoding)
            exto = "n/a"
            simple = 1
            #goto weiter;
        else:
            # neither CJK nor Base-14: load from file or buffer
            if fontfile:
                font = mupdf.fz_new_font_from_file(None, fontfile, idx, 0)
            else:
                res = JM_BufferFromBytes(fontbuffer)
                if not res.m_internal:
                    RAISEPY(MSG_FILE_OR_BUFFER, PyExc_ValueError)
                font = mupdf.fz_new_font_from_buffer(None, res, idx, 0)

            # add as CID font unless a simple font was requested
            if not set_simple:
                font_obj = mupdf.pdf_add_cid_font(pdf, font)
                simple = 0
            else:
                font_obj = mupdf.pdf_add_simple_font(pdf, font, encoding)
                simple = 2
    #weiter: ;
    ixref = mupdf.pdf_to_num(font_obj)
    name = JM_EscapeStrFromStr( mupdf.pdf_to_name( mupdf.pdf_dict_get(font_obj, PDF_NAME('BaseFont'))))

    subt = JM_UnicodeFromStr( mupdf.pdf_to_name( mupdf.pdf_dict_get( font_obj, PDF_NAME('Subtype'))))

    # the extension is only determined if no branch above has set it
    if not exto:
        exto = JM_UnicodeFromStr(JM_get_fontextension(pdf, ixref))

    asc = mupdf.fz_font_ascender(font)
    dsc = mupdf.fz_font_descender(font)
    value = [
            ixref,
            {
                "name": name,               # base font name
                "type": subt,               # subtype
                "ext": exto,                # file extension
                "simple": bool(simple),     # simple font?
                "ordering": ordering,       # CJK font?
                "ascender": asc,
                "descender": dsc,
            },
            ]
    return value
def JM_irect_from_py(r):
    '''
    Convert 'r' to a mupdf.FzIrect. Accepted are mupdf.FzIrect (returned
    unchanged), IRect, Rect, mupdf.FzRect and sequences of 4 numbers, which
    are clamped to FZ_MIN_INF_RECT .. FZ_MAX_INF_RECT.
    Default (anything else, or a None item): infinite irect.
    '''
    if isinstance(r, mupdf.FzIrect):
        return r
    if isinstance(r, IRect):
        return mupdf.FzIrect(r.x0, r.y0, r.x1, r.y1)
    if isinstance(r, Rect):
        # FzIrect(FzRect) uses fz_irect_from_rect()
        return mupdf.FzIrect(mupdf.FzRect(r.x0, r.y0, r.x1, r.y1))
    if isinstance(r, mupdf.FzRect):
        return mupdf.FzIrect(r)     # uses fz_irect_from_rect()

    if not r or not PySequence_Check(r) or PySequence_Size(r) != 4:
        return mupdf.FzIrect(mupdf.fz_infinite_irect)

    coords = []
    for pos in range(4):
        value = r[pos]
        if value is None:
            return mupdf.FzIrect(mupdf.fz_infinite_irect)
        if value < FZ_MIN_INF_RECT:
            value = FZ_MIN_INF_RECT
        if value > FZ_MAX_INF_RECT:
            value = FZ_MAX_INF_RECT
        coords.append(value)
    return mupdf.fz_make_irect(coords[0], coords[1], coords[2], coords[3])
def JM_listbox_value( annot):
    '''
    Retrieve the value (/V) of a ListBox.
    Returns a str if /V is a single string, else a list of strings (which
    may be empty). Of an array item that itself is an array, the second
    entry is taken.
    '''
    value = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(annot), PDF_NAME('V'))
    if mupdf.pdf_is_string(value):      # a single string
        return mupdf.pdf_to_text_string(value)

    # value is an array (may have len 0)
    texts = []
    for idx in range(mupdf.pdf_array_len(value)):
        item = mupdf.pdf_array_get(value, idx)
        if mupdf.pdf_is_array(item):
            item = mupdf.pdf_array_get(item, 1)
        texts.append(JM_UnicodeFromStr(mupdf.pdf_to_text_string(item)))
    return texts
def JM_make_annot_DA(annot, ncol, col, fontname, fontsize):
    '''
    Build the /DA string from color 'col' (with 'ncol' components), font
    name and font size, and store it in the annotation's object.

    PyMuPDF uses a fz_buffer to build up the string, but it's non-trivial
    to convert the fz_buffer's `unsigned char*` into a `const char*`
    suitable for pdf_dict_put_text_string(). So the string is built
    directly in Python.
    '''
    color_op = ''
    if ncol < 1:
        color_op = '0 g '
    elif ncol == 1:
        color_op = f'{col[0]:g} g '
    elif ncol == 2:
        assert 0
    elif ncol == 3:
        color_op = f'{col[0]:g} {col[1]:g} {col[2]:g} rg '
    else:
        color_op = f'{col[0]:g} {col[1]:g} {col[2]:g} {col[3]:g} k '
    da = f'{color_op}/{JM_expand_fname(fontname)} {fontsize} Tf'
    mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), mupdf.PDF_ENUM_NAME_DA, da)
def JM_make_spanlist(line_dict, line, raw, buff, tp_rect):
    '''
    Build the list of span dictionaries for one text line.

    Consecutive characters sharing size, flags, (char flags), color, font
    and bidi value are collected into one span. Characters whose bbox does
    not overlap 'tp_rect' are skipped unless 'tp_rect' is infinite.

    Args:
        line_dict: dictionary of the line; receives the span list under
            key dictkey_spans.
        line: the text line being iterated for its characters.
        raw: if true, each span gets a list of character dictionaries,
            otherwise the span text taken from 'buff'.
        buff: scratch buffer used to accumulate span text (cleared here).
        tp_rect: clip rectangle of the text page.

    Returns:
        The union of the bboxes of all spans appended to the list.
    '''
    if g_use_extra:
        return extra.JM_make_spanlist(line_dict, line, raw, buff, tp_rect)
    char_list = None
    span_list = []
    mupdf.fz_clear_buffer(buff)
    span_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
    line_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)

    class char_style:
        # Style properties of a character; a change starts a new span.
        def __init__(self, rhs=None):
            if rhs:
                self.size = rhs.size
                self.flags = rhs.flags
                if mupdf_version_tuple >= (1, 25, 2):
                    self.char_flags = rhs.char_flags
                self.font = rhs.font
                self.argb = rhs.argb
                self.asc = rhs.asc
                self.desc = rhs.desc
                self.bidi = rhs.bidi
            else:
                self.size = -1
                self.flags = -1
                if mupdf_version_tuple >= (1, 25, 2):
                    self.char_flags = -1
                self.font = ''
                self.argb = -1
                self.asc = 0
                self.desc = 0
                self.bidi = 0

        def __str__(self):
            ret = f'{self.size} {self.flags}'
            if mupdf_version_tuple >= (1, 25, 2):
                ret += f' {self.char_flags}'
            # The color is stored as 'argb'; there is no 'color' attribute.
            ret += f' {self.font} {self.argb} {self.asc} {self.desc}'
            return ret

    old_style = char_style()
    style = char_style()
    span = None
    span_origin = None

    for ch in line:
        # start-trace
        r = JM_char_bbox(line, ch)
        if (not JM_rects_overlap(tp_rect, r)
                and not mupdf.fz_is_infinite_rect(tp_rect)
                ):
            continue

        # Info from:
        # detect_super_script()
        # fz_font_is_italic()
        # fz_font_is_serif()
        # fz_font_is_monospaced()
        # fz_font_is_bold()
        flags = JM_char_font_flags(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)), line, ch)
        origin = mupdf.FzPoint(ch.m_internal.origin)
        style.size = ch.m_internal.size
        style.flags = flags
        if mupdf_version_tuple >= (1, 25, 2):
            # FZ_STEXT_SYNTHETIC is per-char, not per-span.
            style.char_flags = ch.m_internal.flags & ~mupdf.FZ_STEXT_SYNTHETIC
        style.font = JM_font_name(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
        style.argb = ch.m_internal.argb
        style.asc = JM_font_ascender(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
        style.desc = JM_font_descender(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
        style.bidi = ch.m_internal.bidi

        if (style.size != old_style.size
                or style.flags != old_style.flags
                or (mupdf_version_tuple >= (1, 25, 2)
                    and (style.char_flags != old_style.char_flags)
                    )
                or style.argb != old_style.argb
                or style.font != old_style.font
                or style.bidi != old_style.bidi
                ):
            if old_style.size >= 0:
                # not first one, output previous
                if raw:
                    # put character list in the span
                    span[dictkey_chars] = char_list
                    char_list = None
                else:
                    # put text string in the span
                    span[dictkey_text] = JM_EscapeStrFromBuffer(buff)
                    mupdf.fz_clear_buffer(buff)

                span[dictkey_origin] = JM_py_from_point(span_origin)
                span[dictkey_bbox] = JM_py_from_rect(span_rect)
                line_rect = mupdf.fz_union_rect(line_rect, span_rect)
                span_list.append(span)
                span = None

            # start a new span with the current style
            span = dict()
            asc = style.asc
            desc = style.desc
            if style.asc < 1e-3:
                # (nearly) no ascender reported: use fixed substitutes
                asc = 0.9
                desc = -0.1

            span[dictkey_size] = style.size
            span[dictkey_flags] = style.flags
            span[dictkey_bidi] = style.bidi
            if mupdf_version_tuple >= (1, 25, 2):
                span[dictkey_char_flags] = style.char_flags
            span[dictkey_font] = JM_EscapeStrFromStr(style.font)
            span[dictkey_color] = style.argb & 0xffffff
            if mupdf_version_tuple >= (1, 25, 0):
                span['alpha'] = style.argb >> 24
            span["ascender"] = asc
            span["descender"] = desc

            # Need to be careful here - doing 'old_style=style' does a shallow
            # copy, but we need to keep old_style as a distinct instance.
            old_style = char_style(style)
            span_rect = r
            span_origin = origin

        span_rect = mupdf.fz_union_rect(span_rect, r)

        if raw:     # make and append a char dict
            char_dict = dict()
            char_dict[dictkey_origin] = JM_py_from_point(ch.m_internal.origin)
            char_dict[dictkey_bbox] = JM_py_from_rect(r)
            char_dict[dictkey_c] = chr(ch.m_internal.c)
            char_dict['synthetic'] = bool(ch.m_internal.flags & mupdf.FZ_STEXT_SYNTHETIC)
            if char_list is None:
                char_list = []
            char_list.append(char_dict)
        else:       # add character byte to buffer
            JM_append_rune(buff, ch.m_internal.c)

    # all characters processed, now flush remaining span
    if span:
        if raw:
            span[dictkey_chars] = char_list
            char_list = None
        else:
            span[dictkey_text] = JM_EscapeStrFromBuffer(buff)
            mupdf.fz_clear_buffer(buff)
        span[dictkey_origin] = JM_py_from_point(span_origin)
        span[dictkey_bbox] = JM_py_from_rect(span_rect)

        if not mupdf.fz_is_empty_rect(span_rect):
            span_list.append(span)
            line_rect = mupdf.fz_union_rect(line_rect, span_rect)
        span = None

    # The span list is stored whether or not the line rectangle is empty.
    line_dict[dictkey_spans] = span_list
    return line_rect
def _make_image_dict(img, img_dict):
    """Fill 'img_dict' with properties and binary content of image 'img'.

    Used by 'Document.extract_image' and by 'JM_make_image_block', both of
    which add more specific information afterwards.
    """
    kind = img.fz_compressed_image_type()
    ext = JM_image_extension(kind)

    # compressed image buffer if present, else None
    ll_cbuf = mupdf.ll_fz_compressed_image_buffer(img.m_internal)

    needs_png = (
            not ll_cbuf
            or kind in (mupdf.FZ_IMAGE_JBIG2, mupdf.FZ_IMAGE_UNKNOWN)
            or kind < mupdf.FZ_IMAGE_BMP
            )
    if needs_png:
        # not an image with a compressed buffer: convert to PNG
        ext = "png"
        res = mupdf.fz_new_buffer_from_image_as_png(
                img,
                mupdf.FzColorParams(mupdf.fz_default_color_params),
                )
    elif ext == "jpeg" and img.n() == 4:
        # JPEG with CMYK: invert colors
        res = mupdf.fz_new_buffer_from_image_as_jpeg(
                img,
                mupdf.FzColorParams(mupdf.fz_default_color_params),
                95,
                1,
                )
    else:
        # copy the compressed buffer
        res = mupdf.FzBuffer(mupdf.ll_fz_keep_buffer(ll_cbuf.buffer))

    content = JM_BinFromBuffer(res)
    entries = (
            (dictkey_width, img.w()),
            (dictkey_height, img.h()),
            (dictkey_ext, ext),
            (dictkey_colorspace, img.n()),
            (dictkey_xres, img.xres()),
            (dictkey_yres, img.yres()),
            (dictkey_bpc, img.bpc()),
            (dictkey_size, len(content)),
            (dictkey_image, content),
            )
    for key, value in entries:
        img_dict[key] = value
def JM_make_image_block(block, block_dict):
    '''
    Fill 'block_dict' with the data of an image block: the generic image
    properties, the mask as a PNG buffer (None if the image has no mask)
    and the block's transformation matrix.
    '''
    image = block.i_image()
    _make_image_dict(image, block_dict)
    mask = image.mask()
    mask_data = None
    if mask.m_internal:
        # the image has a mask: store it as a PNG buffer
        png = mask.fz_new_buffer_from_image_as_png(
                mupdf.FzColorParams(mupdf.fz_default_color_params)
                )
        mask_data = png.fz_buffer_extract()
    block_dict["mask"] = mask_data
    block_dict[dictkey_matrix] = JM_py_from_matrix(block.i_transform())
def JM_make_text_block(block, block_dict, raw, buff, tp_rect):
    '''
    Fill 'block_dict' with the line dictionaries and the bbox of a text
    block. Lines whose bbox does not intersect 'tp_rect' are left out,
    unless 'tp_rect' is infinite.
    '''
    if g_use_extra:
        return extra.JM_make_text_block(block.m_internal, block_dict, raw, buff.m_internal, tp_rect.m_internal)
    lines = []
    bbox = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
    for line in block:
        clipped = mupdf.fz_intersect_rect(tp_rect, mupdf.FzRect(line.m_internal.bbox))
        if mupdf.fz_is_empty_rect(clipped) and not mupdf.fz_is_infinite_rect(tp_rect):
            continue    # line is outside the clip
        entry = dict()
        # creates the span list of the line and returns the line rectangle
        line_rect = JM_make_spanlist(entry, line, raw, buff, tp_rect)
        bbox = mupdf.fz_union_rect(bbox, line_rect)
        entry[dictkey_wmode] = line.m_internal.wmode
        entry[dictkey_dir] = JM_py_from_point(line.m_internal.dir)
        entry[dictkey_bbox] = JM_py_from_rect(line_rect)
        lines.append(entry)
    block_dict[dictkey_bbox] = JM_py_from_rect(bbox)
    block_dict[dictkey_lines] = lines
def JM_make_textpage_dict(tp, page_dict, raw):
    '''
    Store the list of block dictionaries of text page 'tp' in 'page_dict'.

    Unless the text page's rectangle is infinite, image blocks not fully
    contained in it and any block not intersecting it are skipped. Block
    numbers count all blocks, including the skipped ones.
    '''
    if g_use_extra:
        return extra.JM_make_textpage_dict(tp.m_internal, page_dict, raw)
    text_buffer = mupdf.fz_new_buffer(128)
    tp_rect = mupdf.FzRect(tp.m_internal.mediabox)
    blocks = []
    for block_n, block in enumerate(tp):
        is_image = block.m_internal.type == mupdf.FZ_STEXT_BLOCK_IMAGE
        if not mupdf.fz_is_infinite_rect(tp_rect):
            block_bbox = mupdf.FzRect(block.m_internal.bbox)
            if is_image and not mupdf.fz_contains_rect(tp_rect, block_bbox):
                continue
            if mupdf.fz_is_empty_rect(mupdf.fz_intersect_rect(tp_rect, block_bbox)):
                continue
        entry = dict()
        entry[dictkey_number] = block_n
        entry[dictkey_type] = block.m_internal.type
        if is_image:
            entry[dictkey_bbox] = JM_py_from_rect(block.m_internal.bbox)
            JM_make_image_block(block, entry)
        else:
            JM_make_text_block(block, entry, raw, text_buffer, tp_rect)
        blocks.append(entry)
    page_dict[dictkey_blocks] = blocks
def JM_matrix_from_py(m):
    '''
    Convert a Python object to a mupdf.FzMatrix.

    Accepts a mupdf.FzMatrix (returned unchanged), a Matrix, or a sequence
    of six numbers. Anything else, including a sequence with an item that
    JM_FLOAT_ITEM cannot deliver, yields a default-constructed
    mupdf.FzMatrix.
    '''
    if isinstance(m, mupdf.FzMatrix):
        return m
    if isinstance(m, Matrix):
        return mupdf.FzMatrix(m.a, m.b, m.c, m.d, m.e, m.f)
    if not m or not PySequence_Check(m) or PySequence_Size(m) != 6:
        return mupdf.FzMatrix()
    values = []
    for i in range(6):
        item = JM_FLOAT_ITEM(m, i)
        if item is None:
            # Invalid item: fall back to the default matrix. The previous
            # code returned a mupdf.FzRect here, i.e. the wrong type.
            return mupdf.FzMatrix()
        values.append(item)
    return mupdf.FzMatrix(*values)
def JM_mediabox(page_obj):
    '''
    Return a PDF page's MediaBox as a normalized rectangle.

    An empty or infinite (inheritable) /MediaBox is replaced by
    (0, 0, 612, 792). If the normalized result is less than 1 wide or
    high, the unit rectangle is returned instead.
    '''
    box = mupdf.pdf_to_rect(
            mupdf.pdf_dict_get_inheritable(page_obj, PDF_NAME('MediaBox'))
            )
    if mupdf.fz_is_empty_rect(box) or mupdf.fz_is_infinite_rect(box):
        box.x0, box.y0, box.x1, box.y1 = 0, 0, 612, 792

    # make sure that x0 <= x1 and y0 <= y1
    normalized = mupdf.FzRect(
            mupdf.fz_min(box.x0, box.x1),
            mupdf.fz_min(box.y0, box.y1),
            mupdf.fz_max(box.x0, box.x1),
            mupdf.fz_max(box.y0, box.y1),
            )
    too_narrow = normalized.x1 - normalized.x0 < 1
    if too_narrow or normalized.y1 - normalized.y0 < 1:
        return mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
    return normalized
def JM_merge_range(
        doc_des,
        doc_src,
        spage,
        epage,
        apage,
        rotate,
        links,
        annots,
        show_progress,
        graft_map,
        ):
    '''
    Copy the source PDF's page range (spage, epage) into the target PDF,
    starting at target location 'apage'.
    If spage is not less than epage, the source pages are taken in
    descending order.
    If 'show_progress' is positive, a message is emitted after every
    'show_progress' copied pages.
    '''
    if g_use_extra:
        return extra.JM_merge_range(
                doc_des,
                doc_src,
                spage,
                epage,
                apage,
                rotate,
                links,
                annots,
                show_progress,
                graft_map,
                )
    total = mupdf.fz_absi(epage - spage) + 1    # total pages to copy
    if spage < epage:
        source_pages = range(spage, epage + 1)
    else:
        source_pages = range(spage, epage - 1, -1)
    # 'counter' is the number of pages copied so far
    for counter, page in enumerate(source_pages, start=1):
        afterpage = apage + counter - 1
        page_merge(doc_des, doc_src, page, afterpage, rotate, links, annots, graft_map)
        if show_progress > 0 and counter % show_progress == 0:
            message(f"Inserted {counter} of {total} pages.")
def JM_merge_resources( page, temp_res):
    '''
    Merge the /Resources object created by a text pdf device into the page.
    The device may have created multiple /ExtGState/Alp? and /Font/F? objects.
    These are renumbered while being copied, so that they do not overwrite
    page objects from previous executions.
    Returns the tuple (max_alp, max_fonts): the number offsets that were
    applied to the copied /Alp and /F object names.
    '''
    def highest_number(dict_obj, prefix):
        # largest number among keys named <prefix><number>, -1 if none
        best = -1
        for i in range(mupdf.pdf_dict_len(dict_obj)):
            name = mupdf.pdf_to_name(mupdf.pdf_dict_get_key(dict_obj, i))
            if name.startswith(prefix):
                best = max(best, mupdf.fz_atoi(name[len(prefix):]))
        return best

    def copy_renumbered(source, target, prefix, offset):
        # copy all entries of 'source' to 'target' under shifted numbers
        for i in range(mupdf.pdf_dict_len(source)):
            name = mupdf.pdf_to_name(mupdf.pdf_dict_get_key(source, i))
            number = mupdf.fz_atoi(name[len(prefix):]) + offset
            value = mupdf.pdf_dict_get_val(source, i)
            mupdf.pdf_dict_puts(target, f'{prefix}{number}', value)

    # page objects /Resources, /Resources/ExtGState, /Resources/Font
    resources = mupdf.pdf_dict_get(page.obj(), PDF_NAME('Resources'))
    if not resources.m_internal:
        resources = mupdf.pdf_dict_put_dict(page.obj(), PDF_NAME('Resources'), 5)
    main_extg = mupdf.pdf_dict_get(resources, PDF_NAME('ExtGState'))
    main_fonts = mupdf.pdf_dict_get(resources, PDF_NAME('Font'))

    # text pdf device objects /ExtGState, /Font
    temp_extg = mupdf.pdf_dict_get(temp_res, PDF_NAME('ExtGState'))
    temp_fonts = mupdf.pdf_dict_get(temp_res, PDF_NAME('Font'))

    # Handle /Alp objects - only if the device created any
    max_alp = -1
    if mupdf.pdf_is_dict(temp_extg):
        if mupdf.pdf_is_dict(main_extg):
            # page already has /ExtGState: find highest /Alp number
            max_alp = highest_number(main_extg, 'Alp')
        else:
            # create a /ExtGState for the page
            main_extg = mupdf.pdf_dict_put_dict(
                    resources,
                    PDF_NAME('ExtGState'),
                    mupdf.pdf_dict_len(temp_extg),
                    )
        max_alp += 1
        copy_renumbered(temp_extg, main_extg, 'Alp', max_alp)

    # Handle /F objects
    max_fonts = -1
    if mupdf.pdf_is_dict(main_fonts):
        # page already has fonts: find highest font number
        max_fonts = highest_number(main_fonts, 'F')
    else:
        # create a Resources/Font for the page
        main_fonts = mupdf.pdf_dict_put_dict(resources, PDF_NAME('Font'), 2)
    max_fonts += 1
    copy_renumbered(temp_fonts, main_fonts, 'F', max_fonts)

    return (max_alp, max_fonts)
def JM_mupdf_warning( text):
    '''
    Redirect a MuPDF warning: always record it in the warnings store and
    additionally emit it as a message if showing warnings is enabled.
    '''
    JM_mupdf_warnings_store.append(text)
    if not JM_mupdf_show_warnings:
        return
    message(f'MuPDF warning: {text}')
def JM_mupdf_error( text):
    '''
    Redirect a MuPDF error: always record it in the warnings store (the
    same store that is used for warnings) and additionally emit it as a
    message if showing errors is enabled.
    '''
    JM_mupdf_warnings_store.append(text)
    if not JM_mupdf_show_errors:
        return
    message(f'MuPDF error: {text}\n')
def JM_new_bbox_device(rc, inc_layers):
    '''
    Create a JM_new_bbox_device_Device for the Python list 'rc'.
    '''
    assert isinstance(rc, list)
    device = JM_new_bbox_device_Device(rc, inc_layers)
    return device
def JM_new_buffer_from_stext_page(page):
    '''
    Make a buffer from the text of an stext page.

    Only text blocks are used. Characters not overlapping the page's
    rectangle are left out unless that rectangle is infinite. A new-line
    is appended after every line and another one after every text block.
    '''
    assert isinstance(page, mupdf.FzStextPage)
    newline = ord('\n')
    rect = mupdf.FzRect(page.m_internal.mediabox)
    buf = mupdf.fz_new_buffer(256)
    for block in page:
        if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
            continue
        for line in block:
            for ch in line:
                if (JM_rects_overlap(rect, JM_char_bbox(line, ch))
                        or mupdf.fz_is_infinite_rect(rect)
                        ):
                    mupdf.fz_append_rune(buf, ch.m_internal.c)
            mupdf.fz_append_byte(buf, newline)
        mupdf.fz_append_byte(buf, newline)
    return buf
def JM_new_javascript(pdf, value):
    '''
    Make a new PDF action object from JavaScript source.
    Parameters are a PDF document and a Python string.
    Returns a PDF action object, or None if 'value' is None or
    JM_StrAsChar() returns None for it.
    '''
    if value is None:
        return      # no argument given
    data = JM_StrAsChar(value)
    if data is None:
        return      # not convertible to char*

    # the script source is stored as a stream object ...
    source = mupdf.pdf_add_stream(
            pdf,
            mupdf.fz_new_buffer_from_copied_data(data.encode('utf8')),
            mupdf.PdfObj(),
            0,
            )
    # ... which the new action dictionary references under /JS
    action = mupdf.pdf_add_new_dict(pdf, 4)
    mupdf.pdf_dict_put(action, PDF_NAME('S'), mupdf.pdf_new_name('JavaScript'))
    mupdf.pdf_dict_put(action, PDF_NAME('JS'), source)
    return action
def JM_new_output_fileptr(bio):
    '''
    Create a JM_new_output_fileptr_Output for 'bio'.
    '''
    output = JM_new_output_fileptr_Output(bio)
    return output
def JM_norm_rotation(rotate):
    '''
    Return a normalized /Rotate value: one of 0, 90, 180, 270.

    The value is first brought into the range 0 <= rotate < 360 by adding
    or subtracting multiples of 360. If the result is not a multiple of
    90, then 0 is returned.
    '''
    while rotate < 0:
        rotate += 360
    while rotate >= 360:
        rotate -= 360
    return rotate if rotate % 90 == 0 else 0
def JM_object_to_buffer(what, compress, ascii):
    '''
    Print PDF object 'what' into a new buffer and return that buffer.
    'compress' and 'ascii' are passed on to mupdf.pdf_print_obj().
    '''
    buffer_ = mupdf.fz_new_buffer(512)
    output = mupdf.FzOutput(buffer_)
    mupdf.pdf_print_obj(output, what, compress, ascii)
    output.fz_close_output()
    mupdf.fz_terminate_buffer(buffer_)
    return buffer_
def JM_outline_xrefs(obj, xrefs):
    '''
    Return list of outline xref numbers. Recursive function. Arguments:
    'obj' first OL item
    'xrefs' empty Python list
    '''
    node = obj
    while node.m_internal:
        xref = mupdf.pdf_to_num(node)
        has_type = mupdf.pdf_dict_get(node, PDF_NAME('Type')).m_internal
        if xref in xrefs or has_type:
            # circular ref or top of chain: terminate
            break
        xrefs.append(xref)
        # try go down
        first = mupdf.pdf_dict_get(node, PDF_NAME('First'))
        if mupdf.pdf_is_dict(first):
            xrefs = JM_outline_xrefs(first, xrefs)
        # try go next; the parent is looked up in the /Next object
        node = mupdf.pdf_dict_get(node, PDF_NAME('Next'))
        parent = mupdf.pdf_dict_get(node, PDF_NAME('Parent'))
        if not mupdf.pdf_is_dict(node):
            node = parent
    return xrefs
def JM_page_rotation(page):
    '''
    Return a PDF page's (inheritable) /Rotate value, normalized to one of
    (0, 90, 180, 270).
    '''
    rotate_obj = mupdf.pdf_dict_get_inheritable(page.obj(), mupdf.PDF_ENUM_NAME_Rotate)
    return JM_norm_rotation(mupdf.pdf_to_int(rotate_obj))
def JM_pdf_obj_from_str(doc, src):
    '''
    Create a PDF object from a given string (new in v1.14.0: MuPDF dropped
    it). The string is UTF-8 encoded and parsed via a stream over a buffer.
    '''
    # fixme: seems inefficient to convert to bytes instance then make another
    # copy inside fz_new_buffer_from_copied_data(), but no other way?
    encoded = bytes(src, 'utf8')
    stream = mupdf.fz_open_buffer(mupdf.fz_new_buffer_from_copied_data(encoded))
    lexbuf = mupdf.PdfLexbuf(mupdf.PDF_LEXBUF_SMALL)
    return mupdf.pdf_parse_stm_obj(doc, stream, lexbuf)
def JM_pixmap_from_display_list(
        list_,
        ctm,
        cs,
        alpha,
        clip,
        seps,
        ):
    '''
    Version of fz_new_pixmap_from_display_list (util.c) to also support
    rendering of only the 'clip' part of the displaylist rectangle.

    Returns a Pixmap. The pixmap is cleared if 'alpha' is true, otherwise
    filled with value 0xFF before rendering.
    '''
    assert isinstance(list_, mupdf.FzDisplayList)
    if seps is None:
        seps = mupdf.FzSeparations()
    assert seps is None or isinstance(seps, mupdf.FzSeparations), f'{type(seps)=}: {seps}'

    matrix = JM_matrix_from_py(ctm)
    rclip = JM_rect_from_py(clip)
    # the intersection is a no-op if clip is not given
    bounds = mupdf.fz_intersect_rect(mupdf.fz_bound_display_list(list_), rclip)
    irect = mupdf.fz_round_rect(mupdf.fz_transform_rect(bounds, matrix))

    assert isinstance( cs, mupdf.FzColorspace)
    pix = mupdf.fz_new_pixmap_with_bbox(cs, irect, seps, alpha)
    if alpha:
        mupdf.fz_clear_pixmap(pix)
    else:
        mupdf.fz_clear_pixmap_with_value(pix, 0xFF)

    if mupdf.fz_is_infinite_rect(rclip):
        # no clip given: render everything
        dev = mupdf.fz_new_draw_device(matrix, pix)
        scissor = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
    else:
        dev = mupdf.fz_new_draw_device_with_bbox(matrix, pix, irect)
        scissor = rclip
    mupdf.fz_run_display_list(list_, dev, mupdf.FzMatrix(), scissor, mupdf.FzCookie())
    mupdf.fz_close_device(dev)
    # Use special raw Pixmap constructor so we don't set alpha to true.
    return Pixmap( 'raw', pix)
def JM_point_from_py(p):
    '''
    PySequence to fz_point.

    A mupdf.FzPoint is returned unchanged, a Point is converted. For other
    objects the first two items are used, clamped to the range
    FZ_MIN_INF_RECT ... FZ_MAX_INF_RECT. If JM_FLOAT_ITEM returns None for
    one of them, point (0, 0) is returned.
    '''
    if isinstance(p, mupdf.FzPoint):
        return p
    if isinstance(p, Point):
        return mupdf.FzPoint(p.x, p.y)
    if g_use_extra:
        return extra.JM_point_from_py( p)

    fallback = mupdf.FzPoint(0, 0)
    coords = (JM_FLOAT_ITEM(p, 0), JM_FLOAT_ITEM(p, 1))
    if coords[0] is None or coords[1] is None:
        return fallback
    x, y = (
            min(max(value, FZ_MIN_INF_RECT), FZ_MAX_INF_RECT)
            for value in coords
            )
    return mupdf.FzPoint(x, y)
def JM_print_stext_page_as_text(res, page):
    '''
    Plain text output. Modelled after fz_print_stext_page_as_text: the
    characters of every line of every text block are appended to buffer
    'res', followed by one new-line per line - unless the line contributed
    no character or its last appended character already is a new-line.

    Characters not overlapping the page's rectangle are left out, unless
    that rectangle is infinite.

    Args:
        res: mupdf.FzBuffer receiving the text.
        page: mupdf.FzStextPage delivering the text.
    '''
    if g_use_extra:
        return extra.JM_print_stext_page_as_text(res, page)

    assert isinstance(res, mupdf.FzBuffer)
    assert isinstance(page, mupdf.FzStextPage)
    rect = mupdf.FzRect(page.m_internal.mediabox)

    # A previous version first walked the complete page just to count
    # blocks, lines and characters. The counts were never used, and the
    # pass could fail with an unbound loop variable for an empty line or
    # block, so it was removed.
    for block in page:
        if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
            continue
        for line in block:
            last_char = 0
            for ch in line:
                chbbox = JM_char_bbox(line, ch)
                if (mupdf.fz_is_infinite_rect(rect)
                        or JM_rects_overlap(rect, chbbox)
                        ):
                    last_char = ch.m_internal.c
                    JM_append_rune(res, last_char)
            # terminate the line, but avoid producing two new-lines
            if last_char != 10 and last_char > 0:
                mupdf.fz_append_string(res, "\n")
def JM_put_script(annot_obj, key1, key2, value):
    '''
    Create a JavaScript PDF action.
    Usable for all object types which support PDF actions, even if the
    argument name suggests annotations. Up to 2 key values can be specified, so
    JavaScript actions can be stored for '/A' and '/AA/?' keys.

    Args:
        annot_obj: PDF object that receives (or loses) the action.
        key1: first key.
        key2: optional second key below 'key1'; None or an empty PDF
            object mean "not given".
        value: JavaScript source. If false, the existing entry is deleted
            instead.
    '''
    key1_obj = mupdf.pdf_dict_get(annot_obj, key1)
    pdf = mupdf.pdf_get_bound_document(annot_obj)   # owning PDF
    # Evaluate "key2 given?" once and in one way. Previously None was only
    # tolerated in the delete branch and raised AttributeError otherwise.
    has_key2 = key2 is not None and bool(key2.m_internal)

    # if no new script given, just delete corresponding key
    if not value:
        if not has_key2:
            mupdf.pdf_dict_del(annot_obj, key1)
        elif key1_obj.m_internal:
            mupdf.pdf_dict_del(key1_obj, key2)
        return

    # read any existing script as a PyUnicode string
    if not has_key2 or not key1_obj.m_internal:
        script = JM_get_script(key1_obj)
    else:
        script = JM_get_script(mupdf.pdf_dict_get(key1_obj, key2))

    # replace old script, if different from new one
    if value != script:
        newaction = JM_new_javascript(pdf, value)
        if not has_key2:
            mupdf.pdf_dict_put(annot_obj, key1, newaction)
        else:
            mupdf.pdf_dict_putl(annot_obj, newaction, key1, key2)
def JM_py_from_irect(r):
    '''
    Tuple (x0, y0, x1, y1) from the same-named attributes of 'r'.
    '''
    corners = (r.x0, r.y0, r.x1, r.y1)
    return corners
def JM_py_from_matrix(m):
    '''
    Tuple (a, b, c, d, e, f) from the same-named attributes of 'm'.
    '''
    coefficients = (m.a, m.b, m.c, m.d, m.e, m.f)
    return coefficients
def JM_py_from_point(p):
    '''
    Tuple (x, y) from the same-named attributes of 'p'.
    '''
    coordinates = (p.x, p.y)
    return coordinates
def JM_py_from_quad(q):
    '''
    PySequence from fz_quad: a tuple of four (x, y) tuples in the order
    ul, ur, ll, lr.
    '''
    corners = (q.ul, q.ur, q.ll, q.lr)
    return tuple((corner.x, corner.y) for corner in corners)
def JM_py_from_rect(r):
    '''
    Tuple (x0, y0, x1, y1) from the same-named attributes of 'r'.
    '''
    corners = (r.x0, r.y0, r.x1, r.y1)
    return corners
def JM_quad_from_py(r):
    '''
    Convert a Python object to a mupdf.FzQuad.

    Accepted are a mupdf.FzQuad (returned unchanged), a mupdf.FzRect, a
    Quad, a sequence of four numbers (treated as a rectangle) and a tuple
    or list of four point-like sequences in the order ul, ur, ll, lr.
    Invalid input yields a quad with all coordinates 0.
    '''
    if isinstance(r, mupdf.FzQuad):
        return r
    # cover all cases of 4-float-sequences
    if hasattr(r, "__getitem__") and len(r) == 4 and hasattr(r[0], "__float__"):
        r = mupdf.FzRect(*tuple(r))
    if isinstance( r, mupdf.FzRect):
        return mupdf.fz_quad_from_rect( r)
    if isinstance( r, Quad):
        return mupdf.fz_make_quad(
                r.ul.x, r.ul.y,
                r.ur.x, r.ur.y,
                r.ll.x, r.ll.y,
                r.lr.x, r.lr.y,
                )
    q = mupdf.fz_make_quad(0, 0, 0, 0, 0, 0, 0, 0)     # result for invalid input
    if not r or not isinstance(r, (tuple, list)) or len(r) != 4:
        return q
    # NOTE(review): this delegates to the rectangle conversion when the
    # first item is NOT available as a float - confirm against
    # JM_FLOAT_ITEM whether the condition is intended this way.
    if JM_FLOAT_ITEM(r, 0) is None:
        return mupdf.fz_quad_from_rect(JM_rect_from_py(r))

    # Collect the eight coordinates and build the quad in one call. The
    # previous code tried to set '.x' / '.y' on integer placeholders,
    # which always raised AttributeError.
    coords = []
    for obj in r:   # next point item
        if not PySequence_Check(obj) or PySequence_Size(obj) != 2:
            return q    # invalid: cancel the rest
        x = JM_FLOAT_ITEM(obj, 0)
        y = JM_FLOAT_ITEM(obj, 1)
        if x is None or y is None:
            return q
        coords.append(min(max(x, FZ_MIN_INF_RECT), FZ_MAX_INF_RECT))
        coords.append(min(max(y, FZ_MIN_INF_RECT), FZ_MAX_INF_RECT))
    # coordinate order: ul, ur, ll, lr - as in the Quad branch above
    return mupdf.fz_make_quad(*coords)
def JM_read_contents(pageref):
    '''
    Read and concatenate a PDF page's /Contents object(s) in a buffer.

    For an array, the streams among its items are joined, with one space
    (byte 32) inserted before every item except the first. Without any
    /Contents, an empty buffer is returned.
    '''
    assert isinstance(pageref, mupdf.PdfObj), f'{type(pageref)}'
    contents = mupdf.pdf_dict_get(pageref, mupdf.PDF_ENUM_NAME_Contents)
    if not mupdf.pdf_is_array(contents):
        if contents.m_internal:
            return mupdf.pdf_load_stream(contents)
        return mupdf.FzBuffer(0)

    joined = mupdf.FzBuffer(1024)
    for index in range(mupdf.pdf_array_len(contents)):
        if index > 0:
            mupdf.fz_append_byte(joined, 32)
        item = mupdf.pdf_array_get(contents, index)
        if mupdf.pdf_is_stream(item):
            mupdf.fz_append_buffer(joined, mupdf.pdf_load_stream(item))
    return joined
def JM_rect_from_py(r):
    '''
    Convert a Python object to a mupdf.FzRect.

    A mupdf.FzRect is returned unchanged; mupdf.FzIrect, Rect and IRect
    are converted. A sequence of four numbers is converted with its values
    clamped to FZ_MIN_INF_RECT ... FZ_MAX_INF_RECT. Anything else yields
    the infinite rectangle.
    '''
    if isinstance(r, mupdf.FzRect):
        return r
    if isinstance(r, mupdf.FzIrect):
        return mupdf.FzRect(r)
    if isinstance(r, Rect):
        return mupdf.fz_make_rect(r.x0, r.y0, r.x1, r.y1)
    if isinstance(r, IRect):
        return mupdf.fz_make_rect(r.x0, r.y0, r.x1, r.y1)
    if not r or not PySequence_Check(r) or PySequence_Size(r) != 4:
        return mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
    coords = []
    for index in range(4):
        value = JM_FLOAT_ITEM(r, index)
        if value is None:
            return mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
        coords.append(min(max(value, FZ_MIN_INF_RECT), FZ_MAX_INF_RECT))
    return mupdf.fz_make_rect(*coords)
def JM_rects_overlap(a, b):
    '''
    Return 1 if rectangles 'a' and 'b' overlap, else 0. Rectangles which
    only touch at an edge or corner do not count as overlapping.
    '''
    apart = (
            a.x0 >= b.x1
            or a.y0 >= b.y1
            or a.x1 <= b.x0
            or a.y1 <= b.y0
            )
    return 0 if apart else 1
def JM_refresh_links( page):
    '''
    Refreshes the link table of a page: if the page has an /Annots entry,
    its link annotations are loaded again and stored in the page.
    '''
    if page is None or not page.m_internal:
        return
    annots = mupdf.pdf_dict_get( page.obj(), PDF_NAME('Annots'))
    if not annots.m_internal:
        return
    pdf = page.doc()
    page_number = mupdf.pdf_lookup_page_number( pdf, page.obj())
    # both objects are passed to pdf_page_transform(); only the matrix is
    # used afterwards
    mediabox = mupdf.FzRect()
    ctm = mupdf.FzMatrix()
    mupdf.pdf_page_transform( page, mediabox, ctm)
    links = mupdf.pdf_load_link_annots( pdf, page, annots, page_number, ctm)
    page.m_internal.links = mupdf.ll_fz_keep_link( links.m_internal)
def JM_rotate_page_matrix(page):
    '''
    Calculate the rotation matrix of a page from its rotation and its
    cropbox size. Without a valid pdf page or with rotation 0, a
    default-constructed matrix is returned.
    '''
    if not page.m_internal:
        return mupdf.FzMatrix()     # no valid pdf page given
    rotation = JM_page_rotation(page)
    if rotation == 0:
        return mupdf.FzMatrix()     # no rotation
    size = JM_cropbox_size(page.obj())
    width, height = size.x, size.y
    if rotation == 90:
        return mupdf.fz_make_matrix(0, 1, -1, 0, height, 0)
    if rotation == 180:
        return mupdf.fz_make_matrix(-1, 0, 0, -1, width, height)
    # remaining case
    return mupdf.fz_make_matrix(0, -1, 1, 0, 0, width)
def JM_search_stext_page(page, needle):
    '''
    Search the text of an stext page for 'needle'.

    The page text is extracted to a string, in which matches are located
    via find_string(). The page's characters are then walked in parallel
    to that string, and on_highlight_char() is called for each character
    inside a match.

    Returns the list 'quads' (also handed to on_highlight_char() via the
    'hits' object), or None if 'needle' is empty.
    '''
    if g_use_extra:
        return extra.JM_search_stext_page(page.m_internal, needle)

    rect = mupdf.FzRect(page.m_internal.mediabox)
    if not needle:
        return
    quads = []

    class Hits:
        def __str__(self):
            return f'Hits(len={self.len} quads={self.quads} hfuzz={self.hfuzz} vfuzz={self.vfuzz}'

    hits = Hits()
    hits.len = 0
    hits.quads = quads
    hits.hfuzz = 0.2    # merge kerns but not large gaps
    hits.vfuzz = 0.1

    haystack_string = mupdf.fz_string_from_buffer(JM_new_buffer_from_stext_page(page))
    # 'haystack' is the index in haystack_string of the current character
    haystack = 0
    match_start, match_stop = find_string(haystack_string[haystack:], needle)
    if match_start is None:
        return quads    # no match at all
    match_start += haystack
    match_stop += haystack

    in_match = False
    for block in page:
        if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
            continue
        for line in block:
            for ch in line:
                if not mupdf.fz_is_infinite_rect(rect):
                    if not JM_rects_overlap(rect, JM_char_bbox(line, ch)):
                        # character is not part of the extracted text
                        continue
                while True:
                    if not in_match and haystack >= match_start:
                        in_match = True
                    if not in_match:
                        break
                    if haystack < match_stop:
                        on_highlight_char(hits, line, ch)
                        break
                    # current match is exhausted: look for the next one
                    in_match = False
                    match_start, match_stop = find_string(haystack_string[haystack:], needle)
                    if match_start is None:
                        return quads    # no more matches
                    match_start += haystack
                    match_stop += haystack
                haystack += 1
            # skip the new-line that terminates every line
            assert haystack_string[haystack] == '\n', \
                    f'{haystack=} {haystack_string[haystack]=}'
            haystack += 1
        # skip the new-line that terminates every text block
        assert haystack_string[haystack] == '\n', \
                f'{haystack=} {haystack_string[haystack]=}'
        haystack += 1
    return quads
def JM_scan_resources(pdf, rsrc, liste, what, stream_xref, tracer):
    '''
    Step through /Resources, looking up image, xobject or font information.

    'what' selects the lookup: 1 = fonts, 2 = images, 3 = form xobjects.
    Results are collected by the JM_gather_* functions, which receive
    'liste'. The function recurses into the /Resources of the /XObject
    entries; 'tracer' collects the xref numbers already entered, to detect
    circular dependencies.
    '''
    circular = 'Circular dependencies! Consider page cleaning.'
    if mupdf.pdf_mark_obj(rsrc):
        mupdf.fz_warn(circular)
        return  # Circular dependencies!
    try:
        xobj = mupdf.pdf_dict_get(rsrc, mupdf.PDF_ENUM_NAME_XObject)
        if what == 1:       # lookup fonts
            fonts = mupdf.pdf_dict_get(rsrc, mupdf.PDF_ENUM_NAME_Font)
            JM_gather_fonts(pdf, fonts, liste, stream_xref)
        elif what == 2:     # look up images
            JM_gather_images(pdf, xobj, liste, stream_xref)
        elif what == 3:     # look up form xobjects
            JM_gather_forms(pdf, xobj, liste, stream_xref)
        else:               # should never happen
            return

        # check if we need to recurse into Form XObjects
        for index in range(mupdf.pdf_dict_len(xobj)):
            obj = mupdf.pdf_dict_get_val(xobj, index)
            sxref = mupdf.pdf_to_num(obj) if mupdf.pdf_is_stream(obj) else 0
            subrsrc = mupdf.pdf_dict_get(obj, mupdf.PDF_ENUM_NAME_Resources)
            if not subrsrc.m_internal:
                continue
            if sxref in tracer:
                mupdf.fz_warn(circular)
                return
            tracer.append(sxref)
            JM_scan_resources( pdf, subrsrc, liste, what, sxref, tracer)
    finally:
        mupdf.pdf_unmark_obj(rsrc)
def JM_set_choice_options(annot, liste):
    '''
    Set ListBox / ComboBox values.

    'liste' is a tuple or list. Each item is either a string or a pair of
    two non-empty values, which is stored as a sub-array. The resulting
    array is stored under key /Opt. Nothing happens if 'liste' is empty.
    '''
    if not liste:
        return
    assert isinstance( liste, (tuple, list))
    count = len( liste)
    if count == 0:
        return
    annot_obj = mupdf.pdf_annot_obj( annot)
    pdf = mupdf.pdf_get_bound_document( annot_obj)
    options = mupdf.pdf_new_array( pdf, count)
    for index in range(count):
        item = liste[index]
        if isinstance(item, str):
            mupdf.pdf_array_push_text_string( options, item)
            continue
        assert isinstance( item, (tuple, list)) and len( item) == 2, 'bad choice field list'
        first, second = item
        assert first and second, 'bad choice field list'
        pair = mupdf.pdf_array_push_array( options, 2)
        mupdf.pdf_array_push_text_string( pair, first)
        mupdf.pdf_array_push_text_string( pair, second)
    mupdf.pdf_dict_put( annot_obj, PDF_NAME('Opt'), options)
def JM_set_field_type(doc, obj, type):
    '''
    Set the field type: store the /FT name belonging to widget type 'type'
    in 'obj' and adjust the field flags /Ff where required. Unknown types
    leave 'obj' unchanged.
    '''
    pushbutton = mupdf.PDF_BTN_FIELD_IS_PUSHBUTTON
    radio = mupdf.PDF_BTN_FIELD_IS_RADIO
    combo = mupdf.PDF_CH_FIELD_IS_COMBO
    # widget type, /FT name, flag bits to set, flag bits to clear
    table = (
            (mupdf.PDF_WIDGET_TYPE_BUTTON, 'Btn', pushbutton, 0),
            (mupdf.PDF_WIDGET_TYPE_RADIOBUTTON, 'Btn', radio, pushbutton),
            (mupdf.PDF_WIDGET_TYPE_CHECKBOX, 'Btn', 0, pushbutton | radio),
            (mupdf.PDF_WIDGET_TYPE_TEXT, 'Tx', 0, 0),
            (mupdf.PDF_WIDGET_TYPE_LISTBOX, 'Ch', 0, combo),
            (mupdf.PDF_WIDGET_TYPE_COMBOBOX, 'Ch', combo, 0),
            (mupdf.PDF_WIDGET_TYPE_SIGNATURE, 'Sig', 0, 0),
            )
    typename = None
    setbits = 0
    clearbits = 0
    for widget_type, name, to_set, to_clear in table:
        if type == widget_type:
            typename = PDF_NAME(name)
            setbits = to_set
            clearbits = to_clear
            break

    if typename is not None and typename.m_internal:
        mupdf.pdf_dict_put(obj, PDF_NAME('FT'), typename)

    if setbits != 0 or clearbits != 0:
        bits = mupdf.pdf_dict_get_int(obj, PDF_NAME('Ff'))
        bits = (bits & ~clearbits) | setbits
        mupdf.pdf_dict_put_int(obj, PDF_NAME('Ff'), bits)
def JM_set_object_value(obj, key, value):
    '''
    Set a PDF dict key to some value.

    Args:
        obj: the PDF object to modify.
        key: key path; sub-keys are separated by '/'.
        value: string with the PDF source of the new value.

    Returns:
        A new PDF object, parsed from the printed source of 'obj' in which
        the entry for 'key' carries 'value'.

    Raises:
        Exception: if a higher level of a not yet existing key path is an
            indirect reference, or if the value cannot be inserted.
    '''
    eyecatcher = "fitz: replace me!"
    pdf = mupdf.pdf_get_bound_document(obj)
    # split PDF key at path seps and take last key part
    parents = key.split('/')
    skey = parents.pop()    # 'parents' now holds the higher level sub-keys

    testkey = mupdf.pdf_dict_getp(obj, key)     # check if key already exists
    if not testkey.m_internal:
        # No, it will be created here. But we cannot allow this happening if
        # indirect objects are referenced. So we check all higher level
        # sub-paths for indirect references.
        while parents:
            t = '/'.join(parents)   # next high level
            if mupdf.pdf_is_indirect(mupdf.pdf_dict_getp(obj, JM_StrAsChar(t))):
                # The message is formatted here: the arguments used to be
                # passed to Exception() unformatted.
                raise Exception("path to '%s' has indirects" % JM_StrAsChar(skey))
            parents.pop()   # del last sub-key

    # Insert our eyecatcher. Will create all sub-paths in the chain, or
    # respectively remove old value of key-path.
    mupdf.pdf_dict_putp(obj, key, mupdf.pdf_new_text_string(eyecatcher))
    testkey = mupdf.pdf_dict_getp(obj, key)
    if not mupdf.pdf_is_string(testkey):
        raise Exception("cannot insert value for '%s'" % key)
    temp = mupdf.pdf_to_text_string(testkey)
    if temp != eyecatcher:
        raise Exception("cannot insert value for '%s'" % key)

    # read the result as a string
    res = JM_object_to_buffer(obj, 1, 0)
    objstr = JM_EscapeStrFromBuffer(res)

    # replace 'eyecatcher' by desired 'value'
    nullval = "/%s(%s)" % ( skey, eyecatcher)
    newval = "/%s %s" % (skey, value)
    newstr = objstr.replace(nullval, newval, 1)

    # make PDF object from resulting string
    new_obj = JM_pdf_obj_from_str(pdf, newstr)
    return new_obj
def JM_set_ocg_arrays(conf, basestate, on, off, rbgroups, locked):
    '''
    Write optional-content settings into the configuration dict 'conf'.

    'basestate' is stored as a name when truthy. For each of 'on', 'off',
    'locked' and 'rbgroups': None leaves the entry untouched, any other
    value deletes the existing entry, and a non-empty value re-creates it.
    'on', 'off' and 'locked' are sequences of xrefs; 'rbgroups' is a
    sequence of such sequences.
    '''
    if basestate:
        mupdf.pdf_dict_put_name(conf, PDF_NAME('BaseState'), basestate)

    # The three flat xref arrays are handled identically.
    for key, xrefs in (('ON', on), ('OFF', off), ('Locked', locked)):
        if xrefs is None:
            continue
        mupdf.pdf_dict_del(conf, PDF_NAME(key))
        if xrefs:
            target = mupdf.pdf_dict_put_array(conf, PDF_NAME(key), 1)
            JM_set_ocg_arrays_imp(target, xrefs)

    if rbgroups is None:
        return
    mupdf.pdf_dict_del(conf, PDF_NAME('RBGroups'))
    if rbgroups:
        groups = mupdf.pdf_dict_put_array(conf, PDF_NAME('RBGroups'), 1)
        for index in range(len(rbgroups)):
            members = rbgroups[index]
            sub_array = mupdf.pdf_array_push_array(groups, 1)
            JM_set_ocg_arrays_imp(sub_array, members)
def JM_set_ocg_arrays_imp(arr, list_):
    '''
    Append one indirect reference to PDF array 'arr' for every xref
    contained in 'list_'. The references are created in the document
    that 'arr' is bound to.
    '''
    owner = mupdf.pdf_get_bound_document(arr)
    for xref in list_:
        mupdf.pdf_array_push(arr, mupdf.pdf_new_indirect(owner, xref, 0))
def JM_set_resource_property(ref, name, xref):
    '''
    Insert an item into Resources/Properties (used for Marked Content).

    Arguments:
        ref:  object owning the resources, e.g. page object, Form XObject
        name: marked content name
        xref: xref of the referenced object (inserted as indirect reference)

    Missing 'Resources' / 'Properties' dictionaries are created.
    '''
    pdf = mupdf.pdf_get_bound_document(ref)
    indirect = mupdf.pdf_new_indirect(pdf, xref, 0)
    if not indirect.m_internal:
        RAISEPY(MSG_BAD_XREF, PyExc_ValueError)

    def get_or_create(parent, key):
        # return the sub-dictionary stored under 'key', creating it if absent
        existing = mupdf.pdf_dict_get(parent, key)
        if existing.m_internal:
            return existing
        return mupdf.pdf_dict_put_dict(parent, key, 1)

    resources = get_or_create(ref, PDF_NAME('Resources'))
    properties = get_or_create(resources, PDF_NAME('Properties'))
    mupdf.pdf_dict_put(properties, mupdf.pdf_new_name(name), indirect)
def JM_set_widget_properties(annot, Widget):
    '''
    Transfer the attributes of a Python Widget object to its PDF form field.

    Called by "Page.add_widget" and "Annot.update_widget". Widget
    attributes are read with a default of None. At the end the annotation
    is marked dirty, hot and active, and then updated.
    '''
    if isinstance(annot, Annot):
        annot = annot.this
    assert isinstance( annot, mupdf.PdfAnnot), f'{type(annot)=} {type=}'
    page = _pdf_annot_page(annot)
    assert page.m_internal, 'Annot is not bound to a page'
    annot_obj = mupdf.pdf_annot_obj(annot)
    pdf = page.doc()

    def GETATTR(name):
        return getattr(Widget, name, None)

    def usable_sequence(candidate):
        # truthy only for a non-empty list or tuple
        return candidate and PySequence_Check(candidate)

    def real_array(components):
        # PDF array of real numbers built from a Python sequence
        array = mupdf.pdf_new_array(pdf, len(components))
        for component in components:
            mupdf.pdf_array_push_real(array, component)
        return array

    field_type = GETATTR("field_type")

    # rectangle --------------------------------------------------------------
    rect = JM_rect_from_py(GETATTR("rect"))
    rect = mupdf.fz_transform_rect(rect, JM_rotate_page_matrix(page))
    mupdf.pdf_set_annot_rect(annot, rect)

    # fill color -------------------------------------------------------------
    fill_color = GETATTR("fill_color")
    if usable_sequence(fill_color):
        mupdf.pdf_field_set_fill_color(annot_obj, real_array(fill_color))

    # dashes -----------------------------------------------------------------
    border_dashes = GETATTR("border_dashes")
    if usable_sequence(border_dashes):
        dashes = mupdf.pdf_new_array(pdf, len(border_dashes))
        for dash in border_dashes:
            mupdf.pdf_array_push_int(dashes, dash)
        mupdf.pdf_dict_putl(annot_obj, dashes, PDF_NAME('BS'), PDF_NAME('D'))

    # border color -----------------------------------------------------------
    border_color = GETATTR("border_color")
    if usable_sequence(border_color):
        mupdf.pdf_dict_putl(
                annot_obj,
                real_array(border_color),
                PDF_NAME('MK'),
                PDF_NAME('BC'),
                )

    # attribute "text_format" is ignored - may be used later

    # field label ------------------------------------------------------------
    field_label = GETATTR("field_label")
    if field_label is not None:
        mupdf.pdf_dict_put_text_string(
                annot_obj,
                PDF_NAME('TU'),
                JM_StrAsChar(field_label),
                )

    # field name -------------------------------------------------------------
    field_name = GETATTR("field_name")
    if field_name is not None:
        new_name = JM_StrAsChar(field_name)
        current_name = mupdf.pdf_load_field_name(annot_obj)
        if new_name != current_name:
            mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('T'), new_name)

    # max text len -----------------------------------------------------------
    if field_type == mupdf.PDF_WIDGET_TYPE_TEXT:
        text_maxlen = GETATTR("text_maxlen")
        if text_maxlen:
            mupdf.pdf_dict_put_int(annot_obj, PDF_NAME('MaxLen'), text_maxlen)

    # display ----------------------------------------------------------------
    mupdf.pdf_field_set_display(annot_obj, GETATTR("field_display"))

    # choice values ----------------------------------------------------------
    if field_type in (mupdf.PDF_WIDGET_TYPE_LISTBOX, mupdf.PDF_WIDGET_TYPE_COMBOBOX):
        JM_set_choice_options(annot, GETATTR("choice_values"))

    # border style -----------------------------------------------------------
    style_obj = JM_get_border_style(GETATTR("border_style"))
    mupdf.pdf_dict_putl(annot_obj, style_obj, PDF_NAME('BS'), PDF_NAME('S'))

    # border width -----------------------------------------------------------
    width_obj = mupdf.pdf_new_real(GETATTR("border_width"))
    mupdf.pdf_dict_putl(annot_obj, width_obj, PDF_NAME('BS'), PDF_NAME('W'))

    # /DA string -------------------------------------------------------------
    da = JM_StrAsChar(GETATTR("_text_da"))
    mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('DA'), da)
    for unsupported in ('DS', 'RC'):  # not supported by MuPDF
        mupdf.pdf_dict_del(annot_obj, PDF_NAME(unsupported))

    # field flags ------------------------------------------------------------
    field_flags = GETATTR("field_flags")
    if field_flags is not None:
        if field_type == mupdf.PDF_WIDGET_TYPE_COMBOBOX:
            field_flags = field_flags | mupdf.PDF_CH_FIELD_IS_COMBO
        elif field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON:
            field_flags = field_flags | mupdf.PDF_BTN_FIELD_IS_RADIO
        elif field_type == mupdf.PDF_WIDGET_TYPE_BUTTON:
            field_flags = field_flags | mupdf.PDF_BTN_FIELD_IS_PUSHBUTTON
        mupdf.pdf_dict_put_int(annot_obj, PDF_NAME('Ff'), field_flags)

    # button caption ---------------------------------------------------------
    caption = JM_StrAsChar(GETATTR("button_caption"))
    if caption:
        mupdf.pdf_field_set_button_caption(annot_obj, caption)

    # scripts: /A, then /AA/K, /AA/F, /AA/V, /AA/C, /AA/Bl, /AA/Fo -----------
    script_slots = (
            ("script", PDF_NAME('A'), mupdf.PdfObj()),
            ("script_stroke", PDF_NAME('AA'), PDF_NAME('K')),
            ("script_format", PDF_NAME('AA'), PDF_NAME('F')),
            ("script_change", PDF_NAME('AA'), PDF_NAME('V')),
            ("script_calc", PDF_NAME('AA'), PDF_NAME('C')),
            ("script_blur", PDF_NAME('AA'), mupdf.pdf_new_name('Bl')),
            ("script_focus", PDF_NAME('AA'), mupdf.pdf_new_name('Fo')),
            )
    for attribute, key1, key2 in script_slots:
        JM_put_script(annot_obj, key1, key2, GETATTR(attribute))

    # field value ------------------------------------------------------------
    field_value = GETATTR("field_value")
    text = JM_StrAsChar(field_value)  # convert to text (may fail!)
    if field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON:
        if field_value:
            # TODO check if another button in the group is ON and if so set it Off
            onstate = mupdf.pdf_button_field_on_state(annot_obj)
            if onstate.m_internal:
                on = mupdf.pdf_to_name(onstate)
                mupdf.pdf_set_field_value(pdf, annot_obj, on, 1)
                mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), on)
            elif text:
                mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), text)
        else:
            mupdf.pdf_set_field_value(pdf, annot_obj, "Off", 1)
            mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), "Off")
    elif field_type == mupdf.PDF_WIDGET_TYPE_CHECKBOX:
        onstate = mupdf.pdf_button_field_on_state(annot_obj)
        on = onstate.pdf_to_name()
        if field_value in (True, on) or text == 'Yes':
            mupdf.pdf_set_field_value(pdf, annot_obj, on, 1)
            mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), on)
            mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('V'), on)
        else:
            mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), 'Off')
            mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('V'), 'Off')
    elif text:
        mupdf.pdf_set_field_value(pdf, annot_obj, text, 1)
        if field_type in (mupdf.PDF_WIDGET_TYPE_COMBOBOX, mupdf.PDF_WIDGET_TYPE_LISTBOX):
            mupdf.pdf_dict_del(annot_obj, PDF_NAME('I'))

    mupdf.pdf_dirty_annot(annot)
    mupdf.pdf_set_annot_hot(annot, 1)
    mupdf.pdf_set_annot_active(annot, 1)
    mupdf.pdf_update_annot(annot)
def JM_show_string_cs(
        text,
        user_font,
        trm,
        s,
        wmode,
        bidi_level,
        markup_dir,
        language,
        ):
    '''
    Show string 's' glyph by glyph in text object 'text', starting at
    matrix 'trm'. A character for which 'user_font' yields glyph id 0 is
    encoded via the fallback lookup instead. After each glyph the matrix
    is advanced horizontally (wmode 0) or vertically (otherwise).

    Returns the matrix following the last glyph.
    '''
    total = len(s)
    pos = 0
    while pos < total:
        consumed, ucs = mupdf.fz_chartorune(s[pos:])
        pos += consumed
        font = user_font
        gid = mupdf.fz_encode_character_sc(user_font, ucs)
        if gid == 0:
            gid, font = mupdf.fz_encode_character_with_fallback(user_font, ucs, 0, language)
        mupdf.fz_show_glyph(text, font, trm, gid, ucs, wmode, bidi_level, markup_dir, language)
        adv = mupdf.fz_advance_glyph(font, gid, wmode)
        dx, dy = (adv, 0) if wmode == 0 else (0, -adv)
        trm = mupdf.fz_pre_translate(trm, dx, dy)
    return trm
def JM_UnicodeFromBuffer(buff):
    '''
    Return the content of a buffer as a string. Undecodable bytes are
    replaced, and the text is cut off at the first NUL character.
    '''
    raw = mupdf.fz_buffer_extract_copy(buff)
    decoded = raw.decode(errors='replace')
    head, _, _ = decoded.partition(chr(0))
    return head
def message_warning(text):
    '''
    Emit 'text' as a warning: it is passed to message() with the prefix
    "warning: ".
    '''
    message('warning: {}'.format(text))
def JM_update_stream(doc, obj, buffer_, compress):
    '''
    Replace the stream of 'obj' by the content of 'buffer_'.

    If 'compress' is set and the buffer is longer than 30 bytes, a
    compressed copy is made. It is stored (with a FlateDecode filter entry)
    only if it is really smaller; otherwise the buffer is stored as is.
    '''
    def smaller_compressed_copy():
        # compressed buffer if that pays off, else None
        if not compress:
            return None
        length, _ = mupdf.fz_buffer_storage(buffer_)
        if not length > 30:  # ignore small stuff
            return None
        candidate = JM_compress_buffer(buffer_)
        assert isinstance(candidate, mupdf.FzBuffer)
        if not candidate.m_internal:
            return None
        length_compressed, _ = mupdf.fz_buffer_storage(candidate)
        if length_compressed < length:  # was it worth the effort?
            return candidate
        return None

    compressed = smaller_compressed_copy()
    if compressed is None:
        mupdf.pdf_update_stream(doc, obj, buffer_, 0)
        return

    mupdf.pdf_dict_put(
            obj,
            mupdf.PDF_ENUM_NAME_Filter,
            mupdf.PDF_ENUM_NAME_FlateDecode,
            )
    mupdf.pdf_update_stream(doc, obj, compressed, 1)
def JM_xobject_from_page(pdfout, fsrcpage, xref, gmap):
    '''
    Make an XObject from a PDF page.

    For a positive xref, an indirect reference to that object in 'pdfout'
    is returned instead. Otherwise a new XObject is created from the
    MediaBox, resources and contents of the source page.
    '''
    assert isinstance(gmap, mupdf.PdfGraftMap), f'{type(gmap)=}'
    if xref > 0:
        return mupdf.pdf_new_indirect(pdfout, xref, 0)

    source_obj = _as_pdf_page(fsrcpage.this).obj()
    mediabox = mupdf.pdf_to_rect(
            mupdf.pdf_dict_get_inheritable(source_obj, PDF_NAME('MediaBox'))
            )

    # Copy the resources object of the source page into the output PDF,
    # using the graft map when there is one.
    source_resources = mupdf.pdf_dict_get_inheritable(source_obj, PDF_NAME('Resources'))
    if gmap.m_internal:
        resources = mupdf.pdf_graft_mapped_object(gmap, source_resources)
    else:
        resources = mupdf.pdf_graft_object(pdfout, source_resources)

    # contents source of the source page
    contents = JM_read_contents(source_obj)

    # create the XObject representing the source page
    xobject = mupdf.pdf_new_xobject(pdfout, mediabox, mupdf.FzMatrix(), mupdf.PdfObj(0), contents)
    JM_update_stream(pdfout, xobject, contents, 1)  # store page contents
    mupdf.pdf_dict_put(xobject, PDF_NAME('Resources'), resources)  # store page resources
    return xobject
def PySequence_Check(s):
    '''Return True if 's' is a list or a tuple (subclasses included).'''
    return isinstance(s, list) or isinstance(s, tuple)
def PySequence_Size(s):
    '''Return the number of items in 's'.'''
    size = len(s)
    return size
# Constants: error messages. These are also in extra.i.

# --- bad or unsuitable arguments
MSG_BAD_ANNOT_TYPE = "bad annot type"
MSG_BAD_APN = "bad or missing annot AP/N"
MSG_BAD_ARG_INK_ANNOT = "arg must be seq of seq of float pairs"
MSG_BAD_ARG_POINTS = "bad seq of points"
MSG_BAD_BUFFER = "bad type: 'buffer'"
MSG_BAD_COLOR_SEQ = "bad color sequence"
MSG_BAD_DOCUMENT = "cannot open broken document"
MSG_BAD_FILETYPE = "bad filetype"
MSG_BAD_LOCATION = "bad location"
MSG_BAD_OC_CONFIG = "bad config number"
MSG_BAD_OC_LAYER = "bad layer number"
MSG_BAD_OC_REF = "bad 'oc' reference"
MSG_BAD_PAGEID = "bad page id"
MSG_BAD_PAGENO = "bad page number(s)"
MSG_BAD_PDFROOT = "PDF has no root"
MSG_BAD_RECT = "rect is infinite or empty"
MSG_BAD_TEXT = "bad type: 'text'"
MSG_BAD_XREF = "bad xref"

# --- failed operations
MSG_COLOR_COUNT_FAILED = "color count failed"
MSG_FILE_OR_BUFFER = "need font file or buffer"
MSG_FONT_FAILED = "cannot create font"

# --- object is not of the required kind
MSG_IS_NO_ANNOT = "is no annotation"
MSG_IS_NO_IMAGE = "is no image"
MSG_IS_NO_PDF = "is no PDF"
MSG_IS_NO_DICT = "object is no PDF dict"

# --- pixmap related
MSG_PIX_NOALPHA = "source pixmap has no alpha"
MSG_PIXEL_OUTSIDE = "pixel(s) outside image"

# Exception kinds, given as plain strings.
JM_Exc_FileDataError = 'FileDataError'
PyExc_ValueError = 'ValueError'
def RAISEPY( msg, exc):
    '''
    Raise a plain Exception carrying 'msg'.

    'exc' is accepted but not used here.
    '''
    error = Exception( msg)
    raise error
def PyUnicode_DecodeRawUnicodeEscape(s, errors='strict'):
    '''
    Decode 's' with the 'raw_unicode_escape' codec.

    Args:
        s: str, bytes, bytearray or memoryview. A str is encoded to UTF-8
            first. Any falsy value (None, "", b"") yields "".
        errors: error handler used for both encoding and decoding.

    Returns:
        The decoded string.

    Raises:
        TypeError: if 's' is truthy but of none of the supported types.
    '''
    if not s:
        return ""
    if isinstance(s, str):
        raw = s.encode("utf8", errors=errors)
    elif isinstance(s, (bytes, bytearray, memoryview)):
        # other bytes-like inputs used to leave the local variable unbound
        raw = bytes(s)
    else:
        raise TypeError(
                "expected str or bytes-like object, got %s" % type(s).__name__
                )
    return raw.decode('raw_unicode_escape', errors=errors)
def CheckColor(c: OptSeq):
    """Validate a color given as a sequence.

    A falsy value passes unchecked. Anything else must be a list or tuple
    of 1, 3 or 4 components, none below 0 and none above 1; otherwise a
    ValueError is raised.
    """
    if not c:
        return
    acceptable = (
        type(c) in (list, tuple)
        and len(c) in (1, 3, 4)
        and not (min(c) < 0)
        and not (max(c) > 1)
    )
    if not acceptable:
        raise ValueError("need 1, 3 or 4 color components in range 0 to 1")
def CheckFont(page: Page, fontname: str) -> tuple:
    """Return the first entry of the page's font list whose item 4 (the
    reference name) equals 'fontname'; None if there is no such entry.
    """
    matches = (entry for entry in page.get_fonts() if entry[4] == fontname)
    return next(matches, None)
def CheckFontInfo(doc: Document, xref: int) -> list:
    """Return the first item of 'doc.FontInfos' whose element 0 equals
    'xref'; None if there is none.
    """
    matches = (info for info in doc.FontInfos if xref == info[0])
    return next(matches, None)
def CheckMarkerArg(quads: typing.Any) -> tuple:
    """Normalize the 'quads' argument of text marker annotations.

    A single rect-like is returned as a 1-tuple holding its quad, a single
    quad-like as a 1-tuple holding the argument itself. Anything else is
    taken as a sequence whose items must each pass CheckRect or CheckQuad;
    it is returned unchanged, or ValueError is raised at the first bad item.
    """
    if CheckRect(quads):
        return (Rect(quads).quad,)
    if CheckQuad(quads):
        return (quads,)
    if any(not (CheckRect(item) or CheckQuad(item)) for item in quads):
        raise ValueError("bad quads entry")
    return quads
def CheckMorph(o: typing.Any) -> bool:
    """Validate a 'morph' argument.

    Returns False for a falsy argument and True for a valid one: a list or
    tuple of two items, the first of length 2, the second of length 6 with
    its last two elements equal to 0. Anything else raises ValueError.
    """
    if not bool(o):
        return False
    if type(o) not in (list, tuple) or len(o) != 2:
        raise ValueError("morph must be a sequence of length 2")
    fixpoint, matrix = o[0], o[1]
    if len(fixpoint) != 2 or len(matrix) != 6:
        raise ValueError("invalid morph param 0")
    if not (matrix[4] == matrix[5] == 0):
        raise ValueError("invalid morph param 1")
    return True
def CheckParent(o: typing.Any):
    """Parent check - currently switched off.

    The function returns None right away, so the orphan test below is
    never executed.
    """
    return
    # Unreachable as long as the early return above is in place.
    orphaned = not hasattr(o, "parent") or o.parent is None
    if orphaned:
        raise ValueError(f"orphaned object {type(o)=}: parent is None")
def CheckQuad(q: typing.Any) -> bool:
    """Tell whether 'q' can be turned into a Quad which is convex.

    If the Quad constructor rejects the argument, False is returned (the
    exception is reported only when g_exceptions_verbose exceeds 1).
    """
    try:
        candidate = Quad(q)
    except Exception:
        if g_exceptions_verbose > 1:
            exception_info()
        return False
    else:
        return candidate.is_convex
def CheckRect(r: typing.Any) -> bool:
    """Tell whether 'r' can be turned into a Rect which is neither empty
    nor infinite.

    If the Rect constructor rejects the argument, False is returned (the
    exception is reported only when g_exceptions_verbose exceeds 1).
    """
    try:
        rect = Rect(r)
    except Exception:
        if g_exceptions_verbose > 1:
            exception_info()
        return False
    if rect.is_empty:
        return False
    return not rect.is_infinite
def ColorCode(c: typing.Union[list, tuple, float, None], f: str) -> str:
    """Return the PDF color operator string for color 'c'.

    A falsy 'c' gives "". A float-like 'c' counts as a single component.
    After validation by CheckColor, the formatted components are followed
    by "G"/"g" (1 component), "RG"/"rg" (3 components) or "K"/"k"
    (otherwise). The upper-case operator is chosen if 'f' is "c".
    """
    if not c:
        return ""
    if hasattr(c, "__float__"):
        c = (c,)
    CheckColor(c)
    count = len(c)
    if count == 1:
        operand = c[0]
        upper, lower = "G ", "g "
    elif count == 3:
        operand = tuple(c)
        upper, lower = "RG ", "rg "
    else:
        operand = tuple(c)
        upper, lower = "K ", "k "
    prefix = _format_g(operand) + " "
    if f == "c":
        return prefix + upper
    return prefix + lower
def Page__add_text_marker(self, quads, annot_type):
    '''
    Create a text marker annotation of type 'annot_type' covering 'quads'.

    A non-zero page rotation is set to 0 while the annotation is built and
    put back afterwards, also in case of failure. Returns the new Annot,
    or None if an exception occurred.
    '''
    pdfpage = self._pdf_page()
    rotation = JM_page_rotation(pdfpage)
    rotated = rotation != 0

    def set_rotate(angle):
        mupdf.pdf_dict_put_int(pdfpage.obj(), PDF_NAME('Rotate'), angle)

    def final():
        # restore the original page rotation
        if rotated:
            set_rotate(rotation)

    try:
        if rotated:
            set_rotate(0)
        annot = mupdf.pdf_create_annot(pdfpage, annot_type)
        for item in quads:
            mupdf.pdf_add_annot_quad_point(annot, JM_quad_from_py(item))
        mupdf.pdf_update_annot(annot)
        JM_add_annot_id(annot, "A")
        final()
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        final()
        return
    return Annot(annot)
def PDF_NAME(x):
    '''
    Return attribute 'PDF_ENUM_NAME_<x>' of module mupdf.

    Note that the result is a (swig proxy for) pdf_obj*, not a
    mupdf.PdfObj. In the C++ API the constructor PdfObj::PdfObj(pdf_obj*)
    is marked as explicit, but this seems to be ignored by SWIG. Should
    SWIG ever start generating code that respects `explicit`, the result
    would need to be wrapped as `mupdf.PdfObj(ret)`.

    [Compare with extra.i, which defines its own PDF_NAME2() macro that
    returns a mupdf::PdfObj.]
    '''
    assert isinstance(x, str)
    attribute = 'PDF_ENUM_NAME_{}'.format(x)
    return getattr(mupdf, attribute)
def UpdateFontInfo(doc: Document, info: typing.Sequence):
    """Store 'info' in 'doc.FontInfos'.

    The first entry whose element 0 equals info[0] is replaced; if there
    is none, 'info' is appended.
    """
    xref = info[0]
    for index, known in enumerate(doc.FontInfos):
        if known[0] == xref:
            doc.FontInfos[index] = info
            return
    doc.FontInfos.append(info)
def args_match(args, *types):
    '''
    Returns true if <args> matches <types>.

    Each item in <types> is a type or tuple of types. Any of these types
    will match an item in <args>. `None` will match anything in <args>.
    `type(None)` will match an arg whose value is `None`.

    When <args> is exhausted, remaining items of <types> are accepted only
    if they are tuples containing `None` (the arg has a default value).
    '''
    supplied = len(args)
    used = 0
    for type_ in types:
        if used >= supplied:
            if isinstance(type_, tuple) and None in type_:
                # arg is missing but has default value.
                continue
            return False
        if type_ is not None and not isinstance(args[used], type_):
            return False
        used += 1
    return used == supplied
def calc_image_matrix(width, height, tr, rotate, keep):
    '''
    Compute the image insertion matrix.

    'tr' is the target rectangle, 'rotate' the rotation angle. With 'keep'
    set, the scaling is derived from the image's width / height ratio;
    otherwise the full target rectangle is used. The matrix shifts by
    (-0.5, -0.5), rotates, scales and finally moves to the center of the
    target rectangle.
    '''
    trect = JM_rect_from_py(tr)
    rot = mupdf.fz_rotate(rotate)
    trw = trect.x1 - trect.x0
    trh = trect.y1 - trect.y0

    if keep:
        largest = max(width, height)
        fw, fh = width / largest, height / largest
    else:
        fw = fh = 1
    small = min(fw, fh)
    if rotate not in (0, 180):
        fw, fh = fh, fw

    if fw < 1:
        if trw / fw > trh / fh:
            w, h = trh * small, trh
        else:
            w, h = trw, trw / small
    elif fw != fh:
        if trw / fw > trh / fh:
            w, h = trh / small, trh
        else:
            w, h = trw, trw * small
    else:
        w, h = trw, trh

    center = mupdf.fz_make_point(
            (trect.x0 + trect.x1) / 2,
            (trect.y0 + trect.y1) / 2,
            )
    mat = mupdf.fz_make_matrix(1, 0, 0, 1, -0.5, -0.5)
    for step in (rot, mupdf.fz_scale(w, h), mupdf.fz_translate(center.x, center.y)):
        mat = mupdf.fz_concat(mat, step)
    return mat
def detect_super_script(line, ch):
    '''
    For a line with wmode 0 and direction (1, 0): tell whether the origin
    y of 'ch' is above the origin y of the line's first character by more
    than 10% of the character size. For any other line, 0 is returned.
    '''
    raw_line = line.m_internal
    if raw_line.wmode == 0 and raw_line.dir.x == 1 and raw_line.dir.y == 0:
        raw_ch = ch.m_internal
        return raw_ch.origin.y < raw_line.first_char.origin.y - raw_ch.size * 0.1
    return 0
def dir_str(x):
    '''
    Return a multi-line text describing 'x': a header line with the
    object, its type and the number of names in dir(x), followed by one
    line per name.
    '''
    names = dir(x)
    lines = [f'{x} {type(x)} ({len(names)}):\n']
    lines.extend(f' {name}\n' for name in names)
    return ''.join(lines)
def getTJstr(text: str, glyphs: typing.Union[list, tuple, None], simple: bool, ordering: int) -> str:
    """ Return a PDF string enclosed in [] brackets, suitable for the PDF TJ
    operator.

    Notes:
        The input string is converted to either 2 or 4 hex digits per
        character. Text already in the form "[<...>]" is returned as is.
    Args:
        simple: 2 hex digits per character. Without glyphs the char code
                is used, with glyphs (Symbol, ZapfDingbats) the glyph.
                Codes of 256 and above are replaced by "b7".
        not simple: 4 hex digits per character.
                ordering < 0: use glyphs, not char codes
                ordering >=0: a CJK font! Use char codes as glyphs
    """
    if text.startswith("[<") and text.endswith(">]"):  # already done
        return text

    if not bool(text):
        return "[<>]"

    def code(ch):
        # char code without glyph table, else the glyph number
        return ord(ch) if glyphs is None else glyphs[ord(ch)][0]

    if simple:  # 2 hex digits per character
        parts = []
        for ch in text:
            if ord(ch) < 256:
                parts.append("%02x" % code(ch))
            else:
                parts.append("b7")
    elif ordering < 0:  # not a CJK font: use the glyphs
        parts = ["%04x" % glyphs[ord(ch)][0] for ch in text]
    else:  # CJK: use the char codes
        parts = ["%04x" % ord(ch) for ch in text]

    return "[<" + "".join(parts) + ">]"
def get_pdf_str(s: str) -> str:
    """ Return a PDF string depending on its coding.

    Notes:
        Returns a string bracketed with either "()" or "<>" for hex values.
        If all characters are below 256, "(...)" is returned: printable
        ASCII is copied (parentheses and backslash get escaped), codes
        above 127 become octal \\nnn, the white spaces backspace, tab, line
        feed, form feed and carriage return become their escapes, and any
        other code is replaced by \\267.
        Otherwise "<feff[hexstring]>" is returned, where [hexstring] is the
        UTF-16BE encoding of the original.
    """
    if not bool(s):
        return "()"

    white_space = {
        8: "\\b",  # backspace
        9: "\\t",  # tab
        10: "\\n",  # line feed
        12: "\\f",  # form feed
        13: "\\r",  # carriage return
    }
    pieces = []
    for ch in s:
        code = ord(ch)
        if code > 255:
            # beyond 8-bit code range: whole string as hex with UTF-16BE BOM
            utf16 = bytearray([254, 255]) + bytearray(s, "UTF-16BE")
            return "<" + utf16.hex() + ">"
        if 31 < code < 127:  # printable ASCII
            if ch in ("(", ")", "\\"):  # these need to be escaped
                pieces.append("\\")
            pieces.append(ch)
        elif code > 127:  # beyond ASCII
            pieces.append("\\%03o" % code)
        else:
            # supported white space, or replacement by 0xB7
            pieces.append(white_space.get(code, "\\267"))

    return "(" + "".join(pieces) + ")"
def get_tessdata(tessdata=None):
    """Detect Tesseract language support folder.

    This function is used to enable OCR via Tesseract even if the language
    support folder is not specified directly or in environment variable
    TESSDATA_PREFIX.

    * If <tessdata> is set we return it directly.

    * Otherwise we return `os.environ['TESSDATA_PREFIX']` if set.

    * Otherwise we search for a Tesseract installation and return its
      language support folder.

    * Otherwise we raise an exception.

    Raises:
        RuntimeError: if no language support folder could be determined.
    """
    if tessdata:
        return tessdata
    tessdata = os.getenv("TESSDATA_PREFIX")
    if tessdata:  # use environment variable if set
        return tessdata

    # Try to locate the tesseract-ocr installation.

    import subprocess

    # First choice: ask tesseract itself where its languages are.
    cp = subprocess.run('tesseract --list-langs', shell=1, capture_output=1, check=0, text=True)
    if cp.returncode == 0:
        m = re.search('List of available languages in "(.+)"', cp.stdout)
        if m:
            tessdata = m.group(1)
            return tessdata

    # Windows systems:
    if sys.platform == "win32":
        cp = subprocess.run("where tesseract", shell=1, capture_output=1, check=0, text=True)
        response = cp.stdout.strip()
        if cp.returncode or not response:
            raise RuntimeError("No tessdata specified and Tesseract is not installed")
        dirname = os.path.dirname(response)  # path of tesseract.exe
        tessdata = os.path.join(dirname, "tessdata")  # language support
        if os.path.exists(tessdata):  # all ok?
            return tessdata
        else:  # should not happen!
            # f-string, so that the folder name really shows in the message
            raise RuntimeError(f"No tessdata specified and Tesseract installation has no {tessdata} folder")

    # Unix-like systems:
    attempts = list()
    for path in 'tesseract-ocr', 'tesseract':
        cp = subprocess.run(f'whereis {path}', shell=1, capture_output=1, check=0, text=True)
        if cp.returncode == 0:
            response = cp.stdout.strip().split()
            if len(response) == 2:
                # search tessdata in folder structure
                dirname = response[1]  # contains tesseract-ocr installation folder
                pattern = f"{dirname}/*/tessdata"
                attempts.append(pattern)
                tessdatas = glob.glob(pattern)
                tessdatas.sort()
                if tessdatas:
                    return tessdatas[-1]
    if attempts:
        # one tried pattern per line
        text = 'No tessdata specified and no match for:\n'
        text += '\n'.join(f'    {attempt}' for attempt in attempts)
        raise RuntimeError(text)
    else:
        raise RuntimeError('No tessdata specified and Tesseract is not installed')
def css_for_pymupdf_font(
    fontcode: str, *, CSS: OptStr = None, archive: AnyType = None, name: OptStr = None
) -> str:
    """Create @font-face items for the given fontcode of pymupdf-fonts.

    Every font of package pymupdf-fonts whose code starts with 'fontcode'
    gets a CSS @font-face definition, and its font buffer is added to
    'archive'. All definitions share one font-family name.

    Note:
        The font naming convention in package pymupdf-fonts is
        "fontcode<sf>", where the suffix "sf" is either empty or one of
        "it", "bo" or "bi", standing for the regular, italic, bold or
        bold-italic variant. For example, font code "notos" refers to
            "notos"   - "Noto Sans Regular"
            "notosit" - "Noto Sans Italic"
            "notosbo" - "Noto Sans Bold"
            "notosbi" - "Noto Sans Bold Italic"

    Args:
        fontcode: (str) start of the font codes to include. At most 4
            matching fonts are accepted.
        CSS: (str) CSS source to append the @font-face definitions to.
        archive: (Archive, mandatory) where to place the font buffers.
        name: (str) use this as font-family name instead of 'fontcode'.

    Returns:
        The CSS source with the new @font-face definitions appended.

    Raises:
        ValueError: if 'archive' is no Archive, if no font code matches,
            or if more than 4 font codes match.
    """
    # @font-face template string
    CSSFONT = "\n@font-face {font-family: %s; src: url(%s);%s%s}\n"

    if type(archive) is not Archive:
        raise ValueError("'archive' must be an Archive")
    if CSS is None:
        CSS = ""

    # select font codes starting with the pass-in string
    font_keys = [key for key in fitz_fontdescriptors.keys() if key.startswith(fontcode)]
    if not font_keys:
        raise ValueError(f"No font code '{fontcode}' found in pymupdf-fonts.")
    if len(font_keys) > 4:
        raise ValueError("fontcode too short")

    family = fontcode if name is None else name

    for fkey in font_keys:
        descriptor = fitz_fontdescriptors[fkey]
        is_bold = descriptor["bold"]
        is_italic = descriptor["italic"]
        archive.add(descriptor["loader"](), fkey)  # font buffer goes to the archive
        weight = "font-weight: bold;" if is_bold else ""
        style = "font-style: italic;" if is_italic else ""
        CSS += CSSFONT % (family, fkey, weight, style)
    return CSS
def get_text_length(text: str, fontname: str ="helv", fontsize: float =11, encoding: int =0) -> float:
    """Calculate length of a string for a built-in font.

    Args:
        fontname: name of the font (compared in lower case).
        fontsize: font size points.
        encoding: encoding to use, 0=Latin (default), 1=Greek, 2=Cyrillic.
    Returns:
        (float) length of text.
    Raises:
        ValueError: for a font name that is neither contained in
            Base14_fontdict nor one of the supported CJK names.
    """
    fontname = fontname.lower()
    basename = Base14_fontdict.get(fontname, None)

    # Symbol and ZapfDingbats come with their own glyph width tables.
    if basename == "ZapfDingbats":
        glyphs = zapf_glyphs
    elif basename == "Symbol":
        glyphs = symbol_glyphs
    else:
        glyphs = None

    if glyphs is not None:
        fallback_width = None
        total = 0
        for ch in text:
            code = ord(ch)
            if code < 256:
                total = total + glyphs[code][1]
            else:
                # characters beyond 8 bit count with the width of glyph 183
                total = total + glyphs[183][1]
        return total * fontsize

    if fontname in Base14_fontdict.keys():
        return util_measure_string(
            text, Base14_fontdict[fontname], fontsize, encoding
        )

    cjk_names = (
        "china-t",
        "china-s",
        "china-ts",
        "china-ss",
        "japan",
        "japan-s",
        "korea",
        "korea-s",
    )
    if fontname in cjk_names:
        return len(text) * fontsize

    raise ValueError("Font '%s' is unsupported" % fontname)
def image_profile(img: ByteString) -> dict:
    """ Return basic properties of an image.

    Args:
        img: bytes, bytearray, io.BytesIO object or an opened image file
            (any object with a "read" attribute).
    Returns:
        The result of TOOLS.image_profile() for the image data: a
        dictionary with keys width, height, colorspace.n, bpc, type, ext
        and size, where 'type' is the MuPDF image type (0 to 14) and 'ext'
        the suitable file extension.
    Raises:
        ValueError: if 'img' is of none of the supported kinds.
    """
    kind = type(img)
    if kind is io.BytesIO:
        data = img.getvalue()
    elif hasattr(img, "read"):
        data = img.read()
    elif kind in (bytes, bytearray):
        data = img
    else:
        raise ValueError("bad argument 'img'")
    return TOOLS.image_profile(data)
def jm_append_merge(dev):
    '''
    Append current path to list or merge into last path of the list.

    (1) If dev.method is set, the method of that name is looked up on
        dev.out and called with the path dictionary; dev.pathdict is then
        set to None and nothing else happens.
    (2) Otherwise the path is appended if it is the first one, if it is
        not a stroke path ("s"), if the previous path is not a fill path
        ("f"), or if both paths have different item lists.
    (3) Otherwise the keys missing in the previous path are copied over
        from the current one and the previous path's type becomes "fs".
    '''
    assert isinstance(dev.out, list)

    if callable(dev.method) or dev.method:  # function or method
        # callback.
        if dev.method is None:
            # fixme, this surely cannot happen?
            assert 0
        else:
            resp = getattr(dev.out, dev.method)(dev.pathdict)
        if not resp:
            message("calling cdrawings callback function/method failed!")
        dev.pathdict = None
        return

    def append():
        # store a copy of the current path and empty the working dict
        dev.out.append(dev.pathdict.copy())
        dev.pathdict.clear()

    def stroke_follows_same_fill():
        # True if the current stroke path repeats the previous fill path
        if len(dev.out) == 0:  # first path
            return False
        if dev.pathdict[ dictkey_type] != 's':  # not a stroke path
            return False
        last = dev.out[-1]
        if last[ dictkey_type] != 'f':  # previous is not a fill path
            return False
        # there must be the same list of items for "f" and "s".
        if last[ dictkey_items] != dev.pathdict[ dictkey_items]:
            return False
        return True

    assert isinstance(dev.out, list)
    if not stroke_follows_same_fill():
        return append()

    prev = dev.out[-1]
    # merge with no override: keys already in 'prev' keep their values
    try:
        for key, val in dev.pathdict.items():
            prev.setdefault(key, val)
        merged = True
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        merged = False

    if merged:
        prev[ dictkey_type] = 'fs'
        dev.pathdict.clear()
    else:
        message("could not merge stroke and fill path")
        append()
def jm_bbox_add_rect( dev, ctx, rect, code):
    '''
    Append a tuple (code, rect) to dev.result; if dev.layers is set, the
    current layer name is added as a third item.
    '''
    with_layer = True if dev.layers else False
    entry = (code, JM_py_from_rect(rect))
    if with_layer:
        entry = entry + (dev.layer_name,)
    dev.result.append( entry)
def jm_bbox_fill_image( dev, ctx, image, ctm, alpha, color_params):
    '''Record the unit rectangle transformed by 'ctm' as "fill-image".'''
    unit = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
    bbox = mupdf.ll_fz_transform_rect( unit.internal(), ctm)
    jm_bbox_add_rect( dev, ctx, bbox, "fill-image")
def jm_bbox_fill_image_mask( dev, ctx, image, ctm, colorspace, color, alpha, color_params):
    '''
    Record the unit rectangle transformed by 'ctm' as "fill-imgmask".
    Exceptions are reported (if verbose) and raised again.
    '''
    try:
        bbox = mupdf.ll_fz_transform_rect(mupdf.fz_unit_rect, ctm)
        jm_bbox_add_rect( dev, ctx, bbox, "fill-imgmask")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def jm_bbox_fill_path( dev, ctx, path, even_odd, ctm, colorspace, color, alpha, color_params):
    '''
    Device callback: record the bounds of a filled path (no stroke state)
    with code "fill-path". Exceptions are optionally logged and re-raised.
    '''
    even_odd = bool(even_odd)   # normalised, but not used further here
    try:
        bbox = mupdf.ll_fz_bound_path(path, None, ctm)
        jm_bbox_add_rect( dev, ctx, bbox, "fill-path")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def jm_bbox_fill_shade( dev, ctx, shade, ctm, alpha, color_params):
    '''
    Device callback: record the bounds of a shading with code "fill-shade".
    Exceptions are optionally logged and re-raised.
    '''
    try:
        bbox = mupdf.ll_fz_bound_shade( shade, ctm)
        jm_bbox_add_rect( dev, ctx, bbox, "fill-shade")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def jm_bbox_stroke_text( dev, ctx, text, stroke, ctm, *args):
    '''
    Device callback: record the bounds of stroked text with code
    "stroke-text". Additional callback arguments are accepted and ignored.
    '''
    try:
        bbox = mupdf.ll_fz_bound_text( text, stroke, ctm)
        jm_bbox_add_rect( dev, ctx, bbox, "stroke-text")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def jm_bbox_fill_text( dev, ctx, text, ctm, *args):
    '''
    Device callback: record the bounds of filled text with code "fill-text".
    Additional callback arguments are accepted and ignored.
    '''
    try:
        bbox = mupdf.ll_fz_bound_text( text, None, ctm)
        jm_bbox_add_rect( dev, ctx, bbox, "fill-text")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def jm_bbox_ignore_text( dev, ctx, text, ctm):
    '''
    Device callback: record the bounds of ignored text with code
    "ignore-text".
    '''
    bbox = mupdf.ll_fz_bound_text(text, None, ctm)
    jm_bbox_add_rect( dev, ctx, bbox, "ignore-text")
def jm_bbox_stroke_path( dev, ctx, path, stroke, ctm, colorspace, color, alpha, color_params):
    '''
    Device callback: record the bounds of a stroked path with code
    "stroke-path". Exceptions are optionally logged and re-raised.
    '''
    try:
        bbox = mupdf.ll_fz_bound_path( path, stroke, ctm)
        jm_bbox_add_rect( dev, ctx, bbox, "stroke-path")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def jm_checkquad(dev):
    '''
    Try to collapse the last 4 line items of the current path into one quad.

    The 4 lines already form a polyline (each line starts where the previous
    one ended), so it only remains to check that the end point of the last
    line equals the start point of the first one.

    Returns 1 if the 4 items were replaced by a single ("qu", quad) item (the
    line counter is reset in this case), otherwise 0 with nothing changed.
    '''
    items = dev.pathdict[ dictkey_items]
    count = len(items)
    first = count - 4   # index of the first of the four lines

    corners = []        # x, y of the 4 line start points: 8 numbers
    end_point = None    # end point of the most recently visited line
    for idx in range( first, count):
        segment = items[ idx]
        start = JM_point_from_py( segment[ 1])
        corners.extend( (start.x, start.y))
        end_point = JM_point_from_py( segment[ 2])

    if end_point.x != corners[0] or end_point.y != corners[1]:
        return 0    # polyline is not closed: not a polygon

    dev.linecount = 0   # a quad was detected, restart counting lines

    # Relationship of the coordinate list to the quad corners:
    # (0, 1) = ul, (2, 3) = ll, (6, 7) = ur, (4, 5) = lr
    quad = mupdf.fz_make_quad(
            corners[0], corners[1],
            corners[6], corners[7],
            corners[2], corners[3],
            corners[4], corners[5],
            )
    items[ first] = ('qu', JM_py_from_quad(quad))   # first line becomes the quad
    del items[ first + 1 : count]                   # drop the other three lines
    return 1
def jm_checkrect(dev):
    '''
    Try to collapse the last 3 line items of the current path into a rectangle.

    The line counter of `dev` is always reset. Returns 1 if the three items
    were replaced by one ("re", rect, orientation) item, otherwise 0.

    Assumption: when decomposing rectangles, MuPDF starts with a horizontal
    line, followed by a vertical and then another horizontal line. The first
    line is (ll, lr); the third line is stored from ur to ul.
    '''
    dev.linecount = 0   # reset line count
    items = dev.pathdict[ dictkey_items]
    count = len(items)

    bottom = items[ count - 3]
    ll = JM_point_from_py( bottom[ 1])
    lr = JM_point_from_py( bottom[ 2])
    # the middle line is not needed
    top = items[ count - 1]
    ur = JM_point_from_py( top[ 1])
    ul = JM_point_from_py( top[ 2])

    axis_parallel = (
            ll.y == lr.y
            and ll.x == ul.x
            and ur.y == ul.y
            and ur.x == lr.x
            )
    if not axis_parallel:
        return 0    # not a rectangle

    # Orientation: +1 (anti-clockwise) if the first line is below the third
    # line, -1 (clockwise) otherwise.
    if ul.y < lr.y:
        rect = mupdf.fz_make_rect(ul.x, ul.y, lr.x, lr.y)
        orientation = 1
    else:
        rect = mupdf.fz_make_rect(ll.x, ll.y, ur.x, ur.y)
        orientation = -1

    items[ count - 3] = ( 're', JM_py_from_rect(rect), orientation)
    del items[ count - 2 : count]   # remove the two remaining lines
    return 1
def jm_trace_text( dev, text, type_, ctm, colorspace, color, alpha, seqno):
    '''
    Trace every span of `text`.

    Follows the linked list starting at `text.head` via `.next` and passes
    each span, together with the remaining arguments, to
    jm_trace_text_span().
    '''
    span = text.head
    while span:
        jm_trace_text_span( dev, span, type_, ctm, colorspace, color, alpha, seqno)
        span = span.next
def jm_trace_text_span(dev, span, type_, ctm, colorspace, color, alpha, seqno):
    '''
    Build the trace dictionary of one text span and append it to `dev.out`.

    Args:
        dev: trace device; `dev.linewidth` and `dev.layer_name` are read and
            the result is appended to `dev.out`.
        span: a mupdf.fz_text_span.
        type_: text trace type, stored under key 'type'.
        ctm: a mupdf.fz_matrix, the current transformation matrix.
        colorspace, color: text color; converted to RGB if `colorspace` is
            truthy, otherwise (0, 0, 0) is stored.
        alpha: stored under key 'opacity'.
        seqno: stored under key 'seqno'.

    Port of the C function
    jm_trace_text_span(fz_context *ctx, PyObject *out, fz_text_span *span, int type, fz_matrix ctm, fz_colorspace *colorspace, const float *color, float alpha, size_t seqno)
    '''
    assert isinstance( span, mupdf.fz_text_span)
    span = mupdf.FzTextSpan( span)
    assert isinstance( ctm, mupdf.fz_matrix)
    ctm = mupdf.FzMatrix( ctm)
    font = span.font()
    wmode = span.m_internal.wmode
    fontname = JM_font_name( font)

    mat = mupdf.fz_concat(span.trm(), ctm)  # text transformation matrix
    direction = mupdf.fz_transform_vector(mupdf.fz_make_point(1, 0), mat)   # writing direction
    fsize = math.sqrt(direction.x * direction.x + direction.y * direction.y)    # font size
    direction = mupdf.fz_normalize_vector(direction)

    asc = JM_font_ascender( font)
    dsc = JM_font_descender( font)
    if asc < 1e-3:  # probably Tesseract font
        dsc = -0.1
        asc = 0.9
    # effective ascender / descender
    ascsize = asc * fsize / (asc - dsc)
    dscsize = dsc * fsize / (asc - dsc)

    # font flags
    fflags = 0
    fflags += mupdf.fz_font_is_monospaced( font) * TEXT_FONT_MONOSPACED
    fflags += mupdf.fz_font_is_italic( font) * TEXT_FONT_ITALIC
    fflags += mupdf.fz_font_is_serif( font) * TEXT_FONT_SERIFED
    fflags += mupdf.fz_font_is_bold( font) * TEXT_FONT_BOLD

    rot = mupdf.fz_make_matrix(direction.x, direction.y, -direction.y, direction.x, 0, 0)
    if direction.x == -1:   # left-right flip
        rot.d = 1

    # Does not depend on the individual character, so it is decided once.
    up_down_flip = (
            (mat.d > 0 and (direction.x == 1 or direction.x == -1))
            or
            (mat.b != 0 and mat.b == -mat.c)
            )

    space_adv = 0   # advance of a space character, if the span has one
    last_adv = 0    # advance of the most recent character
    span_bbox = mupdf.FzRect()
    chars = []
    # walk through the characters of the span
    for i in range( span.m_internal.len):
        item = span.items(i)
        adv = 0
        if item.gid >= 0:
            adv = mupdf.fz_advance_glyph( font, item.gid, wmode)
        adv *= fsize
        last_adv = adv
        if item.ucs == 32:
            space_adv = adv

        char_orig = mupdf.fz_transform_point( mupdf.fz_make_point(item.x, item.y), ctm)
        # rotate around the character origin
        m1 = mupdf.fz_make_matrix(1, 0, 0, 1, -char_orig.x, -char_orig.y)
        m1 = mupdf.fz_concat(m1, rot)
        m1 = mupdf.fz_concat(m1, mupdf.FzMatrix(1, 0, 0, 1, char_orig.x, char_orig.y))

        x0 = char_orig.x
        x1 = x0 + adv
        if up_down_flip:
            y0 = char_orig.y + dscsize
            y1 = char_orig.y + ascsize
        else:
            y0 = char_orig.y - ascsize
            y1 = char_orig.y - dscsize
        char_bbox = mupdf.fz_transform_rect( mupdf.fz_make_rect(x0, y0, x1, y1), m1)

        chars.append(
                (
                    item.ucs,
                    item.gid,
                    (char_orig.x, char_orig.y),
                    (char_bbox.x0, char_bbox.y0, char_bbox.x1, char_bbox.y1),
                )
                )
        if i > 0:
            span_bbox = mupdf.fz_union_rect(span_bbox, char_bbox)
        else:
            span_bbox = char_bbox
    chars = tuple(chars)

    if not space_adv:
        if fflags & TEXT_FONT_MONOSPACED:
            space_adv = last_adv    # for mono, any char width suffices
        else:
            c, out_font = mupdf.fz_encode_character_with_fallback( font, 32, 0, 0)
            space_adv = mupdf.fz_advance_glyph( font, c, wmode)
            space_adv *= fsize
            if not space_adv:
                space_adv = last_adv

    if colorspace:
        rgb = mupdf.fz_convert_color(
                mupdf.FzColorspace( mupdf.ll_fz_keep_colorspace( colorspace)),
                color,
                mupdf.fz_device_rgb(),
                mupdf.FzColorspace(),
                mupdf.FzColorParams(),
                )
        rgb = rgb[:3]   # mupdf.fz_convert_color() always returns 4 items.
    else:
        rgb = (0, 0, 0)

    if dev.linewidth > 0:   # width of character border
        linewidth = dev.linewidth
    else:
        linewidth = fsize * 0.05    # default: 5% of font size

    # make the span dictionary
    span_dict = {
            'dir': JM_py_from_point(direction),
            'font': JM_EscapeStrFromStr(fontname),
            'wmode': wmode,
            'flags': fflags,
            "bidi_lvl": span.m_internal.bidi_level,
            "bidi_dir": span.m_internal.markup_dir,
            'ascender': asc,
            'descender': dsc,
            'colorspace': 3,
            'color': rgb,
            'size': fsize,
            "opacity": alpha,
            "linewidth": linewidth,
            "spacewidth": space_adv,
            'type': type_,
            'bbox': JM_py_from_rect(span_bbox),
            'layer': dev.layer_name,
            "seqno": seqno,
            'chars': chars,
            }
    dev.out.append( span_dict)
def jm_lineart_color(colorspace, color):
    '''
    Convert `color` in `colorspace` to RGB.

    Returns the first three items of the conversion result, or an empty
    tuple if `colorspace` is falsy. Conversion errors are optionally logged
    and re-raised.
    '''
    if not colorspace:
        return ()
    try:
        # The objects whose internals are passed to
        # mupdf.ll_fz_convert_color() must be named Python objects. Passing
        # e.g. mupdf.FzColorParams().internal() directly seems to end up
        # with a corrupted `params`.
        rgb_space = mupdf.FzColorspace( mupdf.FzColorspace.Fixed_RGB)
        params = mupdf.FzColorParams()
        converted = mupdf.ll_fz_convert_color(
                colorspace,
                color,
                rgb_space.m_internal,
                None,
                params.internal(),
                )
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
    return converted[:3]
def jm_lineart_drop_device(dev, ctx):
    '''
    Release the collected state of a lineart device.

    A list-type `dev.out` is replaced by a new empty list (any other kind of
    output object is left alone); `dev.scissors` is always reset to an empty
    list.
    '''
    output_is_list = isinstance(dev.out, list)
    if output_is_list:
        dev.out = []
    dev.scissors = []
-
-
def jm_lineart_fill_path( dev, ctx, path, even_odd, ctm, colorspace, color, alpha, color_params):
    '''
    Device callback for a filled path.

    Walks the path into `dev.pathdict` and, unless the path produced no
    items, completes the dictionary with the fill properties, hands it to
    jm_append_merge() and increments `dev.seqno`. Exceptions are optionally
    logged and re-raised.
    '''
    even_odd = bool(even_odd)
    try:
        assert isinstance( ctm, mupdf.fz_matrix)
        dev.ctm = mupdf.FzMatrix( ctm)  # fz_concat(ctm, dev_ptm);
        dev.path_type = trace_device_FILL_PATH
        jm_lineart_path( dev, ctx, path)
        pathdict = dev.pathdict
        if pathdict is None:
            return  # nothing was drawn
        pathdict[ dictkey_type] = "f"
        pathdict[ "even_odd"] = even_odd
        pathdict[ "fill_opacity"] = alpha
        pathdict[ "fill"] = jm_lineart_color( colorspace, color)
        pathdict[ dictkey_rect] = JM_py_from_rect(dev.pathrect)
        pathdict[ "seqno"] = dev.seqno
        pathdict[ 'layer'] = dev.layer_name
        if dev.clips:
            pathdict[ 'level'] = dev.depth
        jm_append_merge(dev)
        dev.seqno += 1
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
- # There are 3 text trace types:
- # 0 - fill text (PDF Tr 0)
- # 1 - stroke text (PDF Tr 1)
- # 3 - ignore text (PDF Tr 3)
def jm_lineart_fill_text( dev, ctx, text, ctm, colorspace, color, alpha, color_params):
    '''
    Device callback for filled text: trace it with text type 0 using the
    current `dev.seqno`, then increment `dev.seqno`.
    '''
    if 0:
        # Disabled diagnostics of the callback arguments.
        log(f'{type(ctx)=} {ctx=}')
        log(f'{type(dev)=} {dev=}')
        log(f'{type(text)=} {text=}')
        log(f'{type(ctm)=} {ctm=}')
        log(f'{type(colorspace)=} {colorspace=}')
        log(f'{type(color)=} {color=}')
        log(f'{type(alpha)=} {alpha=}')
        log(f'{type(color_params)=} {color_params=}')
    fill_type = 0   # fill text (PDF Tr 0)
    jm_trace_text(dev, text, fill_type, ctm, colorspace, color, alpha, dev.seqno)
    dev.seqno += 1
def jm_lineart_ignore_text(dev, text, ctm, *extra):
    '''
    Device callback for ignored text: trace it with text type 3 (PDF Tr 3),
    without colorspace / color and with opacity 1, then increment
    `dev.seqno`.

    The other device callbacks in this file receive a context object right
    after the device, i.e. this function is presumably invoked as
    `(dev, ctx, text, ctm)`. The original signature `(dev, text, ctm)` could
    not accept that call. Both forms are supported: with four positional
    arguments the second one is treated as the context and skipped.
    '''
    if extra:
        # Invoked as (dev, ctx, text, ctm): the parameters named `text` and
        # `ctm` actually hold ctx and text, and the matrix is in `extra`.
        text, ctm = ctm, extra[0]
    jm_trace_text(dev, text, 3, ctm, None, None, 1, dev.seqno)
    dev.seqno += 1
class Walker(mupdf.FzPathWalker2):
    '''
    Path walker which converts the moveto / lineto / curveto / closepath
    callbacks of a path into items of `dev.pathdict[dictkey_items]`, while
    maintaining `dev.pathrect`, `dev.lastpoint`, `dev.firstpoint`,
    `dev.havemove` and `dev.linecount`.
    '''

    def __init__(self, dev):
        super().__init__()
        for callback in ('moveto', 'lineto', 'curveto', 'closepath'):
            getattr(self, f'use_virtual_{callback}')()
        self.dev = dev

    def closepath(self, ctx):    # trace_close().
        '''
        Close the current sub path. Three preceding lines may be turned into
        a rectangle; otherwise a closing line is added if required and the
        "closePath" flag of the path dictionary is set.
        '''
        try:
            dev = self.dev
            if dev.linecount == 3 and jm_checkrect(dev):
                return
            dev.linecount = 0   # reset # of consec. lines
            if not dev.havemove:
                dev.pathdict[ "closePath"] = True
            else:
                if dev.lastpoint != dev.firstpoint:
                    # explicit line back to the start of the sub path
                    closing_line = (
                            "l",
                            JM_py_from_point(dev.lastpoint),
                            JM_py_from_point(dev.firstpoint),
                            )
                    dev.pathdict[dictkey_items].append(closing_line)
                    dev.lastpoint = dev.firstpoint
                dev.pathdict["closePath"] = False
            dev.havemove = 0
        except Exception:
            if g_exceptions_verbose:
                exception_info()
            raise

    def curveto(self, ctx, x1, y1, x2, y2, x3, y3):   # trace_curveto().
        '''
        Append a "c" item going from the last point via the two control
        points to (x3, y3); all points are transformed by `dev.ctm`.
        '''
        try:
            dev = self.dev
            dev.linecount = 0   # reset # of consec. lines
            points = [
                    mupdf.fz_transform_point( mupdf.fz_make_point(x, y), dev.ctm)
                    for x, y in ((x1, y1), (x2, y2), (x3, y3))
                    ]
            for point in points:
                dev.pathrect = mupdf.fz_include_point_in_rect(dev.pathrect, point)
            curve = (
                    "c",
                    JM_py_from_point(dev.lastpoint),
                    JM_py_from_point(points[0]),
                    JM_py_from_point(points[1]),
                    JM_py_from_point(points[2]),
                    )
            dev.lastpoint = points[2]
            dev.pathdict[ dictkey_items].append( curve)
        except Exception:
            if g_exceptions_verbose:
                exception_info()
            raise

    def lineto(self, ctx, x, y):   # trace_lineto().
        '''
        Append an "l" item from the last point to (x, y) transformed by
        `dev.ctm`. After 4 consecutive lines of a non-fill path, check
        whether they form a quad.
        '''
        try:
            dev = self.dev
            target = mupdf.fz_transform_point( mupdf.fz_make_point(x, y), dev.ctm)
            dev.pathrect = mupdf.fz_include_point_in_rect( dev.pathrect, target)
            line = (
                    'l',
                    JM_py_from_point( dev.lastpoint),
                    JM_py_from_point( target),
                    )
            dev.lastpoint = target
            dev.pathdict[ dictkey_items].append( line)
            dev.linecount += 1  # counts consecutive lines
            if dev.linecount == 4 and dev.path_type != trace_device_FILL_PATH:
                # shrink to "re" or "qu" item
                jm_checkquad(dev)
        except Exception:
            if g_exceptions_verbose:
                exception_info()
            raise

    def moveto(self, ctx, x, y):   # trace_moveto().
        '''
        Start a new sub path at (x, y) transformed by `dev.ctm`. An infinite
        `dev.pathrect` is replaced by the empty rectangle at that point.
        '''
        if 0 and isinstance(self.dev.pathdict, dict):
            # Disabled diagnostics.
            log(f'self.dev.pathdict:')
            for n, v in self.dev.pathdict.items():
                log( '    {type(n)=} {len(n)=} {n!r} {n}: {v!r}: {v}')
        try:
            dev = self.dev
            start = mupdf.fz_transform_point(
                    mupdf.fz_make_point(x, y),
                    dev.ctm,
                    )
            dev.lastpoint = start
            if mupdf.fz_is_infinite_rect( dev.pathrect):
                dev.pathrect = mupdf.fz_make_rect(
                        dev.lastpoint.x,
                        dev.lastpoint.y,
                        dev.lastpoint.x,
                        dev.lastpoint.y,
                        )
            dev.firstpoint = dev.lastpoint
            dev.havemove = 1
            dev.linecount = 0   # reset # of consec. lines
        except Exception:
            if g_exceptions_verbose:
                exception_info()
            raise
def jm_lineart_path(dev, ctx, path):
    '''
    Fill the "items" list of a fresh path dictionary for `path`.

    * reset the path rectangle, the line counter and the last point
    * create a new `dev.pathdict` with an empty items list
    * walk the path, which appends the individual items
    * set `dev.pathdict` to None if no items were created

    Exceptions are optionally logged and re-raised.
    '''
    try:
        dev.pathrect = mupdf.FzRect( mupdf.FzRect.Fixed_INFINITE)
        dev.linecount = 0
        dev.lastpoint = mupdf.FzPoint( 0, 0)
        dev.pathdict = {dictkey_items: []}

        # Creating the first Walker instance is slow, e.g. 0.3s; later ones
        # take around 0.01ms. If Walker were defined locally instead of
        # globally, every instantiation would take 0.3s.
        walker = Walker(dev)

        # Unlike fz_run_page(), fz_path_walker callbacks are not passed a
        # pointer to the struct but an arbitrary void*. The underlying C++
        # Director callbacks use this void* to identify the fz_path_walker
        # instance, so we need to pass arg=walker.m_internal.
        kept_path = mupdf.FzPath(mupdf.ll_fz_keep_path(path))
        mupdf.fz_walk_path( kept_path, walker, walker.m_internal)

        if not dev.pathdict[ dictkey_items]:
            dev.pathdict = None     # nothing was added
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def jm_lineart_stroke_path( dev, ctx, path, stroke, ctm, colorspace, color, alpha, color_params):
    '''
    Device callback for a stroked path.

    Walks the path into `dev.pathdict` and, unless the path produced no
    items, completes the dictionary with the stroke properties, hands it to
    jm_append_merge() and increments `dev.seqno`.

    Line width, line join, dash lengths and dash phase are multiplied by
    `dev.pathfactor`, which is taken from the matrix if abs(a) == abs(d) or
    abs(b) == abs(c), and is 1 otherwise.

    The 'dashes' value is always a str of the form "[ d1 d2 ... ] phase", or
    "[] 0" when there is no dash pattern. Previously a mupdf buffer object
    was stored when a dash pattern existed, which was inconsistent with the
    no-dash case.

    Exceptions are optionally logged and re-raised.
    '''
    try:
        assert isinstance( ctm, mupdf.fz_matrix)
        dev.pathfactor = 1
        if ctm.a != 0 and abs(ctm.a) == abs(ctm.d):
            dev.pathfactor = abs(ctm.a)
        elif ctm.b != 0 and abs(ctm.b) == abs(ctm.c):
            dev.pathfactor = abs(ctm.b)
        dev.ctm = mupdf.FzMatrix( ctm)  # fz_concat(ctm, dev_ptm);
        dev.path_type = trace_device_STROKE_PATH

        jm_lineart_path( dev, ctx, path)
        if dev.pathdict is None:
            return  # nothing was drawn

        dev.pathdict[ dictkey_type] = 's'
        dev.pathdict[ 'stroke_opacity'] = alpha
        dev.pathdict[ 'color'] = jm_lineart_color( colorspace, color)
        dev.pathdict[ dictkey_width] = dev.pathfactor * stroke.linewidth
        dev.pathdict[ 'lineCap'] = (
                stroke.start_cap,
                stroke.dash_cap,
                stroke.end_cap,
                )
        dev.pathdict[ 'lineJoin'] = dev.pathfactor * stroke.linejoin
        if 'closePath' not in dev.pathdict:
            dev.pathdict['closePath'] = False

        # output the "dashes" string
        if stroke.dash_len:
            # mupdf python's SWIG-generated floats_getitem() gives access to
            # the C array `float *stroke.dash_list`.
            lengths = ''.join(
                    f'{_format_g(dev.pathfactor * mupdf.floats_getitem( stroke.dash_list, i))} '
                    for i in range( stroke.dash_len)
                    )
            phase = _format_g(dev.pathfactor * stroke.dash_phase)
            dev.pathdict[ 'dashes'] = f'[ {lengths}] {phase}'
        else:
            dev.pathdict[ 'dashes'] = '[] 0'

        dev.pathdict[ dictkey_rect] = JM_py_from_rect(dev.pathrect)
        dev.pathdict['layer'] = dev.layer_name
        dev.pathdict[ 'seqno'] = dev.seqno
        if dev.clips:
            dev.pathdict[ 'level'] = dev.depth
        jm_append_merge(dev)
        dev.seqno += 1

    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def jm_lineart_clip_path(dev, ctx, path, even_odd, ctm, scissor):
    '''
    Device callback for a clip path. Does nothing unless `dev.clips` is set.

    Walks the path into `dev.pathdict`; if items were produced, the
    dictionary is completed as a 'clip' entry (including the scissor
    computed by compute_scissor()), handed to jm_append_merge(), and
    `dev.depth` is incremented.
    '''
    if not dev.clips:
        return
    dev.ctm = mupdf.FzMatrix(ctm)   # fz_concat(ctm, trace_device_ptm);
    dev.path_type = trace_device_CLIP_PATH
    jm_lineart_path(dev, ctx, path)
    pathdict = dev.pathdict
    if pathdict is None:
        return
    pathdict[ dictkey_type] = 'clip'
    pathdict[ 'even_odd'] = bool(even_odd)
    if 'closePath' not in pathdict:
        pathdict['closePath'] = False

    pathdict['scissor'] = JM_py_from_rect(compute_scissor(dev))
    pathdict['level'] = dev.depth
    pathdict['layer'] = dev.layer_name
    jm_append_merge(dev)
    dev.depth += 1
def jm_lineart_clip_stroke_path(dev, ctx, path, stroke, ctm, scissor):
    '''
    Device callback for a clip defined by a stroked path. Does nothing
    unless `dev.clips` is set.

    Walks the path into `dev.pathdict`; if items were produced, the
    dictionary is completed as a 'clip' entry with 'even_odd' set to None
    (including the scissor computed by compute_scissor()), handed to
    jm_append_merge(), and `dev.depth` is incremented.

    The path type is stored under the key constant `dictkey_type`, like in
    jm_lineart_clip_path(). The previous code used the string literal
    'dictkey_type' as key, so the entry had no type under the key that
    jm_append_merge() looks up.
    '''
    if not dev.clips:
        return
    dev.ctm = mupdf.FzMatrix(ctm)   # fz_concat(ctm, trace_device_ptm);
    dev.path_type = trace_device_CLIP_STROKE_PATH
    jm_lineart_path(dev, ctx, path)
    if dev.pathdict is None:
        return
    dev.pathdict[ dictkey_type] = 'clip'
    dev.pathdict['even_odd'] = None
    if 'closePath' not in dev.pathdict:
        dev.pathdict['closePath'] = False
    dev.pathdict['scissor'] = JM_py_from_rect(compute_scissor(dev))
    dev.pathdict['level'] = dev.depth
    dev.pathdict['layer'] = dev.layer_name
    jm_append_merge(dev)
    dev.depth += 1
def jm_lineart_clip_stroke_text(dev, ctx, text, stroke, ctm, scissor):
    '''
    Device callback for a clip defined by stroked text: if `dev.clips` is
    set, push a scissor via compute_scissor() and increment `dev.depth`.
    '''
    if dev.clips:
        compute_scissor(dev)
        dev.depth += 1
def jm_lineart_clip_text(dev, ctx, text, ctm, scissor):
    '''
    Device callback for a clip defined by text: if `dev.clips` is set, push
    a scissor via compute_scissor() and increment `dev.depth`.
    '''
    if dev.clips:
        compute_scissor(dev)
        dev.depth += 1
def jm_lineart_clip_image_mask( dev, ctx, image, ctm, scissor):
    '''
    Device callback for a clip defined by an image mask: if `dev.clips` is
    set, push a scissor via compute_scissor() and increment `dev.depth`.
    '''
    if dev.clips:
        compute_scissor(dev)
        dev.depth += 1
-
def jm_lineart_pop_clip(dev, ctx):
    '''
    Device callback ending a clip: remove the most recent scissor and
    decrement `dev.depth`. Does nothing if `dev.clips` is not set or there
    are no scissors (None or empty).
    '''
    if dev.clips and dev.scissors:
        # a truthy scissors list has at least one entry
        del dev.scissors[-1]
        dev.depth -= 1
def jm_lineart_begin_layer(dev, ctx, name):
    '''
    Device callback starting a layer: remember its name in
    `dev.layer_name`; a falsy name is stored as the empty string.
    '''
    dev.layer_name = name if name else ""
def jm_lineart_end_layer(dev, ctx):
    '''
    Device callback ending a layer: reset `dev.layer_name` to the empty
    string.
    '''
    dev.layer_name = ""
def jm_lineart_begin_group(dev, ctx, bbox, cs, isolated, knockout, blendmode, alpha):
    '''
    Device callback starting a group. Does nothing unless `dev.clips` is
    set; otherwise a "group" dictionary is stored in `dev.pathdict`, handed
    to jm_append_merge(), and `dev.depth` is incremented.
    '''
    if not dev.clips:
        return
    group = {   # Py_BuildValue("{s:s,s:N,s:N,s:N,s:s,s:f,s:i,s:N}",
            "type": "group",
            "rect": JM_py_from_rect(bbox),
            "isolated": bool(isolated),
            "knockout": bool(knockout),
            "blendmode": mupdf.fz_blendmode_name(blendmode),
            "opacity": alpha,
            "level": dev.depth,
            "layer": dev.layer_name,
            }
    dev.pathdict = group
    jm_append_merge(dev)
    dev.depth += 1
def jm_lineart_end_group(dev, ctx):
    '''
    Device callback ending a group: decrement `dev.depth` if `dev.clips` is
    set.
    '''
    if dev.clips:
        dev.depth -= 1
def jm_lineart_stroke_text(dev, ctx, text, stroke, ctm, colorspace, color, alpha, color_params):
    '''
    Device callback for stroked text: trace it with text type 1 using the
    current `dev.seqno`, then increment `dev.seqno`.
    '''
    stroke_type = 1     # stroke text (PDF Tr 1)
    jm_trace_text(dev, text, stroke_type, ctm, colorspace, color, alpha, dev.seqno)
    dev.seqno += 1
def jm_dev_linewidth( dev, ctx, path, stroke, matrix, colorspace, color, alpha, color_params):
    '''
    Device callback for a stroked path: remember the stroke's line width in
    `dev.linewidth` and advance the sequence number.
    '''
    width = stroke.linewidth
    dev.linewidth = width
    jm_increase_seqno( dev, ctx)
def jm_increase_seqno( dev, ctx, *vargs):
    '''
    Generic device callback which only increments `dev.seqno`. Any further
    callback arguments are accepted and ignored. Exceptions are optionally
    logged and re-raised.
    '''
    try:
        dev.seqno += 1
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def planish_line(p1: point_like, p2: point_like) -> Matrix:
    """Compute the matrix which maps the line from p1 to p2 onto the x-axis.

    The line keeps its length and p1 * matrix = Point(0, 0).

    Args:
        p1, p2: point_like
    Returns:
        Matrix which maps p1 to Point(0, 0) and p2 to a point on the x axis
        at the same distance to Point(0, 0).
    """
    start = Point(p1)
    end = Point(p2)
    return Matrix(util_hor_matrix(start, end))
class JM_image_reporter_Filter(mupdf.PdfFilterOptions2):
    '''
    PDF filter options whose image filter callback forwards every image to
    JM_image_filter().
    '''

    def __init__(self):
        super().__init__()
        self.use_virtual_image_filter()

    def image_filter( self, ctx, ctm, name, image):
        assert isinstance(ctm, mupdf.fz_matrix)
        matrix = mupdf.FzMatrix(ctm)
        JM_image_filter(self, matrix, name, image)
        if mupdf_cppyy:
            # cppyy doesn't appear to treat returned None as nullptr,
            # resulting in obscure 'python exception' exception.
            return 0
class JM_new_bbox_device_Device(mupdf.FzDevice2):
    '''
    Device which records the bounding boxes of the drawing operations of a
    page in the list `result`. If `layers` is truthy, each entry also
    contains the name of the current layer.
    '''

    def __init__(self, result, layers):
        super().__init__()
        self.result = result
        self.layers = layers
        self.layer_name = ""
        # Enable the Python implementations of the callbacks bound below.
        for callback in (
                'fill_path',
                'stroke_path',
                'fill_text',
                'stroke_text',
                'ignore_text',
                'fill_shade',
                'fill_image',
                'fill_image_mask',
                'begin_layer',
                'end_layer',
                ):
            getattr(self, f'use_virtual_{callback}')()

    begin_layer = jm_lineart_begin_layer
    end_layer = jm_lineart_end_layer

    fill_path = jm_bbox_fill_path
    stroke_path = jm_bbox_stroke_path
    fill_text = jm_bbox_fill_text
    stroke_text = jm_bbox_stroke_text
    ignore_text = jm_bbox_ignore_text
    fill_shade = jm_bbox_fill_shade
    fill_image = jm_bbox_fill_image
    fill_image_mask = jm_bbox_fill_image_mask
-
class JM_new_output_fileptr_Output(mupdf.FzOutput2):
    '''
    Output object which delegates write, seek, tell and truncate to the
    Python file-like object `bio`.
    '''

    def __init__(self, bio):
        super().__init__()
        self.bio = bio
        for callback in ('write', 'seek', 'tell', 'truncate'):
            getattr(self, f'use_virtual_{callback}')()

    def seek( self, ctx, offset, whence):
        target = self.bio
        return target.seek( offset, whence)

    def tell( self, ctx):
        return self.bio.tell()

    def truncate( self, ctx):
        target = self.bio
        return target.truncate()

    def write(self, ctx, data_raw, data_length):
        # convert the raw buffer to Python bytes before handing it on
        chunk = mupdf.raw_to_python_bytes(data_raw, data_length)
        return self.bio.write(chunk)
def compute_scissor(dev):
    '''
    Compute the scissor of a new clip, push it onto `dev.scissors` and
    return it.

    Every scissor of a clip is a sub rectangle of the preceding clip scissor
    if the clip level is larger: the new scissor is `dev.pathrect`
    intersected with the most recent scissor, or `dev.pathrect` itself if
    there is none yet.
    '''
    if dev.scissors is None:
        dev.scissors = list()
    if dev.scissors:
        previous = JM_rect_from_py(dev.scissors[-1])
        scissor = mupdf.fz_intersect_rect(previous, dev.pathrect)
    else:
        scissor = dev.pathrect
    dev.scissors.append(JM_py_from_rect(scissor))
    return scissor
class JM_new_lineart_device_Device(mupdf.FzDevice2):
    '''
    LINEART device for Python method Page.get_cdrawings()

    Args:
        out: output object receiving the path dictionaries.
        clips: whether clip and group information is recorded.
        method: stored in `self.method`; not used within this class.
    '''
    def __init__(self, out, clips, method):
        super().__init__()
        # fixme: this results in "Unexpected call of unimplemented virtual_fnptrs fn FzDevice2::drop_device().".
        #self.use_virtual_drop_device()
        self.use_virtual_fill_path()
        self.use_virtual_stroke_path()
        self.use_virtual_clip_path()
        self.use_virtual_clip_image_mask()
        self.use_virtual_clip_stroke_path()
        self.use_virtual_clip_stroke_text()
        self.use_virtual_clip_text()

        # These three were previously written without call parentheses,
        # which only looked up the methods without invoking them, so the
        # text callbacks bound below were never enabled.
        self.use_virtual_fill_text()
        self.use_virtual_stroke_text()
        self.use_virtual_ignore_text()

        self.use_virtual_fill_shade()
        self.use_virtual_fill_image()
        self.use_virtual_fill_image_mask()

        self.use_virtual_pop_clip()

        self.use_virtual_begin_group()
        self.use_virtual_end_group()

        self.use_virtual_begin_layer()
        self.use_virtual_end_layer()

        self.out = out
        self.seqno = 0
        self.depth = 0
        self.clips = clips
        self.method = method

        self.scissors = None    # created on demand by compute_scissor()
        self.layer_name = ""    # optional content name

        self.linewidth = 0
        self.ptm = mupdf.FzMatrix()
        self.ctm = mupdf.FzMatrix()
        self.rot = mupdf.FzMatrix()
        self.lastpoint = mupdf.FzPoint()
        self.firstpoint = mupdf.FzPoint()
        self.havemove = 0
        self.pathrect = mupdf.FzRect()
        self.pathfactor = 0
        self.linecount = 0
        self.path_type = 0

    #drop_device = jm_lineart_drop_device

    fill_path = jm_lineart_fill_path
    stroke_path = jm_lineart_stroke_path
    clip_image_mask = jm_lineart_clip_image_mask
    clip_path = jm_lineart_clip_path
    clip_stroke_path = jm_lineart_clip_stroke_path
    clip_text = jm_lineart_clip_text
    clip_stroke_text = jm_lineart_clip_stroke_text

    # text, shadings and images only advance the sequence number
    fill_text = jm_increase_seqno
    stroke_text = jm_increase_seqno
    ignore_text = jm_increase_seqno

    fill_shade = jm_increase_seqno
    fill_image = jm_increase_seqno
    fill_image_mask = jm_increase_seqno

    pop_clip = jm_lineart_pop_clip

    begin_group = jm_lineart_begin_group
    end_group = jm_lineart_end_group

    begin_layer = jm_lineart_begin_layer
    end_layer = jm_lineart_end_layer
-
class JM_new_texttrace_device(mupdf.FzDevice2):
    '''
    Trace TEXT device for Python method Page.get_texttrace()
    '''
    def __init__(self, out):
        super().__init__()
        # Enable the Python implementations of the callbacks bound below.
        for callback in (
                'fill_path',
                'stroke_path',
                'fill_text',
                'stroke_text',
                'ignore_text',
                'fill_shade',
                'fill_image',
                'fill_image_mask',
                'begin_layer',
                'end_layer',
                ):
            getattr(self, f'use_virtual_{callback}')()

        self.out = out

        self.seqno = 0
        self.depth = 0
        self.clips = 0
        self.method = None

        self.pathdict = dict()
        self.scissors = list()
        self.linewidth = 0
        self.ptm = mupdf.FzMatrix()
        self.ctm = mupdf.FzMatrix()
        self.rot = mupdf.FzMatrix()
        self.lastpoint = mupdf.FzPoint()
        self.pathrect = mupdf.FzRect()
        self.pathfactor = 0
        self.linecount = 0
        self.path_type = 0
        self.layer_name = ""

    # paths, shadings and images only advance the sequence number; stroked
    # paths additionally record their line width
    fill_path = jm_increase_seqno
    stroke_path = jm_dev_linewidth
    fill_text = jm_lineart_fill_text
    stroke_text = jm_lineart_stroke_text
    ignore_text = jm_lineart_ignore_text
    fill_shade = jm_increase_seqno
    fill_image = jm_increase_seqno
    fill_image_mask = jm_increase_seqno

    begin_layer = jm_lineart_begin_layer
    end_layer = jm_lineart_end_layer
def ConversionHeader(i: str, filename: OptStr ="unknown"):
    """Return the header text to put before the page output of a conversion.

    Args:
        i: output format (case-insensitive): "html", "json", "xml" or
            "xhtml". Any other value gives an empty header.
        filename: document name, inserted into the "json" and "xml" headers.
    Returns:
        The header string.

    The file name is escaped for its target format: as a JSON string for
    "json" and as an XML attribute value for "xml". Names containing e.g.
    quotes, backslashes, '<' or '&' would otherwise produce malformed
    output. Names without such characters give the same result as before.
    """
    import json
    import textwrap
    from xml.sax.saxutils import escape

    fmt = i.lower()
    name = str(filename)    # same text which "%s" formatting would insert

    if fmt == "html":
        return textwrap.dedent("""
            <!DOCTYPE html>
            <html>
            <head>
            <style>
            body{background-color:gray}
            div{position:relative;background-color:white;margin:1em auto}
            p{position:absolute;margin:0}
            img{position:absolute}
            </style>
            </head>
            <body>
            """)
    if fmt == "json":
        # json.dumps() adds the enclosing double quotes itself
        return '{"document": %s, "pages": [\n' % json.dumps(name, ensure_ascii=False)
    if fmt == "xml":
        attr = escape(name, {'"': "&quot;"})
        return '\n<?xml version="1.0"?>\n<document name="%s">\n' % attr
    if fmt == "xhtml":
        return textwrap.dedent("""
            <?xml version="1.0"?>
            <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
            <html xmlns="http://www.w3.org/1999/xhtml">
            <head>
            <style>
            body{background-color:gray}
            div{background-color:white;margin:1em;padding:1em}
            p{white-space:pre-wrap}
            </style>
            </head>
            <body>
            """)
    return ""
def ConversionTrailer(i: str):
    """Return the trailer text to put after the page output of a conversion.

    Args:
        i: output format (case-insensitive): "html", "json", "xml" or
            "xhtml". Any other value gives an empty trailer.
    """
    closing_html = "</body>\n</html>\n"
    trailers = {
            "html": closing_html,
            "json": "]\n}",
            "xml": "</document>\n",
            "xhtml": closing_html,
            }
    return trailers.get(i.lower(), "")
def annot_preprocess(page: "Page") -> int:
    """Prepare the page for inserting an annotation.

    Raises:
        ValueError: if the page's document is not a PDF.
    Returns:
        The page rotation found on entry. If it was not 0, the page rotation
        has been set to 0.
    """
    CheckParent(page)
    if not page.parent.is_pdf:
        raise ValueError("is no PDF")
    rotation_before = page.rotation
    if rotation_before != 0:
        page.set_rotation(0)
    return rotation_before
def annot_postprocess(page: "Page", annot: "Annot") -> None:
    """Finish the insertion of an annotation.

    Makes `page` the parent of `annot`, stores the annotation in the page's
    annotation references under its id() and sets its ownership flag.
    """
    assert isinstance( page, Page)
    assert isinstance( annot, Annot)
    #annot.parent = weakref.proxy(page)
    annot.parent = page
    key = id(annot)
    page._annot_refs[key] = annot
    annot.thisown = True
def canon(c):
    '''
    Return the canonical form of code point `c` (an int):

    * no-break space, line separator, paragraph separator, CR, LF and TAB
      become a space
    * ASCII upper case letters become lower case
    * everything else is returned unchanged
    '''
    assert isinstance(c, int)
    # TODO: proper unicode case folding
    # TODO: character equivalence (a matches ä, etc)
    space_like = (0xA0, 0x2028, 0x2029, ord('\r'), ord('\n'), ord('\t'))
    if c in space_like:
        return ord(' ')
    if ord('A') <= c <= ord('Z'):
        return c - ord('A') + ord('a')
    return c
def chartocanon(s):
    '''
    Decode the first character of str `s` with mupdf.fz_chartorune() and
    return `(n, c)`: the first value returned by that function and the
    canonical form (see canon()) of the decoded code point.
    '''
    assert isinstance(s, str)
    n, rune = mupdf.fz_chartorune(s)
    return n, canon(rune)
def dest_is_valid(o, page_count, page_object_nums, names_list):
    '''
    Check whether the destination of PDF object `o` is valid.

    * A 'GoTo' action ('A' entry) whose 'D' entry is not in `names_list`
      is invalid.
    * A string 'Dest' entry is valid if it is in `names_list` (the result of
      string_in_names_list() is returned).
    * Any other 'Dest' entry is valid if its first array item is one of the
      page objects (see dest_is_valid_page()).

    Returns 0 if invalid, otherwise 1 (or the names list lookup result).
    '''
    action = mupdf.pdf_dict_get( o, PDF_NAME('A'))
    is_goto = mupdf.pdf_name_eq(
            mupdf.pdf_dict_get( action, PDF_NAME('S')),
            PDF_NAME('GoTo'),
            )
    if is_goto and not string_in_names_list(
            mupdf.pdf_dict_get( action, PDF_NAME('D')),
            names_list,
            ):
        return 0

    dest = mupdf.pdf_dict_get( o, PDF_NAME('Dest'))
    if not dest.m_internal:
        return 1    # no 'Dest' entry: nothing more to check
    if mupdf.pdf_is_string( dest):
        return string_in_names_list( dest, names_list)
    target = mupdf.pdf_array_get( dest, 0)
    if not dest_is_valid_page( target, page_object_nums, page_count):
        return 0
    return 1
def dest_is_valid_page(obj, page_object_nums, pagecount):
    '''
    Return 1 if the object number of `obj` is not 0 and equals one of the
    first `pagecount` entries of `page_object_nums`, otherwise 0.
    '''
    num = mupdf.pdf_to_num(obj)
    if num == 0:
        return 0
    found = any( page_object_nums[i] == num for i in range(pagecount))
    return 1 if found else 0
def find_string(s, needle):
    '''
    Find the first position in str `s` where match_string() matches
    `needle`.

    Returns `(start, end)`, where `end` is the value returned by
    match_string() for the suffix `s[start:]`, shifted by `start`.
    Returns `(None, None)` if there is no match.
    '''
    assert isinstance(s, str)
    for start in range(len(s)):
        relative_end = match_string(s[start:], needle)
        if relative_end is None:
            continue
        return start, relative_end + start
    return None, None
def get_pdf_now() -> str:
    '''
    "Now" timestamp in PDF Format

    Returns:
        A string "D:YYYYMMDDhhmmss" for the current local time, followed by
        the offset of local time from UTC as "+HH'mm'" or "-HH'mm'". No
        offset is appended if local time equals UTC.

    The offset is taken from time.altzone if daylight saving time is
    currently in effect, otherwise from time.timezone. Hours and minutes are
    derived from the absolute offset, so that zones east of UTC with a
    fractional hour offset (e.g. UTC+05:30) are reported correctly.
    '''
    import time
    now = time.localtime()
    # Both values are seconds WEST of UTC, i.e. positive means behind UTC.
    if time.daylight and now.tm_isdst > 0:
        west_seconds = time.altzone
    else:
        west_seconds = time.timezone
    tstamp = time.strftime("D:%Y%m%d%H%M%S", now)
    if west_seconds == 0:
        return tstamp
    hours, minutes = divmod(abs(west_seconds) // 60, 60)
    sign = "-" if west_seconds > 0 else "+"
    return f"{tstamp}{sign}{hours:02d}'{minutes:02d}'"
class ElementPosition:
    """Convert a dictionary with element position information to an object."""

    def __init__(self):
        # Starts without attributes; they are assigned by the code using it.
        pass
def make_story_elpos():
    """Create and return a fresh, empty `ElementPosition` instance."""
    elpos = ElementPosition()
    return elpos
-
def get_highlight_selection(page, start: point_like =None, stop: point_like =None, clip: rect_like =None) -> list:
    """Return rectangles of text lines between two points.

    Notes:
        The default of 'start' is top-left of 'clip'. The default of 'stop'
        is bottom-right of 'clip'.

    Args:
        start: start point_like
        stop: end point_like, must be 'below' start
        clip: consider this rect_like only, default is page rectangle
    Returns:
        List of line bbox intersections with the area established by the
        parameters.
    """
    # validate and normalize arguments
    clip = Rect(page.rect if clip is None else clip)
    # Accept any point_like (e.g. a plain tuple), not only objects that
    # already offer .x / .y attributes.
    start = clip.tl if start is None else Point(start)
    stop = clip.br if stop is None else Point(stop)
    clip.y0 = start.y
    clip.y1 = stop.y
    if clip.is_empty or clip.is_infinite:
        return []

    # extract text of page, clip only, no images, expand ligatures
    blocks = page.get_text("dict", flags=0, clip=clip)["blocks"]

    lines = []  # will return this list of rectangles
    for b in blocks:
        bbox = Rect(b["bbox"])
        if bbox.is_infinite or bbox.is_empty:
            continue
        for line in b["lines"]:
            bbox = Rect(line["bbox"])
            if bbox.is_infinite or bbox.is_empty:
                continue
            lines.append(bbox)

    if not lines:  # did not select anything
        return lines

    lines.sort(key=lambda bbox: bbox.y1)  # sort by vertical positions

    # cut off prefix from first line if start point is close to its top
    bboxf = lines.pop(0)
    if bboxf.y0 - start.y <= 0.1 * bboxf.height:  # close enough?
        r = Rect(start.x, bboxf.y0, bboxf.br)  # intersection rectangle
        if not (r.is_empty or r.is_infinite):
            lines.insert(0, r)  # insert again if not empty
    else:
        lines.insert(0, bboxf)  # insert again

    if not lines:  # the list might have been emptied
        return lines

    # cut off suffix from last line if stop point is close to its bottom
    bboxl = lines.pop()
    if stop.y - bboxl.y1 <= 0.1 * bboxl.height:  # close enough?
        r = Rect(bboxl.tl, stop.x, bboxl.y1)  # intersection rectangle
        if not (r.is_empty or r.is_infinite):
            lines.append(r)  # append if not empty
    else:
        lines.append(bboxl)  # append again

    return lines
def glyph_name_to_unicode(name: str) -> int:
    """Look up a character by name via `unicodedata` and return its code point.

    Any lookup failure yields 65533 (U+FFFD, the replacement character).
    """
    import unicodedata
    try:
        return ord(unicodedata.lookup(name))
    except Exception:
        return 0xFFFD
def hdist(dir, a, b):
    """Absolute value of the dot product of vector a->b with `dir`."""
    along = (b.x - a.x) * dir.x + (b.y - a.y) * dir.y
    return mupdf.fz_abs(along)
def make_table(rect: rect_like =(0, 0, 1, 1), cols: int =1, rows: int =1) -> list:
    """Split a rectangle into a grid of equal sized cells.

    Args:
        rect: rect_like to use as the table area
        rows: number of rows
        cols: number of columns
    Returns:
        A list with <rows> items, where each item is a list of <cols>
        PyMuPDF Rect objects of equal sizes.
    Raises:
        ValueError: if 'rect' is empty or infinite.
    """
    rect = Rect(rect)  # ensure this is a Rect
    if rect.is_empty or rect.is_infinite:
        raise ValueError("rect must be finite and not empty")
    origin = rect.tl

    cell_height = rect.height / rows
    cell_width = rect.width / cols
    step_right = (cell_width, 0, cell_width, 0)    # shift to next right cell
    step_down = (0, cell_height, 0, cell_height)   # shift to next lower cell

    # top row: start with the top-left cell and keep shifting to the right
    cell = Rect(origin, origin.x + cell_width, origin.y + cell_height)
    top_row = [cell]
    for _ in range(1, cols):
        cell += step_right
        top_row.append(cell)

    # each further row is the previous row shifted down by one cell height
    table = [top_row]
    for _ in range(1, rows):
        table.append([above + step_down for above in table[-1]])
    return table
def util_ensure_widget_calc(annot):
    '''
    Ensure that widgets with /AA/C JavaScript are in array AcroForm/CO

    The array is created if AcroForm has no /CO array yet. The widget is
    appended as an indirect reference unless an item with the same object
    number is already present.
    '''
    annot_obj = mupdf.pdf_annot_obj(annot.this)
    pdf = mupdf.pdf_get_bound_document(annot_obj)
    co_name = mupdf.pdf_new_name("CO")  # = PDF_NAME(CO)
    acroform = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('AcroForm'),
    )

    co_array = mupdf.pdf_dict_get(acroform, co_name)  # = AcroForm/CO
    if not mupdf.pdf_is_array(co_array):
        co_array = mupdf.pdf_dict_put_array(acroform, co_name, 2)

    count = mupdf.pdf_array_len(co_array)
    xref = mupdf.pdf_to_num(annot_obj)
    already_listed = any(
        mupdf.pdf_to_num(mupdf.pdf_array_get(co_array, idx)) == xref
        for idx in range(count)
    )
    if not already_listed:
        mupdf.pdf_array_push(co_array, mupdf.pdf_new_indirect(pdf, xref, 0))
def util_make_rect( *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
    '''
    Helper for initialising rectangle classes.

    Returns (x0, y0, x1, y1) derived from <args>, afterwards overridden by
    any of p0, p1, x0, y0, x1, y1 that is not None.

    Accepts following forms for <args>:
        () returns all zeros.
        (top-left, bottom-right)
        (top-left, x1, y1)
        (x0, y0, bottom-right)
        (x0, y0, x1, y1)
        (rect)

    Where top-left and bottom-right are (x, y) or something with .x, .y
    members; rect is something with .x0, .y0, .x1, and .y1 members.

    Raises Exception if <args> matches none of these forms.
    '''
    def as_xy(value):
        # (x, y) of a 2-item sequence or a point object, else (None, None).
        if isinstance(value, (list, tuple)) and len(value) == 2:
            return value[0], value[1]
        if isinstance(value, (Point, mupdf.FzPoint, mupdf.fz_point)):
            return value.x, value.y
        return None, None

    def flatten(value):
        # Turn a point, rect, sequence or scalar into a sequence of numbers.
        if isinstance(value, tuple):
            return value
        if isinstance(value, Point):
            return value.x, value.y
        if isinstance(value, (Rect, IRect, mupdf.FzRect, mupdf.fz_rect)):
            return value.x0, value.y0, value.x1, value.y1
        if isinstance(value, list):
            return value
        return (value,)

    def from_positional():
        count = len(args)
        if count == 0:
            return 0, 0, 0, 0
        if count == 1:
            arg = args[0]
            if isinstance(arg, (list, tuple)):
                if len(arg) == 2:
                    # (top-left, bottom-right) packed into one sequence
                    first, second = arg
                    ret = *first, *second
                    assert len(ret) == 4
                    return ret
                if len(arg) == 3:
                    # mixture of one point and two numbers in one sequence
                    part_a, part_b, part_c = arg
                    part_a = flatten(part_a)
                    part_b = flatten(part_b)
                    part_c = flatten(part_c)
                    ret = *part_a, *part_b, *part_c
                    assert len(ret) == 4
                    return ret
            ret = flatten(arg)
            assert len(ret) == 4, f'{arg=} {ret=}'
            return ret
        if count == 2:
            ret = as_xy(args[0]) + as_xy(args[1])
            assert len(ret) == 4
            return ret
        if count == 3:
            left, top = as_xy(args[0])
            if (left, top) != (None, None):
                return left, top, args[1], args[2]
            right, bottom = as_xy(args[2])
            if (right, bottom) != (None, None):
                return args[0], args[1], right, bottom
        if count == 4:
            return args[0], args[1], args[2], args[3]
        raise Exception( f'Unrecognised args: {args}')

    ret_x0, ret_y0, ret_x1, ret_y1 = from_positional()
    # Keyword arguments win over whatever was derived from <args>.
    if p0 is not None:
        ret_x0, ret_y0 = as_xy(p0)
    if p1 is not None:
        ret_x1, ret_y1 = as_xy(p1)
    if x0 is not None:
        ret_x0 = x0
    if y0 is not None:
        ret_y0 = y0
    if x1 is not None:
        ret_x1 = x1
    if y1 is not None:
        ret_y1 = y1
    return ret_x0, ret_y0, ret_x1, ret_y1
def util_make_irect( *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
    """Integer variant of `util_make_rect()`.

    Accepts the same arguments. The top-left coordinates are rounded down
    and the bottom-right coordinates are rounded up.
    """
    left, top, right, bottom = util_make_rect(
        *args, p0=p0, p1=p1, x0=x0, y0=y0, x1=x1, y1=y1,
    )
    return (
        int(math.floor(left)),
        int(math.floor(top)),
        int(math.ceil(right)),
        int(math.ceil(bottom)),
    )
def util_round_rect( rect):
    """Round `rect` with mupdf.fz_round_rect() and return it in Python form."""
    fz_rect = JM_rect_from_py(rect)
    rounded = mupdf.fz_round_rect(fz_rect)
    return JM_py_from_irect(rounded)
def util_transform_rect( rect, matrix):
    """Apply `matrix` to `rect` and return the result in Python form.

    Delegates to extra.util_transform_rect() when `g_use_extra` is set.
    """
    if g_use_extra:
        return extra.util_transform_rect( rect, matrix)
    fz_rect = JM_rect_from_py(rect)
    fz_matrix = JM_matrix_from_py(matrix)
    return JM_py_from_rect(mupdf.fz_transform_rect(fz_rect, fz_matrix))
def util_intersect_rect( r1, r2):
    """Intersect two rectangles via mupdf.fz_intersect_rect()."""
    first = JM_rect_from_py(r1)
    second = JM_rect_from_py(r2)
    return JM_py_from_rect(mupdf.fz_intersect_rect(first, second))
def util_is_point_in_rect( p, r):
    """Return the result of mupdf.fz_is_point_inside_rect() for `p` and `r`."""
    point = JM_point_from_py(p)
    rect = JM_rect_from_py(r)
    return mupdf.fz_is_point_inside_rect(point, rect)
def util_include_point_in_rect( r, p):
    """Extend rectangle `r` via mupdf.fz_include_point_in_rect() with `p`."""
    rect = JM_rect_from_py(r)
    point = JM_point_from_py(p)
    return JM_py_from_rect(mupdf.fz_include_point_in_rect(rect, point))
def util_point_in_quad( P, Q):
    """Return the result of mupdf.fz_is_point_inside_quad() for `P` and `Q`."""
    point = JM_point_from_py(P)
    quad = JM_quad_from_py(Q)
    inside = mupdf.fz_is_point_inside_quad(point, quad)
    return inside
def util_transform_point( point, matrix):
    """Apply `matrix` to `point` via mupdf.fz_transform_point()."""
    fz_point = JM_point_from_py(point)
    fz_matrix = JM_matrix_from_py(matrix)
    return JM_py_from_point(mupdf.fz_transform_point(fz_point, fz_matrix))
def util_union_rect( r1, r2):
    """Unite two rectangles via mupdf.fz_union_rect()."""
    first = JM_rect_from_py(r1)
    second = JM_rect_from_py(r2)
    return JM_py_from_rect(mupdf.fz_union_rect(first, second))
def util_concat_matrix( m1, m2):
    """Concatenate two matrices via mupdf.fz_concat(m1, m2)."""
    left = JM_matrix_from_py(m1)
    right = JM_matrix_from_py(m2)
    return JM_py_from_matrix(mupdf.fz_concat(left, right))
def util_invert_matrix(matrix):
    """Invert a matrix.

    Returns:
        (0, (a, b, c, d, e, f)) with the items of the inverse, or (1, ())
        if the determinant is within `sys.float_info.epsilon` of zero, in
        which case the matrix is treated as not invertible.

    The inversion is computed in Python; mupdf.fz_invert_matrix() is not
    used. The permanently disabled `if 0:` code path that called it has
    been removed.
    """
    src = JM_matrix_from_py(matrix)
    a = src.a
    det = a * src.d - src.b * src.c
    if -sys.float_info.epsilon <= det <= sys.float_info.epsilon:
        return 1, ()
    dst = mupdf.FzMatrix()
    rdet = 1 / det
    dst.a = src.d * rdet
    dst.b = -src.b * rdet
    dst.c = -src.c * rdet
    dst.d = a * rdet
    # The translation part is computed from the values stored in dst.
    a = -src.e * dst.a - src.f * dst.c
    dst.f = -src.e * dst.b - src.f * dst.d
    dst.e = a
    return 0, (dst.a, dst.b, dst.c, dst.d, dst.e, dst.f)
def util_measure_string( text, fontname, fontsize, encoding):
    """Return the width of `text` in a Base-14 font at the given font size.

    Args:
        text: (str) the string to measure.
        fontname: name passed to mupdf.fz_new_base14_font().
        fontsize: factor applied to the sum of the glyph advances.
        encoding: mupdf.PDF_SIMPLE_ENCODING_GREEK or
            mupdf.PDF_SIMPLE_ENCODING_CYRILLIC select the ISO 8859-7 resp.
            Windows-1251 mapping; any other value uses Windows-1252.

    Characters without a mapping in the selected code page are measured
    as 0xB7.

    The text is walked character by character. The earlier implementation
    advanced its `str` index by the UTF-8 byte count reported by
    mupdf.fz_chartorune(), which skipped characters following any
    non-ASCII character.
    """
    font = mupdf.fz_new_base14_font(fontname)
    if encoding == mupdf.PDF_SIMPLE_ENCODING_GREEK:
        to_codepage = mupdf.fz_iso8859_7_from_unicode
    elif encoding == mupdf.PDF_SIMPLE_ENCODING_CYRILLIC:
        to_codepage = mupdf.fz_windows_1251_from_unicode
    else:
        to_codepage = mupdf.fz_windows_1252_from_unicode

    w = 0
    for char in text:
        c = to_codepage(ord(char))
        if c < 0:
            c = 0xB7  # substitute for characters missing in the code page
        g = mupdf.fz_encode_character(font, c)
        w += mupdf.fz_advance_glyph(font, g, 0)
    return w * fontsize
def util_sine_between(C, P, Q):
    """For points C, P, Q compute the sine between lines CP and QP.

    C is translated by -P, rotated with the normalized direction of P->Q,
    and normalized; the y component of the result is returned.
    """
    center = JM_point_from_py(C)
    pivot = JM_point_from_py(P)
    other = JM_point_from_py(Q)
    direction = mupdf.fz_normalize_vector(
        mupdf.fz_make_point(other.x - pivot.x, other.y - pivot.y)
    )
    shift = mupdf.fz_make_matrix(1, 0, 0, 1, -pivot.x, -pivot.y)
    rotate = mupdf.fz_make_matrix(direction.x, -direction.y, direction.y, direction.x, 0, 0)
    transform = mupdf.fz_concat(shift, rotate)
    mapped = mupdf.fz_normalize_vector(mupdf.fz_transform_point(center, transform))
    return mapped.y
def util_hor_matrix(C, P):
    '''
    Return the matrix that maps two points C, P to the x-axis such that
    C -> (0,0) and the image of P have the same distance.

    The matrix is the concatenation of a translation by -C and a rotation
    built from the normalized vector P - C.
    '''
    origin = JM_point_from_py(C)
    target = JM_point_from_py(P)

    # (cosine, sine) of vector P-C
    unit = mupdf.fz_normalize_vector(
        mupdf.fz_make_point(target.x - origin.x, target.y - origin.y)
    )

    translate = mupdf.fz_make_matrix(1, 0, 0, 1, -origin.x, -origin.y)
    rotate = mupdf.fz_make_matrix(unit.x, -unit.y, unit.y, unit.x, 0, 0)
    return JM_py_from_matrix(mupdf.fz_concat(translate, rotate))
def match_string(h0, n0):
    """Test whether haystack `h0` starts with needle `n0`.

    Characters are compared in canonical form (see `chartocanon()`), and a
    run of spaces on either side matches a run of spaces on the other.

    Returns:
        The index in `h0` just behind the matched text, or None if `h0`
        does not start with `n0`.

    The comparison loop stops as soon as the needle is exhausted. Without
    that check, a needle matching the very end of the haystack left both
    sides at the end marker (0 == 0) and the loop never terminated.
    """
    space = ord(' ')
    h = 0
    n = 0
    e = h
    delta_h, hc = chartocanon(h0[h:])
    h += delta_h
    delta_n, nc = chartocanon(n0[n:])
    n += delta_n
    while hc == nc and nc != 0:
        e = h
        # advance the haystack, swallowing a whole run of spaces
        if hc == space:
            while True:
                delta_h, hc = chartocanon(h0[h:])
                h += delta_h
                if hc != space:
                    break
        else:
            delta_h, hc = chartocanon(h0[h:])
            h += delta_h
        # advance the needle in the same way
        if nc == space:
            while True:
                delta_n, nc = chartocanon(n0[n:])
                n += delta_n
                if nc != space:
                    break
        else:
            delta_n, nc = chartocanon(n0[n:])
            n += delta_n
    return None if nc != 0 else e
def on_highlight_char(hits, line, ch):
    """Record the quad of character `ch` in `hits`.

    If the last quad in `hits` ends within the horizontal / vertical fuzz
    distance of where this character starts, that quad is extended to the
    right; otherwise the character quad is appended as a new hit.
    """
    assert hits
    assert isinstance(line, mupdf.FzStextLine)
    assert isinstance(ch, mupdf.FzStextChar)
    vfuzz = ch.m_internal.size * hits.vfuzz
    hfuzz = ch.m_internal.size * hits.hfuzz
    ch_quad = JM_char_quad(line, ch)
    if hits.len > 0:
        # fixme: end = hits.quads[-1]
        last_quad = hits.quads[hits.len - 1]
        end = JM_quad_from_py(last_quad)
        line_dir = line.m_internal.dir
        adjacent = (
            hdist(line_dir, end.lr, ch_quad.ll) < hfuzz
            and vdist(line_dir, end.lr, ch_quad.ll) < vfuzz
            and hdist(line_dir, end.ur, ch_quad.ul) < hfuzz
            and vdist(line_dir, end.ur, ch_quad.ul) < vfuzz
        )
        if adjacent:
            end.ur = ch_quad.ur
            end.lr = ch_quad.lr
            assert hits.quads[-1] == end
            return
    hits.quads.append(ch_quad)
    hits.len += 1
def page_merge(doc_des, doc_src, page_from, page_to, rotate, links, copy_annots, graft_map):
    '''
    Deep-copies a source page to the target.

    Modified version of function of pdfmerge.c: we also copy annotations, but
    we skip some subtypes. In addition we rotate output.

    Page `page_from` of `doc_src` is inserted at position `page_to` of
    `doc_des`. A `rotate` value other than -1 replaces the page's /Rotate.
    When `g_use_extra` is set, the work is delegated to extra.page_merge().
    '''
    if g_use_extra:
        #log( 'Calling C++ extra.page_merge()')
        return extra.page_merge( doc_des, doc_src, page_from, page_to, rotate, links, copy_annots, graft_map)

    # page level keys to take over, including inherited values
    copied_keys = (
        PDF_NAME('Contents'),
        PDF_NAME('Resources'),
        PDF_NAME('MediaBox'),
        PDF_NAME('CropBox'),
        PDF_NAME('BleedBox'),
        PDF_NAME('TrimBox'),
        PDF_NAME('ArtBox'),
        PDF_NAME('Rotate'),
        PDF_NAME('UserUnit'),
    )
    page_ref = mupdf.pdf_lookup_page_obj(doc_src, page_from)

    # make new page dict in dest doc
    page_dict = mupdf.pdf_new_dict(doc_des, 4)
    mupdf.pdf_dict_put(page_dict, PDF_NAME('Type'), PDF_NAME('Page'))

    # copy objects of source page into it
    for key in copied_keys:
        obj = mupdf.pdf_dict_get_inheritable( page_ref, key)
        if obj.m_internal:
            #log( '{=type(graft_map) type(graft_map.this)}')
            grafted = mupdf.pdf_graft_mapped_object(graft_map.this, obj)
            mupdf.pdf_dict_put( page_dict, key, grafted)

    # Copy annotations, but skip Link, Popup, IRT, Widget types
    # If selected, remove dict keys P (parent) and Popup
    if copy_annots:
        old_annots = mupdf.pdf_dict_get( page_ref, PDF_NAME('Annots'))
        annot_count = mupdf.pdf_array_len( old_annots)
        if annot_count > 0:
            new_annots = mupdf.pdf_dict_put_array( page_dict, PDF_NAME('Annots'), annot_count)
            for idx in range(annot_count):
                o = mupdf.pdf_array_get( old_annots, idx)
                if not o.m_internal or not mupdf.pdf_is_dict(o):
                    continue    # skip non-dict items
                if mupdf.pdf_dict_gets( o, "IRT").m_internal:
                    continue
                subtype = mupdf.pdf_dict_get( o, PDF_NAME('Subtype'))
                if any(
                    mupdf.pdf_name_eq( subtype, PDF_NAME(skipped))
                    for skipped in ('Link', 'Popup', 'Widget')
                ):
                    continue
                mupdf.pdf_dict_del( o, PDF_NAME('Popup'))
                mupdf.pdf_dict_del( o, PDF_NAME('P'))
                copy_o = mupdf.pdf_graft_mapped_object( graft_map.this, o)
                annot = mupdf.pdf_new_indirect( doc_des, mupdf.pdf_to_num( copy_o), 0)
                mupdf.pdf_array_push( new_annots, annot)

    # rotate the page
    if rotate != -1:
        mupdf.pdf_dict_put_int( page_dict, PDF_NAME('Rotate'), rotate)

    # Now add the page dictionary to dest PDF and insert it at the
    # specified location
    ref = mupdf.pdf_add_object( doc_des, page_dict)
    mupdf.pdf_insert_page( doc_des, page_to, ref)
def paper_rect(s: str) -> Rect:
    """Return a Rect for the paper size indicated in string 's'.

    's' is passed to 'paper_size' and must conform to that function's
    argument. The rectangle has its top-left corner at (0, 0).
    """
    dimensions = paper_size(s)
    return Rect(0.0, 0.0, dimensions[0], dimensions[1])
def paper_size(s: str) -> tuple:
    """Return a tuple (width, height) for a given paper format string.

    Notes:
        'A4-L' will return (842, 595), the values for A4 landscape.
        Suffix '-P' and no suffix return the portrait tuple.
        Unknown formats yield (-1, -1). The lookup is case-insensitive.
    """
    name = s.lower()
    landscape = name.endswith("-l")
    if landscape:
        name = name[:-2]
    if name.endswith("-p"):
        name = name[:-2]
    rc = paper_sizes().get(name, (-1, -1))
    if landscape:
        return (rc[1], rc[0])
    return rc
def paper_sizes():
    """Known paper formats @ 72 dpi as a dictionary. Key is the format string
    like "a4" for ISO-A4. Value is the tuple (width, height).

    A new dictionary is built on every call.

    Information taken from the following web sites:
    www.din-formate.de
    www.din-formate.info/amerikanische-formate.html
    www.directtools.de/wissen/normen/iso.htm
    """
    formats = (
        # ISO A series
        ("a0", 2384, 3370),
        ("a1", 1684, 2384),
        ("a10", 74, 105),
        ("a2", 1191, 1684),
        ("a3", 842, 1191),
        ("a4", 595, 842),
        ("a5", 420, 595),
        ("a6", 298, 420),
        ("a7", 210, 298),
        ("a8", 147, 210),
        ("a9", 105, 147),
        # ISO B series
        ("b0", 2835, 4008),
        ("b1", 2004, 2835),
        ("b10", 88, 125),
        ("b2", 1417, 2004),
        ("b3", 1001, 1417),
        ("b4", 709, 1001),
        ("b5", 499, 709),
        ("b6", 354, 499),
        ("b7", 249, 354),
        ("b8", 176, 249),
        ("b9", 125, 176),
        # ISO C series
        ("c0", 2599, 3677),
        ("c1", 1837, 2599),
        ("c10", 79, 113),
        ("c2", 1298, 1837),
        ("c3", 918, 1298),
        ("c4", 649, 918),
        ("c5", 459, 649),
        ("c6", 323, 459),
        ("c7", 230, 323),
        ("c8", 162, 230),
        ("c9", 113, 162),
        # other formats
        ("card-4x6", 288, 432),
        ("card-5x7", 360, 504),
        ("commercial", 297, 684),
        ("executive", 522, 756),
        ("invoice", 396, 612),
        ("ledger", 792, 1224),
        ("legal", 612, 1008),
        ("legal-13", 612, 936),
        ("letter", 612, 792),
        ("monarch", 279, 540),
        ("tabloid-extra", 864, 1296),
    )
    return {name: (width, height) for name, width, height in formats}
def pdf_lookup_page_loc(doc, needle):
    """Forward to mupdf.pdf_lookup_page_loc() and return its result."""
    location = mupdf.pdf_lookup_page_loc(doc, needle)
    return location
def pdfobj_string(o, prefix=''):
    '''
    Returns description of mupdf.PdfObj (wrapper for pdf_obj) <o>.

    NOTE: this function is disabled by the assertion below, which points to
    mupdf.pdf_debug_obj() as the alternative. The code behind it only runs
    if assertions are stripped (python -O).

    The null and real checks now call the mupdf predicates. Previously the
    bound methods `o.pdf_is_null` / `o.pdf_is_real` were tested without
    being called, which is always true and made the stream / string
    branches unreachable.
    '''
    assert 0, 'use mupdf.pdf_debug_obj() ?'
    ret = ''
    if mupdf.pdf_is_array(o):
        l = mupdf.pdf_array_len(o)
        ret += f'array {l}\n'
        for i in range(l):
            oo = mupdf.pdf_array_get(o, i)
            ret += pdfobj_string(oo, prefix + ' ')
            ret += '\n'
    elif mupdf.pdf_is_bool(o):
        ret += f'bool: {mupdf.pdf_to_bool(o)}\n'
    elif mupdf.pdf_is_dict(o):
        l = mupdf.pdf_dict_len(o)
        ret += f'dict {l}\n'
        for i in range(l):
            key = mupdf.pdf_dict_get_key(o, i)
            value = mupdf.pdf_dict_get( o, key)
            ret += f'{prefix} {key}: '
            ret += pdfobj_string( value, prefix + ' ')
            ret += '\n'
    elif mupdf.pdf_is_embedded_file(o):
        ret += f'embedded_file: {o.embedded_file_name()}\n'
    elif mupdf.pdf_is_indirect(o):
        ret += 'indirect: ...\n'
    elif mupdf.pdf_is_int(o):
        ret += f'int: {mupdf.pdf_to_int(o)}\n'
    elif mupdf.pdf_is_jpx_image(o):
        ret += 'jpx_image:\n'
    elif mupdf.pdf_is_name(o):
        ret += f'name: {mupdf.pdf_to_name(o)}\n'
    elif mupdf.pdf_is_null(o):
        ret += 'null\n'
    elif mupdf.pdf_is_real(o):
        ret += f'real: {mupdf.pdf_to_real(o)}\n'
    elif mupdf.pdf_is_stream(o):
        ret += 'stream\n'
    elif mupdf.pdf_is_string(o):
        ret += f'string: {mupdf.pdf_to_string(o)}\n'
    else:
        ret += '<>\n'
    return ret
def repair_mono_font(page: "Page", font: "Font") -> None:
    """Repair character spacing for mono fonts.

    Notes:
        Some mono-spaced fonts are displayed with a too large character
        distance, e.g. "a b c" instead of "abc". This utility adds an entry
        "/W[0 65535 w]" to the descendent font(s) of font. The float w is
        taken to be the width of 0x20 (space).
        This should enforce viewers to use 'w' as the character width.
        Nothing happens if the font is not flagged as monospaced or does
        not occur on the page.

    Args:
        page: pymupdf.Page object.
        font: pymupdf.Font object.
    """
    if not font.flags["mono"]:  # font not flagged as monospaced
        return None
    doc = page.parent  # the document
    # xrefs of page fonts matching our font; the set drops double counts
    matching = [
        entry[0]
        for entry in page.get_fonts()
        if entry[3] == font.name
        and entry[4].startswith("F")
        and entry[5].startswith("Identity")
    ]
    if not matching:  # our font does not occur
        return None
    space_width = int(round((font.glyph_advance(32) * 1000)))
    for xref in set(matching):
        if TOOLS.set_font_width(doc, xref, space_width):
            continue
        log("Cannot set width for '%s' in xref %i" % (font.name, xref))
    return None
def sRGB_to_pdf(srgb: int) -> tuple:
    """Convert sRGB color code to a PDF color triple.

    There is **no error checking** for performance reasons!

    Args:
        srgb: (int) RRGGBB (red, green, blue), each color in range(256).
    Returns:
        Tuple (red, green, blue) each item in interval 0 <= item <= 1.
    """
    red, green, blue = sRGB_to_rgb(srgb)[:3]
    return red / 255.0, green / 255.0, blue / 255.0
def sRGB_to_rgb(srgb: int) -> tuple:
    """Convert sRGB color code to an RGB color triple.

    There is **no error checking** for performance reasons!

    Args:
        srgb: (int) SSRRGGBB (red, green, blue), each color in range(256).
            With MuPDF < 1.26, `s` is always 0. Only the lowest 24 bits
            are used.
    Returns:
        Tuple (red, green, blue) each item in interval 0 <= item <= 255.
    """
    rgb24 = srgb & 0xffffff
    return (rgb24 >> 16, (rgb24 >> 8) & 0xff, rgb24 & 0xff)
def string_in_names_list(p, names_list):
    """Return 1 if the text of `p` equals a key in `names_list`, else 0.

    Only the items at even indices of `names_list` are compared. A false
    `names_list` is treated as empty.
    """
    length = mupdf.pdf_array_len( names_list) if names_list else 0
    wanted = mupdf.pdf_to_text_string( p)
    found = any(
        mupdf.pdf_to_text_string( mupdf.pdf_array_get( names_list, idx)) == wanted
        for idx in range(0, length, 2)
    )
    return 1 if found else 0
def strip_outline(doc, outlines, page_count, page_object_nums, names_list):
    '''
    Walk a chain of sibling outline items starting at `outlines` and remove
    the items whose destination is invalid.

    Returns (count, first, prev): the number of items kept, the first kept
    item and the last kept item. `first` and `prev` are None if nothing was
    kept.

    Items with an invalid destination and no remaining children are
    unlinked from the chain. Items with an invalid destination that still
    have children only lose their /Dest and /A entries.
    '''
    first = None
    prev = None  # last kept item; stays None until one is kept
    count = 0
    current = outlines
    while current.m_internal:
        # Strip any children to start with. This takes care of
        # First / Last / Count for us.
        nc = strip_outlines(doc, current, page_count, page_object_nums, names_list)

        if dest_is_valid(current, page_count, page_object_nums, names_list):
            # Keep this one
            if not first or not first.m_internal:
                first = current
            prev = current
            current = mupdf.pdf_dict_get(current, PDF_NAME('Next'))
            count += 1
            continue

        if nc != 0:
            # Outline with invalid dest, but children. Just drop the dest.
            mupdf.pdf_dict_del(current, PDF_NAME('Dest'))
            mupdf.pdf_dict_del(current, PDF_NAME('A'))
            current = mupdf.pdf_dict_get(current, PDF_NAME('Next'))
            continue

        # Outline with invalid dest and no children. Drop it by
        # pulling the next one in here.
        following = mupdf.pdf_dict_get(current, PDF_NAME('Next'))
        # `prev` is still None while no item has been kept; guard against
        # that before looking at its m_internal.
        have_prev = prev is not None and prev.m_internal
        if not following.m_internal:
            # There is no next one to pull in
            if have_prev:
                mupdf.pdf_dict_del(prev, PDF_NAME('Next'))
        elif have_prev:
            mupdf.pdf_dict_put(prev, PDF_NAME('Next'), following)
            mupdf.pdf_dict_put(following, PDF_NAME('Prev'), prev)
        else:
            mupdf.pdf_dict_del(following, PDF_NAME('Prev'))
        current = following
    return count, first, prev
def strip_outlines(doc, outlines, page_count, page_object_nums, names_list):
    """Strip invalid items from the children of outline node `outlines`.

    Returns the number of children kept. /First, /Last and /Count of
    `outlines` are removed if no child remains, otherwise they are
    rewritten; the sign of a previously positive /Count is preserved,
    any other value results in a negative count.
    """
    if not outlines.m_internal:
        return 0

    kept = 0
    first = mupdf.pdf_dict_get(outlines, PDF_NAME('First'))
    if first.m_internal:
        kept, first, last = strip_outline(doc, first, page_count, page_object_nums, names_list)

    if kept == 0:
        for key in ('First', 'Last', 'Count'):
            mupdf.pdf_dict_del(outlines, PDF_NAME(key))
        return kept

    old_count = mupdf.pdf_to_int(mupdf.pdf_dict_get(outlines, PDF_NAME('Count')))
    mupdf.pdf_dict_put(outlines, PDF_NAME('First'), first)
    mupdf.pdf_dict_put(outlines, PDF_NAME('Last'), last)
    signed_count = kept if old_count > 0 else -kept
    mupdf.pdf_dict_put(outlines, PDF_NAME('Count'), mupdf.pdf_new_int(signed_count))
    return kept
# Path type codes of the trace device.
(
    trace_device_FILL_PATH,
    trace_device_STROKE_PATH,
    trace_device_CLIP_PATH,
    trace_device_CLIP_STROKE_PATH,
) = (1, 2, 3, 4)
def unicode_to_glyph_name(ch: int) -> str:
    """
    Return the `unicodedata` name of code point 'ch'.

    Code points without a name, or outside the valid range, yield ".notdef".
    """
    import unicodedata
    try:
        char = chr(ch)
    except ValueError:
        return ".notdef"
    return unicodedata.name(char, ".notdef")
def vdist(dir, a, b):
    """Absolute value of dx * dir.y + dy * dir.x for the vector a->b."""
    across = (b.x - a.x) * dir.y + (b.y - a.y) * dir.x
    return mupdf.fz_abs(across)
def apply_pages(
        path,
        pagefn,
        *,
        pagefn_args=(),
        pagefn_kwargs=dict(),
        initfn=None,
        initfn_args=(),
        initfn_kwargs=dict(),
        pages=None,
        method='single',
        concurrency=None,
        _stats=False,
        ):
    '''
    Returns list of results from `pagefn()`, optionally using concurrency for
    speed.

    Args:
        path:
            Path of document.
        pagefn:
            Function to call for each page; is passed (page, *pagefn_args,
            **pagefn_kwargs). Return value is added to list that we return. If
            `method` is not 'single', must be a top-level function - nested
            functions don't work with concurrency.
        pagefn_args
        pagefn_kwargs:
            Additional args to pass to `pagefn`. Must be picklable.
        initfn:
            If true, called once in each worker process; is passed
            (*initfn_args, **initfn_kwargs). With method 'single' it is
            called once in the current process.
        initfn_args
        initfn_kwargs:
            Args to pass to initfn. Must be picklable.
        pages:
            List of page numbers to process, or None to include all pages.
        method:
            'single'
                Do not use concurrency.
            'mp'
                Operate concurrently using Python's `multiprocessing` module.
            'fork'
                Operate concurrently using custom implementation with
                `os.fork()`. Does not work on Windows.
        concurrency:
            Number of worker processes to use when operating concurrently. If
            None, we use the number of available CPUs.
        _stats:
            Internal, may change or be removed. If true, we output simple
            timing diagnostics.

    Note: We require a file path rather than a Document, because Document
    instances do not work properly after a fork - internal file descriptor
    offsets are shared between the parent and child processes.
    '''
    if _stats:
        t0 = time.time()

    if method == 'single':
        if initfn:
            initfn(*initfn_args, **initfn_kwargs)
        ret = list()
        document = Document(path)
        if pages is None:
            pages = range(len(document))
        for pno in pages:
            page = document[pno]
            # Pass pagefn its own keyword arguments. This used to forward
            # `initfn_kwargs`, so `pagefn_kwargs` was silently ignored.
            r = pagefn(page, *pagefn_args, **pagefn_kwargs)
            ret.append(r)

    else:
        # Use concurrency.
        #
        from . import _apply_pages

        if pages is None:
            if _stats:
                t = time.time()
            with Document(path) as document:
                num_pages = len(document)
                pages = list(range(num_pages))
            if _stats:
                t = time.time() - t
                log(f'{t:.2f}s: count pages.')

        if _stats:
            t = time.time()

        if method == 'mp':
            ret = _apply_pages._multiprocessing(
                    path,
                    pages,
                    pagefn,
                    pagefn_args,
                    pagefn_kwargs,
                    initfn,
                    initfn_args,
                    initfn_kwargs,
                    concurrency,
                    _stats,
                    )

        elif method == 'fork':
            ret = _apply_pages._fork(
                    path,
                    pages,
                    pagefn,
                    pagefn_args,
                    pagefn_kwargs,
                    initfn,
                    initfn_args,
                    initfn_kwargs,
                    concurrency,
                    _stats,
                    )

        else:
            assert 0, f'Unrecognised {method=}.'

        if _stats:
            t = time.time() - t
            log(f'{t:.2f}s: work.')

    if _stats:
        t = time.time() - t0
        log(f'{t:.2f}s: total.')
    return ret
def get_text(
        path,
        *,
        pages=None,
        method='single',
        concurrency=None,

        option='text',
        clip=None,
        flags=None,
        textpage=None,
        sort=False,
        delimiters=None,

        _stats=False,
        ):
    '''
    Returns list of results from `Page.get_text()`, optionally using
    concurrency for speed.

    This is a thin wrapper around `apply_pages()` with `Page.get_text` as
    the per-page function.

    Args:
        path:
            Path of document.
        pages:
            List of page numbers to process, or None to include all pages.
        method:
            'single'
                Do not use concurrency.
            'mp'
                Operate concurrently using Python's `multiprocessing` module.
            'fork'
                Operate concurrently using custom implementation with
                `os.fork`. Does not work on Windows.
        concurrency:
            Number of worker processes to use when operating concurrently. If
            None, we use the number of available CPUs.
        option
        clip
        flags
        textpage
        sort
        delimiters:
            Passed to internal calls to `Page.get_text()`.
    '''
    # keyword arguments forwarded to every Page.get_text() call
    get_text_kwargs = {
        'option': option,
        'clip': clip,
        'flags': flags,
        'textpage': textpage,
        'sort': sort,
        'delimiters': delimiters,
    }
    results = apply_pages(
            path,
            Page.get_text,
            pagefn_kwargs=get_text_kwargs,
            pages=pages,
            method=method,
            concurrency=concurrency,
            _stats=_stats,
            )
    return results
- class TOOLS:
- '''
- We use @staticmethod to avoid the need to create an instance of this class.
- '''
@staticmethod
def _derotate_matrix(page):
    """Return the derotation matrix of `page` in Python form.

    For a mupdf.PdfPage this is the result of JM_derotate_page_matrix();
    for any other page type a default-constructed mupdf.FzMatrix is
    returned.

    Declared as a static method like the other members of this class, so
    that it also works when accessed through an instance.
    """
    if isinstance(page, mupdf.PdfPage):
        return JM_py_from_matrix(JM_derotate_page_matrix(page))
    return JM_py_from_matrix(mupdf.FzMatrix())
@staticmethod
def _fill_widget(annot, widget):
    """Populate `widget` from `annot`.

    Field properties are read by JM_get_widget_properties(), whose result
    is returned. Rectangle, xref, parent and a back reference to the annot
    are copied over, and empty script values are normalised to None.
    """
    val = JM_get_widget_properties(annot, widget)

    widget.rect = Rect(annot.rect)
    widget.xref = annot.xref
    widget.parent = annot.parent
    widget._annot = annot  # backpointer to annot object

    script_attributes = (
        "script",
        "script_stroke",
        "script_format",
        "script_change",
        "script_calc",
        "script_blur",
        "script_focus",
    )
    for attribute in script_attributes:
        if not getattr(widget, attribute):
            setattr(widget, attribute, None)
    return val
@staticmethod
def _get_all_contents(page):
    """Return the result of JM_read_contents() for the page object as binary data."""
    pdf_page = _as_pdf_page(page.this)
    contents_buffer = JM_read_contents(pdf_page.obj())
    return JM_BinFromBuffer( contents_buffer)
@staticmethod
def _insert_contents(page, newcont, overlay=1):
    """Add bytes as a new /Contents object for a page, and return its xref.

    `overlay` is handed through to JM_insert_contents().
    """
    pdfpage = _as_pdf_page(page, required=1)
    #fixme: pdfpage->doc->dirty = 1;
    return JM_insert_contents(
        pdfpage.doc(),
        pdfpage.obj(),
        JM_BufferFromBytes(newcont),
        overlay,
    )
@staticmethod
def _le_annot_parms(annot, p1, p2, fill_color):
    """Get common parameters for making annot line end symbols.

    Returns:
        m: matrix that maps p1, p2 to points L, R on the x-axis
        im: its inverse
        L, R: transformed p1, p2
        w: line width
        scol: stroke color string
        fcol: fill color string
        opacity: opacity string (gs command)
    """
    w = annot.border["width"]  # line width

    stroke = annot.colors["stroke"] or (0, 0, 0)  # black if missing
    scol = " ".join(str(component) for component in stroke) + " RG\n"

    # explicit fill color wins, then the annot's own, then white
    fill = fill_color or annot.colors["fill"] or (1, 1, 1)
    fcol = " ".join(str(component) for component in fill) + " rg\n"

    m = Matrix(util_hor_matrix(p1, p2))  # matrix makes the line horizontal
    im = ~m  # inverted matrix
    L = p1 * m  # converted start (left) point
    R = p2 * m  # converted end (right) point

    opacity = "/H gs\n" if 0 <= annot.opacity < 1 else ""
    return m, im, L, R, w, scol, fcol, opacity
@staticmethod
def _le_butt(annot, p1, p2, lr, fill_color):
    """Make stream commands for butt line end symbol. "lr" denotes left (False) or right point.

    The symbol is a stroked line through the end point, perpendicular to
    the annotation line.
    """
    _, inverse, left, right, width, stroke, _, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
    length = 3 * max(1, width)
    anchor = right if lr else left
    top = (anchor + (0, -length/2.)) * inverse
    bottom = (anchor + (0, length/2.)) * inverse
    commands = [
        "\nq\n%s%f %f m\n" % (opacity, top.x, top.y),
        "%f %f l\n" % (bottom.x, bottom.y),
        _format_g(width) + " w\n",
        stroke + "s\nQ\n",
    ]
    return "".join(commands)
@staticmethod
def _le_circle(annot, p1, p2, lr, fill_color):
    """Make stream commands for circle line end symbol. "lr" denotes left (False) or right point.

    The circle is drawn as an oval inscribed in a square around the end
    point, filled and stroked.
    """
    _, inverse, left, right, width, stroke, fill, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
    size = 2.5 * max(1, width)  # 2*shift*width = length of square edge
    if lr:
        center = right - (size/2., 0)
    else:
        center = left + (size/2., 0)
    square = Rect(center, center) + (-size, -size, size, size)  # the square
    oval = TOOLS._oval_string(
        square.tl * inverse,
        square.tr * inverse,
        square.br * inverse,
        square.bl * inverse,
    )
    ap = "q\n" + opacity + oval
    ap += _format_g(width) + " w\n"
    ap += stroke + fill + "b\nQ\n"
    return ap
@staticmethod
def _le_closedarrow(annot, p1, p2, lr, fill_color):
    """Make stream commands for closed arrow line end symbol. "lr" denotes left (False) or right point.

    The arrow head is a triangle whose tip points away from the line; it
    is closed, filled and stroked.
    """
    _, inverse, left, right, width, stroke, fill, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
    size = 2.5 * max(1, width)
    if lr:
        tip = right + (size/2., 0)
        upper = tip + (-2*size, -size)
        lower = tip + (-2*size, size)
    else:
        tip = left - (size/2., 0)
        upper = tip + (2*size, -size)
        lower = tip + (2*size, size)
    # back to the coordinate system of the annotation
    upper *= inverse
    tip *= inverse
    lower *= inverse
    commands = [
        "\nq\n%s%f %f m\n" % (opacity, upper.x, upper.y),
        "%f %f l\n" % (tip.x, tip.y),
        "%f %f l\n" % (lower.x, lower.y),
        _format_g(width) + " w\n",
        stroke + fill + "b\nQ\n",
    ]
    return "".join(commands)
@staticmethod
def _le_diamond(annot, p1, p2, lr, fill_color):
    """Return the stream commands drawing a "diamond" line end symbol.

    The diamond connects the midpoints of the four sides of a square built
    around a point shifted inwards from the chosen end point.

    "lr" selects the end point: False = left point, True = right point.
    """
    _, inverse, left, right, width, stroke, fill, opacity = TOOLS._le_annot_parms(
        annot, p1, p2, fill_color
    )
    half_edge = 2.5 * max(1, width)  # the square edge is twice this value
    if lr:
        center = right - (half_edge / 2., 0)
    else:
        center = left + (half_edge / 2., 0)
    square = Rect(center, center) + (-half_edge, -half_edge, half_edge, half_edge)
    # Side midpoints in drawing order: left, top, right, bottom.
    sides = (
        (square.tl, square.bl),
        (square.tl, square.tr),
        (square.tr, square.br),
        (square.br, square.bl),
    )
    midpoints = [(start + (end - start) * 0.5) * inverse for start, end in sides]
    first = midpoints[0]
    ap = "q\n%s%f %f m\n" % (opacity, first.x, first.y)
    for point in midpoints[1:]:
        ap += "%f %f l\n" % (point.x, point.y)
    ap += _format_g(width) + " w\n"
    ap += stroke + fill + "b\nQ\n"
    return ap
@staticmethod
def _le_openarrow(annot, p1, p2, lr, fill_color):
    """Return the stream commands drawing an "open arrow" line end symbol.

    Same geometry as the closed arrow, but the two arrow legs are only
    stroked (operator "S"), neither closed nor filled.

    "lr" selects the end point: False = left point, True = right point.
    """
    _, inverse, left, right, width, stroke, _, opacity = TOOLS._le_annot_parms(
        annot, p1, p2, fill_color
    )
    size = 2.5 * max(1, width)
    if lr:
        tip = right + (size / 2., 0)
        back = -2 * size
    else:
        tip = left - (size / 2., 0)
        back = 2 * size
    wing_a = (tip + (back, -size)) * inverse
    wing_b = (tip + (back, size)) * inverse
    tip = tip * inverse
    return (
        "\nq\n%s%f %f m\n" % (opacity, wing_a.x, wing_a.y)
        + "%f %f l\n" % (tip.x, tip.y)
        + "%f %f l\n" % (wing_b.x, wing_b.y)
        + _format_g(width) + " w\n"
        + stroke + "S\nQ\n"
    )
@staticmethod
def _le_rclosedarrow(annot, p1, p2, lr, fill_color):
    """Return the stream commands drawing a "right closed arrow" line end symbol.

    The triangle's apex is moved inwards from the chosen end point by twice
    the symbol size; the two wing points lie level with the end point.

    "lr" selects the end point: False = left point, True = right point.
    """
    _, inverse, left, right, width, stroke, fill, opacity = TOOLS._le_annot_parms(
        annot, p1, p2, fill_color
    )
    size = 2.5 * max(1, width)
    reach = 2 * size
    if lr:
        apex = right - (reach, 0)
        outward = reach
    else:
        apex = left + (reach, 0)
        outward = -reach
    wing_a = (apex + (outward, -size)) * inverse
    wing_b = (apex + (outward, size)) * inverse
    apex = apex * inverse
    return (
        "\nq\n%s%f %f m\n" % (opacity, wing_a.x, wing_a.y)
        + "%f %f l\n" % (apex.x, apex.y)
        + "%f %f l\n" % (wing_b.x, wing_b.y)
        + _format_g(width) + " w\n"
        + stroke + fill + "b\nQ\n"
    )
@staticmethod
def _le_ropenarrow(annot, p1, p2, lr, fill_color):
    """Return the stream commands drawing a "right open arrow" line end symbol.

    The apex is moved inwards from the chosen end point by a third of the
    symbol size; the two legs point outwards and are only stroked ("S").

    "lr" selects the end point: False = left point, True = right point.
    """
    _, inverse, left, right, width, stroke, fill, opacity = TOOLS._le_annot_parms(
        annot, p1, p2, fill_color
    )
    size = 2.5 * max(1, width)
    if lr:
        apex = right - (size / 3., 0)
        outward = 2 * size
    else:
        apex = left + (size / 3., 0)
        outward = -2 * size
    wing_a = (apex + (outward, -size)) * inverse
    wing_b = (apex + (outward, size)) * inverse
    apex = apex * inverse
    # The fill color command is emitted as well, although "S" only strokes.
    return (
        "\nq\n%s%f %f m\n" % (opacity, wing_a.x, wing_a.y)
        + "%f %f l\n" % (apex.x, apex.y)
        + "%f %f l\n" % (wing_b.x, wing_b.y)
        + _format_g(width) + " w\n"
        + stroke + fill + "S\nQ\n"
    )
@staticmethod
def _le_slash(annot, p1, p2, lr, fill_color):
    """Return the stream commands drawing a "slash" line end symbol.

    The slash is the diagonal (top-left to bottom-right) of a rectangle
    centered on the chosen end point.

    "lr" selects the end point: False = left point, True = right point.
    """
    _, inverse, left, right, width, stroke, _, opacity = TOOLS._le_annot_parms(
        annot, p1, p2, fill_color
    )
    # Half width of the rectangle; chosen so that its diagonal is inclined
    # by 30 degrees.
    half_width = 1.1547 * max(1, width) * 1.0
    anchor = right if lr else left
    box = Rect(
        anchor.x - half_width,
        anchor.y - 2 * width,
        anchor.x + half_width,
        anchor.y + 2 * width,
    )
    start = box.tl * inverse
    end = box.br * inverse
    return (
        "\nq\n%s%f %f m\n" % (opacity, start.x, start.y)
        + "%f %f l\n" % (end.x, end.y)
        + _format_g(width) + " w\n"
        + stroke + "s\nQ\n"
    )
@staticmethod
def _le_square(annot, p1, p2, lr, fill_color):
    """Return the stream commands drawing a "square" line end symbol.

    The square is built around a point shifted inwards from the chosen end
    point; its corners are mapped back with the inverted matrix and the
    resulting path is closed, filled and stroked.

    "lr" selects the end point: False = left point, True = right point.
    """
    _, inverse, left, right, width, stroke, fill, opacity = TOOLS._le_annot_parms(
        annot, p1, p2, fill_color
    )
    half_edge = 2.5 * max(1, width)  # the square edge is twice this value
    if lr:
        center = right - (half_edge / 2., 0)
    else:
        center = left + (half_edge / 2., 0)
    square = Rect(center, center) + (-half_edge, -half_edge, half_edge, half_edge)
    # Corners in drawing order: top-left, top-right, bottom-right, bottom-left.
    corners = [
        corner * inverse
        for corner in (square.tl, square.tr, square.br, square.bl)
    ]
    first = corners[0]
    ap = "q\n%s%f %f m\n" % (opacity, first.x, first.y)
    for corner in corners[1:]:
        ap += "%f %f l\n" % (corner.x, corner.y)
    ap += _format_g(width) + " w\n"
    ap += stroke + fill + "b\nQ\n"
    return ap
- @staticmethod
- def _oval_string(p1, p2, p3, p4):
- """Return /AP string defining an oval within a 4-polygon provided as points
- """
- def bezier(p, q, r):
- f = "%f %f %f %f %f %f c\n"
- return f % (p.x, p.y, q.x, q.y, r.x, r.y)
- kappa = 0.55228474983 # magic number
- ml = p1 + (p4 - p1) * 0.5 # middle points ...
- mo = p1 + (p2 - p1) * 0.5 # for each ...
- mr = p2 + (p3 - p2) * 0.5 # polygon ...
- mu = p4 + (p3 - p4) * 0.5 # side
- ol1 = ml + (p1 - ml) * kappa # the 8 bezier
- ol2 = mo + (p1 - mo) * kappa # helper points
- or1 = mo + (p2 - mo) * kappa
- or2 = mr + (p2 - mr) * kappa
- ur1 = mr + (p3 - mr) * kappa
- ur2 = mu + (p3 - mu) * kappa
- ul1 = mu + (p4 - mu) * kappa
- ul2 = ml + (p4 - ml) * kappa
- # now draw, starting from middle point of left side
- ap = "%f %f m\n" % (ml.x, ml.y)
- ap += bezier(ol1, ol2, mo)
- ap += bezier(or1, or2, mr)
- ap += bezier(ur1, ur2, mu)
- ap += bezier(ul1, ul2, ml)
- return ap
@staticmethod
def _parse_da(annot):
    """Parse the default appearance (/DA) string of an annotation.

    In the pure-Python path the /DA entry is looked up on the annotation
    object (inheritable lookup) and, if not found there, under
    Root/AcroForm/DA of the document trailer.

    Returns a tuple (color, fontname, fontsize):
        color: list of 1, 3 or 4 floats taken from a "g", "rg" or "k"
            operator; the tuple (0, 0, 0) if the string sets no color.
        fontname: operand of "Tf" without its first character (the "/");
            "Helv" if there is no "Tf".
        fontsize: float operand of "Tf"; 12 if there is no "Tf".
    If no /DA string can be obtained, ((0,), "", 0) is returned.

    An operator that is not preceded by enough operands, or whose operands
    are not numeric, is ignored instead of raising or reading tokens from
    the end of the string through negative indices.
    """
    if g_use_extra:
        val = extra.Tools_parse_da( annot.this)
    else:
        def Tools__parse_da(annot):
            this_annot = annot.this
            assert isinstance(this_annot, mupdf.PdfAnnot)
            this_annot_obj = mupdf.pdf_annot_obj( this_annot)
            pdf = mupdf.pdf_get_bound_document( this_annot_obj)
            try:
                da = mupdf.pdf_dict_get_inheritable( this_annot_obj, PDF_NAME('DA'))
                if not da.m_internal:
                    # Fall back to the document-wide default appearance.
                    trailer = mupdf.pdf_trailer(pdf)
                    da = mupdf.pdf_dict_getl(trailer,
                            PDF_NAME('Root'),
                            PDF_NAME('AcroForm'),
                            PDF_NAME('DA'),
                            )
                da_str = mupdf.pdf_to_text_string(da)
            except Exception:
                if g_exceptions_verbose: exception_info()
                return
            return da_str
        val = Tools__parse_da(annot)

    if not val:
        return ((0,), "", 0)

    font = "Helv"
    fsize = 12
    col = (0, 0, 0)
    dat = val.split()  # split on any whitespace

    def numbers(end, count):
        """Return the `count` tokens before index `end` as floats, else None."""
        if end < count:
            return None  # not enough operands in front of the operator
        try:
            return [float(token) for token in dat[end - count:end]]
        except ValueError:
            return None

    # Number of numeric operands of each supported color operator:
    # "g" = gray, "rg" = RGB, "k" = CMYK.
    color_operands = {"g": 1, "rg": 3, "k": 4}

    for i, item in enumerate(dat):
        if item == "Tf":
            size = numbers(i, 1)
            if i >= 2 and size is not None:
                font = dat[i - 2][1:]
                fsize = size[0]
        elif item in color_operands:
            values = numbers(i, color_operands[item])
            if values is not None:
                col = values

    return (col, font, fsize)
@staticmethod
def _reset_widget(annot):
    """Reset the form field behind `annot` via mupdf.pdf_field_reset()."""
    field_obj = mupdf.pdf_annot_obj(annot)
    document = mupdf.pdf_get_bound_document(field_obj)
    mupdf.pdf_field_reset(document, field_obj)
@staticmethod
def _rotate_matrix(page):
    """Return the page rotation matrix as a Python value.

    If `page` has no underlying PDF page, a default-constructed
    mupdf.FzMatrix is converted instead.
    """
    pdfpage = page._pdf_page(required=False)
    if pdfpage.m_internal:
        matrix = JM_rotate_page_matrix(pdfpage)
    else:
        matrix = mupdf.FzMatrix()
    return JM_py_from_matrix(matrix)
@staticmethod
def _save_widget(annot, widget):
    """Pass `annot` and `widget` on to JM_set_widget_properties()."""
    JM_set_widget_properties(annot, widget)
@staticmethod
def _update_da(annot, da_str):
    """Store `da_str` as the /DA entry of the annotation.

    In the pure-Python path the /DS and /RC entries are deleted as well
    (not supported). Any exception raised there is swallowed; it is only
    reported when g_exceptions_verbose is set.

    Declared as a static method like the other helpers of this class, so
    it can be called on the class or on an instance.
    """
    if g_use_extra:
        extra.Tools_update_da( annot.this, da_str)
        return
    try:
        this_annot = annot.this
        assert isinstance(this_annot, mupdf.PdfAnnot)
        annot_obj = mupdf.pdf_annot_obj(this_annot)
        mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('DA'), da_str)
        mupdf.pdf_dict_del(annot_obj, PDF_NAME('DS'))  # not supported
        mupdf.pdf_dict_del(annot_obj, PDF_NAME('RC'))  # not supported
    except Exception:
        if g_exceptions_verbose: exception_info()
-
@staticmethod
def gen_id():
    """Increment the global counter TOOLS_JM_UNIQUE_ID and return its new value."""
    global TOOLS_JM_UNIQUE_ID
    TOOLS_JM_UNIQUE_ID = TOOLS_JM_UNIQUE_ID + 1
    return TOOLS_JM_UNIQUE_ID
@staticmethod
def glyph_cache_empty():
    """Purge the MuPDF glyph cache (mupdf.fz_purge_glyph_cache)."""
    mupdf.fz_purge_glyph_cache()
@staticmethod
def image_profile(stream, keep_image=0):
    """Return the metadata of an image binary stream.

    Both arguments are passed on unchanged to JM_image_profile().
    """
    profile = JM_image_profile(stream, keep_image)
    return profile
-
@staticmethod
def mupdf_display_errors(on=None):
    """Switch the display of MuPDF errors on or off.

    With `on` omitted (None) the setting is left alone. The current
    setting is returned in either case.
    """
    global JM_mupdf_show_errors
    if on is None:
        return JM_mupdf_show_errors
    JM_mupdf_show_errors = bool(on)
    return JM_mupdf_show_errors
@staticmethod
def mupdf_display_warnings(on=None):
    """Switch the display of MuPDF warnings on or off.

    With `on` omitted (None) the setting is left alone. The current
    setting is returned in either case.
    """
    global JM_mupdf_show_warnings
    if on is None:
        return JM_mupdf_show_warnings
    JM_mupdf_show_warnings = bool(on)
    return JM_mupdf_show_warnings
@staticmethod
def mupdf_version():
    """Return the version of the MuPDF binary build (mupdf.FZ_VERSION)."""
    version = mupdf.FZ_VERSION
    return version
@staticmethod
def mupdf_warnings(reset=1):
    """Return the stored MuPDF warnings/errors as one newline-joined string.

    If `reset` is true (the default), the store is emptied afterwards.
    """
    # Flushing first picks up any trailing
    # `... repeated <N> times...` message.
    mupdf.fz_flush_warnings()
    collected = '\n'.join(JM_mupdf_warnings_store)
    if reset:
        TOOLS.reset_mupdf_warnings()
    return collected
@staticmethod
def reset_mupdf_warnings():
    """Replace the global warnings store with a new empty list."""
    global JM_mupdf_warnings_store
    JM_mupdf_warnings_store = []
-
@staticmethod
def set_aa_level(level):
    """Set the anti-aliasing level (passed on to mupdf.fz_set_aa_level)."""
    mupdf.fz_set_aa_level(level)
-
@staticmethod
def set_annot_stem( stem=None):
    """Get or set the global annotation id stem.

    With `stem` omitted (None) the current stem is returned unchanged.
    Otherwise the stem is set to the first 50 characters of `stem` and
    the new value is returned.
    """
    global JM_annot_id_stem
    if stem is None:
        return JM_annot_id_stem
    # The previous length bookkeeping was never used; the slice alone
    # enforces the 50 character limit.
    JM_annot_id_stem = stem[:50]
    return JM_annot_id_stem
@staticmethod
def set_font_width(doc, xref, width):
    """Set the /W entry of all descendant fonts of the font at `xref`.

    Each entry of the font's /DescendantFonts array receives a new /W
    array [0 65535 width]. Returns False if `doc` is not a PDF, else True
    (also when the font has no /DescendantFonts array).
    """
    pdf = _as_pdf_document(doc, required=0)
    if not pdf.m_internal:
        return False
    font_obj = mupdf.pdf_load_object(pdf, xref)
    descendants = mupdf.pdf_dict_get(font_obj, PDF_NAME('DescendantFonts'))
    if not mupdf.pdf_is_array(descendants):
        return True
    for index in range(mupdf.pdf_array_len(descendants)):
        descendant = mupdf.pdf_array_get(descendants, index)
        widths = mupdf.pdf_new_array(pdf, 3)
        for value in (0, 65535, width):
            mupdf.pdf_array_push(widths, mupdf.pdf_new_int(value))
        mupdf.pdf_dict_put(descendant, PDF_NAME('W'), widths)
    return True
@staticmethod
def set_graphics_min_line_width(min_line_width):
    """Set the minimum line width used for graphics.

    The value is passed on to mupdf.fz_set_graphics_min_line_width().
    """
    mupdf.fz_set_graphics_min_line_width(min_line_width)
@staticmethod
def set_icc( on=0):
    """Switch ICC color handling on or off.

    Switching on when mupdf.FZ_ENABLE_ICC is false is reported through
    RAISEPY(); switching off in that case does nothing.
    """
    icc_built_in = mupdf.FZ_ENABLE_ICC
    if on and icc_built_in:
        mupdf.fz_enable_icc()
    elif on:
        RAISEPY( "MuPDF built w/o ICC support",PyExc_ValueError)
    elif icc_built_in:
        mupdf.fz_disable_icc()
-
@staticmethod
def set_low_memory( on=None):
    """Set / unset MuPDF device caching.

    With `on` omitted (None) nothing is changed. Returns the current
    value of _globals.no_device_caching.
    """
    if on is None:
        return _globals.no_device_caching
    _globals.no_device_caching = bool(on)
    return _globals.no_device_caching
@staticmethod
def set_small_glyph_heights(on=None):
    """Set / unset small glyph heights.

    With `on` omitted (None) nothing is changed. A new value is also
    passed to the `extra` module when g_use_extra is set. Returns the
    current value of _globals.small_glyph_heights.
    """
    if on is None:
        return _globals.small_glyph_heights
    _globals.small_glyph_heights = bool(on)
    if g_use_extra:
        extra.set_small_glyph_heights(_globals.small_glyph_heights)
    return _globals.small_glyph_heights
-
@staticmethod
def set_subset_fontnames(on=None):
    """Set / unset returning fontnames with their subset prefix.

    With `on` omitted (None) nothing is changed. A new value is also
    passed to the `extra` module when g_use_extra is set. Returns the
    current value of _globals.subset_fontnames.
    """
    if on is None:
        return _globals.subset_fontnames
    _globals.subset_fontnames = bool(on)
    if g_use_extra:
        extra.set_subset_fontnames(_globals.subset_fontnames)
    return _globals.subset_fontnames
-
@staticmethod
def show_aa_level():
    """Return the current anti-aliasing values as a dict.

    Keys: "graphics", "text" and "graphics_min_line_width".
    """
    return {
        'graphics': mupdf.fz_graphics_aa_level(),
        'text': mupdf.fz_text_aa_level(),
        'graphics_min_line_width': mupdf.fz_graphics_min_line_width(),
    }
@staticmethod
def store_maxsize():
    """MuPDF store size limit.

    Not implemented yet: always returns None.
    """
    # fixme: return gctx->store->max.
    limit = None
    return limit
@staticmethod
def store_shrink(percent):
    """Free 'percent' of the current store size.

    A value of 100 or more empties the store and returns 0. Any other
    value returns None; the store is shrunk only if `percent` is positive.
    """
    if not percent >= 100:
        if percent > 0:
            mupdf.fz_shrink_store( 100 - percent)
        # fixme: return gctx->store->size.
        return None
    mupdf.fz_empty_store()
    return 0
-
@staticmethod
def store_size():
    """MuPDF current store size.

    Not implemented yet: always returns None.
    """
    # fixme: return gctx->store->size.
    size = None
    return size
-
@staticmethod
def unset_quad_corrections(on=None):
    """Set ascender / descender corrections on or off.

    With `on` omitted (None) nothing is changed. A new value is also
    passed to the `extra` module when g_use_extra is set. Returns the
    current value of _globals.skip_quad_corrections.
    """
    if on is None:
        return _globals.skip_quad_corrections
    _globals.skip_quad_corrections = bool(on)
    if g_use_extra:
        extra.set_skip_quad_corrections(_globals.skip_quad_corrections)
    return _globals.skip_quad_corrections
- # fixme: JM_annot_id_stem is also defined at top-level (set_annot_stem() reads and updates the global of that name).
- JM_annot_id_stem = 'fitz'
- fitz_config = JM_fitz_config()  # evaluated once, when this statement runs
- # Callbacks are not yet supported with cppyy, so they are only installed otherwise.
- if not mupdf_cppyy:
- mupdf.fz_set_warning_callback(JM_mupdf_warning)  # MuPDF warnings go to JM_mupdf_warning
- mupdf.fz_set_error_callback(JM_mupdf_error)  # MuPDF errors go to JM_mupdf_error
- # If there are pending warnings when we exit, we end up in this sequence:
- #
- # atexit()
- # -> mupdf::internal_thread_state::~internal_thread_state()
- # -> fz_drop_context()
- # -> fz_flush_warnings()
- # -> SWIG Director code
- # -> Python calling JM_mupdf_warning().
- #
- # Unfortunately this causes a SEGV, seemingly because the SWIG Director code has
- # already been torn down.
- #
- # So we use a Python atexit handler to explicitly call fz_flush_warnings();
- # this appears to happen early enough for the Director machinery to still
- # work. So in the sequence above, fz_flush_warnings() will find that there are
- # no pending warnings and will not attempt to call JM_mupdf_warning().
- #
def _atexit():
    """Flush pending MuPDF warnings, then remove both Python callbacks.

    Registered with `atexit` so that the flush happens from a Python exit
    handler rather than later during MuPDF context teardown.
    """
    mupdf.fz_flush_warnings()
    mupdf.fz_set_warning_callback(None)
    mupdf.fz_set_error_callback(None)


atexit.register(_atexit)
- # List of (name, red, green, blue) where:
- # name: upper-case name.
- # red, green, blue: integer in range 0..255.
- #
- from . import _wxcolors
- _wxcolors = _wxcolors._wxcolors  # rebind the name from the submodule to the list it contains
- # Dict mapping from name to (red, green, blue), built from the list above.
- # name: lower-case name.
- # red, green, blue: float in range 0..1.
- #
- pdfcolor = dict()
- for name, r, g, b in _wxcolors:
- pdfcolor[name.lower()] = (r/255, g/255, b/255)  # scale the 0..255 integers to 0..1 floats
def colors_pdf_dict():
    """Return the module-level color dict `pdfcolor`.

    The dict maps a lower-case color name to a (red, green, blue) tuple
    of floats in range 0..1. The dict itself is returned, not a copy.
    """
    colors = pdfcolor
    return colors
def colors_wx_list():
    """Return the module-level color list `_wxcolors`.

    Each item is a (name, red, green, blue) tuple with an upper-case name
    and integer components in range 0..255. The list itself is returned,
    not a copy.
    """
    colors = _wxcolors
    return colors
- # We cannot import utils earlier because it imports this .py file itself and
- # uses some pymupdf.* types in function typing.
- #
- from . import utils
- # Bind utils.*() functions as module-level names and as methods of the
- # classes below.
- #
- recover_bbox_quad = utils.recover_bbox_quad
- recover_char_quad = utils.recover_char_quad
- recover_line_quad = utils.recover_line_quad
- recover_quad = utils.recover_quad
- recover_span_quad = utils.recover_span_quad
- Annot.get_text = utils.get_text
- Annot.get_textbox = utils.get_textbox
- Document._do_links = utils.do_links
- Document._do_widgets = utils.do_widgets
- Document.del_toc_item = utils.del_toc_item
- Document.get_char_widths = utils.get_char_widths
- Document.get_oc = utils.get_oc
- Document.get_ocmd = utils.get_ocmd
- Document.get_page_labels = utils.get_page_labels
- Document.get_page_numbers = utils.get_page_numbers
- Document.get_page_pixmap = utils.get_page_pixmap
- Document.get_page_text = utils.get_page_text
- Document.get_toc = utils.get_toc
- Document.has_annots = utils.has_annots
- Document.has_links = utils.has_links
- Document.insert_page = utils.insert_page
- Document.new_page = utils.new_page
- Document.scrub = utils.scrub
- Document.search_page_for = utils.search_page_for
- Document.set_metadata = utils.set_metadata
- Document.set_oc = utils.set_oc
- Document.set_ocmd = utils.set_ocmd
- Document.set_page_labels = utils.set_page_labels
- Document.set_toc = utils.set_toc
- Document.set_toc_item = utils.set_toc_item
- Document.subset_fonts = utils.subset_fonts
- Document.tobytes = Document.write  # second name for an existing Document method, not a utils function
- Document.xref_copy = utils.xref_copy
- IRect.get_area = utils.get_area
- Page.apply_redactions = utils.apply_redactions
- Page.delete_image = utils.delete_image
- Page.delete_widget = utils.delete_widget
- Page.draw_bezier = utils.draw_bezier
- Page.draw_circle = utils.draw_circle
- Page.draw_curve = utils.draw_curve
- Page.draw_line = utils.draw_line
- Page.draw_oval = utils.draw_oval
- Page.draw_polyline = utils.draw_polyline
- Page.draw_quad = utils.draw_quad
- Page.draw_rect = utils.draw_rect
- Page.draw_sector = utils.draw_sector
- Page.draw_squiggle = utils.draw_squiggle
- Page.draw_zigzag = utils.draw_zigzag
- Page.get_image_info = utils.get_image_info
- Page.get_image_rects = utils.get_image_rects
- Page.get_label = utils.get_label
- Page.get_links = utils.get_links
- Page.get_pixmap = utils.get_pixmap
- Page.get_text = utils.get_text
- Page.get_text_blocks = utils.get_text_blocks
- Page.get_text_selection = utils.get_text_selection
- Page.get_text_words = utils.get_text_words
- Page.get_textbox = utils.get_textbox
- Page.get_textpage_ocr = utils.get_textpage_ocr
- Page.insert_image = utils.insert_image
- Page.insert_link = utils.insert_link
- Page.insert_text = utils.insert_text
- Page.insert_textbox = utils.insert_textbox
- Page.insert_htmlbox = utils.insert_htmlbox
- Page.new_shape = lambda x: utils.Shape(x)  # page.new_shape() builds a utils.Shape from the page
- Page.replace_image = utils.replace_image
- Page.search_for = utils.search_for
- Page.show_pdf_page = utils.show_pdf_page
- Page.update_link = utils.update_link
- Page.write_text = utils.write_text
- Shape = utils.Shape
- from .table import find_tables
- Page.find_tables = find_tables
- Rect.get_area = utils.get_area
- TextWriter.fill_textbox = utils.fill_textbox
class FitzDeprecation(DeprecationWarning):
    """Warning category used for the deprecated legacy aliases."""
def restore_aliases():
    """Re-create the legacy names as deprecated aliases of the current names.

    Installs a "once" filter for FitzDeprecation warnings, replaces
    warnings.showwarning with a function that writes through log(), and
    then adds one alias per entry of the table below.
    """
    warnings.filterwarnings( "once", category=FitzDeprecation)

    def showthis(msg, cat, filename, lineno, file=None, line=None):
        """Log a warning; FitzDeprecation warnings are cut down to one line."""
        formatted = warnings.formatwarning(msg, cat, filename, lineno, line=line)
        start = formatted.find("FitzDeprecation")
        if start >= 0:
            # Keep the first line from the category name onwards, minus
            # its first four characters.
            formatted = formatted[start:].splitlines()[0][4:]
        log(formatted)

    warnings.showwarning = showthis

    def _alias(class_, new_name, legacy_name=None):
        '''
        Adds an alias for a class_ or module item called <class_>.<new_name>.
            class_:
                Class/module to modify; use None for the current module.
            new_name:
                String name of existing item, e.g. name of method.
            legacy_name:
                Name of legacy object to create in <class_>. If None, we
                generate it from <new_name> by removing underscores and
                capitalising the letter that follows them.
        '''
        if class_ is None:
            class_ = sys.modules[__name__]
        if not legacy_name:
            head, *pieces = new_name.split('_')
            legacy_name = head + ''.join(
                piece[0].upper() + piece[1:] for piece in pieces if piece
            )
        new_object = getattr( class_, new_name)
        assert not getattr( class_, legacy_name, None), f'class {class_} already has {legacy_name}'
        if not callable( new_object):
            # Non-callables are aliased directly, without a warning.
            setattr( class_, legacy_name, new_object)
            return

        def deprecated_function( *args, **kwargs):
            warnings.warn(
                    f'"{legacy_name=}" removed from {class_} after v1.19.0 - use "{new_name}".',
                    category=FitzDeprecation,
                    )
            return new_object( *args, **kwargs)

        setattr( class_, legacy_name, deprecated_function)
        deprecated_function.__doc__ = (
                f'*** Deprecated and removed in version after v1.19.0 - use "{new_name}". ***\n'
                f'{new_object.__doc__}'
                )

    # One row per owner (None = this module). An entry is either the new
    # name alone (legacy name is generated) or a (new_name, legacy_name) pair.
    alias_table = (
        (Annot, (
            ('get_file', 'fileGet'), 'get_pixmap', ('get_sound', 'soundGet'),
            'get_text', 'get_textbox', ('get_textpage', 'getTextPage'),
            'line_ends', ('set_blendmode', 'setBlendMode'), 'set_border',
            'set_colors', 'set_flags', 'set_info', 'set_line_ends', 'set_name',
            ('set_oc', 'setOC'), 'set_opacity', 'set_rect',
            ('update_file', 'fileUpd'),
        )),
        (DisplayList, (
            'get_pixmap', ('get_textpage', 'getTextPage'),
        )),
        (Document, (
            'chapter_count', 'chapter_page_count',
            ('convert_to_pdf', 'convertToPDF'), 'copy_page', 'delete_page',
            ('delete_pages', 'deletePageRange'),
            ('embfile_add', 'embeddedFileAdd'),
            ('embfile_count', 'embeddedFileCount'),
            ('embfile_del', 'embeddedFileDel'),
            ('embfile_get', 'embeddedFileGet'),
            ('embfile_info', 'embeddedFileInfo'),
            ('embfile_names', 'embeddedFileNames'),
            ('embfile_upd', 'embeddedFileUpd'),
            'extract_font', 'extract_image', 'find_bookmark', 'fullcopy_page',
            'get_char_widths', ('get_ocgs', 'getOCGs'),
            ('get_page_fonts', 'getPageFontList'),
            ('get_page_images', 'getPageImageList'),
            'get_page_pixmap', 'get_page_text',
            ('get_page_xobjects', 'getPageXObjectList'),
            ('get_sigflags', 'getSigFlags'), ('get_toc', 'getToC'),
            'get_xml_metadata', 'insert_page', ('insert_pdf', 'insertPDF'),
            'is_dirty', ('is_form_pdf', 'isFormPDF'), ('is_pdf', 'isPDF'),
            'is_reflowable', 'is_repaired', 'last_location', 'load_page',
            'make_bookmark', 'move_page', 'needs_pass', 'new_page',
            'next_location', 'page_count', ('page_cropbox', 'pageCropBox'),
            'page_xref', ('pdf_catalog', 'PDFCatalog'),
            ('pdf_trailer', 'PDFTrailer'),
            ('prev_location', 'previousLocation'),
            'resolve_link', 'search_page_for', 'set_language', 'set_metadata',
            ('set_toc', 'setToC'), 'set_xml_metadata', 'update_object',
            'update_stream', ('xref_is_stream', 'isStream'), 'xref_length',
            'xref_object', 'xref_stream', 'xref_stream_raw',
            ('xref_xml_metadata', 'metadataXML'),
        )),
        (IRect, (
            'get_area', ('get_area', 'getRectArea'), 'include_point',
            'include_rect', 'is_empty', 'is_infinite',
        )),
        (Link, (
            'is_external', 'set_border', 'set_colors',
        )),
        (Matrix, (
            'is_rectilinear', ('prerotate', 'preRotate'),
            ('prescale', 'preScale'), ('preshear', 'preShear'),
            ('pretranslate', 'preTranslate'),
        )),
        (None, (
            ('get_pdf_now', 'getPDFnow'), ('get_pdf_str', 'getPDFstr'),
            'get_text_length', ('get_text_length', 'getTextlength'),
            ('image_profile', 'ImageProperties'),
            ('paper_rect', 'PaperRect'), ('paper_size', 'PaperSize'),
            'paper_sizes', 'planish_line',
        )),
        (Outline, (
            'is_external', 'is_open',
        )),
        (Page, (
            'add_caret_annot', 'add_circle_annot', 'add_file_annot',
            'add_freetext_annot', 'add_highlight_annot', 'add_ink_annot',
            'add_line_annot', 'add_polygon_annot', 'add_polyline_annot',
            'add_rect_annot', 'add_redact_annot', 'add_squiggly_annot',
            'add_stamp_annot', 'add_strikeout_annot', 'add_text_annot',
            'add_underline_annot', 'add_widget', 'clean_contents',
            ('cropbox', 'CropBox'), ('cropbox_position', 'CropBoxPosition'),
            'delete_annot', 'delete_link', 'delete_widget',
            'derotation_matrix', 'draw_bezier', 'draw_circle', 'draw_curve',
            'draw_line', 'draw_oval', 'draw_polyline', 'draw_quad',
            'draw_rect', 'draw_sector', 'draw_squiggle', 'draw_zigzag',
            'first_annot', 'first_link', 'first_widget', 'get_contents',
            ('get_displaylist', 'getDisplayList'), 'get_drawings',
            ('get_fonts', 'getFontList'), 'get_image_bbox',
            ('get_images', 'getImageList'), 'get_links', 'get_pixmap',
            ('get_svg_image', 'getSVGimage'), 'get_text', 'get_text_blocks',
            'get_text_words', 'get_textbox', ('get_textpage', 'getTextPage'),
            'insert_font', 'insert_image', 'insert_link', 'insert_text',
            'insert_textbox', ('is_wrapped', '_isWrapped'), 'load_annot',
            'load_links', ('mediabox', 'MediaBox'),
            ('mediabox_size', 'MediaBoxSize'), 'new_shape', 'read_contents',
            'rotation_matrix', 'search_for', ('set_cropbox', 'setCropBox'),
            ('set_mediabox', 'setMediaBox'), 'set_rotation',
            ('show_pdf_page', 'showPDFpage'), 'transformation_matrix',
            'update_link', 'wrap_contents', 'write_text',
        )),
        (Pixmap, (
            'clear_with', ('copy', 'copyPixmap'), 'gamma_with',
            ('invert_irect', 'invertIRect'), ('pil_save', 'pillowWrite'),
            ('pil_tobytes', 'pillowData'), ('save', 'writeImage'),
            ('save', 'writePNG'), 'set_alpha', ('set_dpi', 'setResolution'),
            'set_origin', 'set_pixel', 'set_rect', 'tint_with',
            ('tobytes', 'getImageData'), ('tobytes', 'getPNGData'),
            ('tobytes', 'getPNGdata'),
        )),
        (Quad, (
            'is_convex', 'is_empty', 'is_rectangular',
        )),
        (Rect, (
            'get_area', ('get_area', 'getRectArea'), 'include_point',
            'include_rect', 'is_empty', 'is_infinite',
        )),
        (TextWriter, (
            'fill_textbox', 'write_text',
        )),
        (utils.Shape, (
            'draw_bezier', 'draw_circle', 'draw_curve', 'draw_line',
            'draw_oval', 'draw_polyline', 'draw_quad', 'draw_rect',
            'draw_sector', 'draw_squiggle', 'draw_zigzag', 'insert_text',
            'insert_textbox',
        )),
    )
    for owner, entries in alias_table:
        for entry in entries:
            if isinstance(entry, str):
                _alias( owner, entry)
            else:
                _alias( owner, *entry)
- if 0:  # disabled: restore_aliases() is never called from here
- restore_aliases()
- __version__ = VersionBind
- __doc__ = (  # module docstring: binding and MuPDF versions, then Python version, platform and word size
- f'PyMuPDF {VersionBind}: Python bindings for the MuPDF {VersionFitz} library (rebased implementation).\n'
- f'Python {sys.version_info[0]}.{sys.version_info[1]} running on {sys.platform} ({64 if sys.maxsize > 2**32 else 32}-bit).\n'
- )
|