benjamin.harris
/
tasplanning_report


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763777377837793780378137823783378437853786378737883789379037913792379337943795379637973798379938003801380238033804380538063807380838093810381138123813381438153816381738183819382038213822382338243825382638273828382938303831383238333834383538363837383838393840384138423843384438453846384738483849385038513852385338543855385638573858385938603861386238633864386538663867386838693870387138723873387438753876387738783879388038813882388338843885388638873888388938903891389238933894389538963897389838993900390139023903390439053906390739083909391039113912391339143915391639173918391939203921392239233924392539263927392839293930393139323933393439353936393739383939394039413942394339443945394639473948394939503951395239533954395539563957395839593960396139623963396439653966396739683969397039713972397339743975397639773978397939803981398239833984398539863987398839893990399139923993399439953996399739983999400040014002400340044005400640074008400940104011401240134014401540164017401840194020402140224023402440254026402740284029403040314032403340344035403640374038403940404041404240434044404540464047404840494050405140524053405440554056405740584059406040614062406340644065406640674068406940704071407240734074407540764077407840794080408140824083408440854086408740884089409040914092409340944095409640974098409941004101410241034104410541064107410841094110411141124113411441154116411741184119412041214122412341244125412641274128412941304131413241334134413541364137413841394140414141424143414441454146414741484149415041514152415341544155415641574158415941604161416241634164416541664167416841694170417141724173417441754176417741784179418041814182418341844185418641874188418941904191419241934194419541964197419841994200420142024203420442054206420742084209421042114212421342144215421642174218421942204221422242234224422542264227422842294230423142324233423442354236423742384239424042414242424342444245424642474248424942504251425242534254425542564257425842594260426142624263426442654266426742684269427042714272427342744275427642774278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477747784779478047814782478347844785478647874788478947904791479247934794479547964797479847994800480148024803480448054806480748084809481048114812481348144815481648174818481948204821482248234824482548264827482848294830483148324833483448354836483748384839484048414842484348444845484648474848484948504851485248534854485548564857485848594860486148624863486448654866486748684869487048714872487348744875487648774878487948804881488248834884488548864887488848894890489148924893489448954896489748984899490049014902490349044905490649074908490949104911491249134914491549164917491849194920492149224923492449254926492749284929493049314932493349344935493649374938493949404941494249434944494549464947494849494950495149524953495449554956495749584959496049614962496349644965496649674968496949704971497249734974497549764977497849794980498149824983498449854986498749884989499049914992499349944995499649974998499950005001500250035004500550065007500850095010501150125013501450155016501750185019502050215022502350245025502650275028502950305031503250335034503550365037503850395040504150425043504450455046504750485049505050515052505350545055505650575058505950605061506250635064506550665067506850695070507150725073507450755076507750785079508050815082508350845085508650875088508950905091509250935094509550965097509850995100510151025103510451055106510751085109511051115112511351145115511651175118511951205121512251235124512551265127512851295130513151325133513451355136513751385139514051415142514351445145514651475148514951505151515251535154515551565157515851595160516151625163516451655166516751685169517051715172517351745175517651775178517951805181518251835184518551865187518851895190519151925193519451955196519751985199520052015202520352045205520652075208520952105211521252135214521552165217521852195220522152225223522452255226522752285229523052315232523352345235523652375238523952405241524252435244524552465247524852495250525152525253525452555256525752585259526052615262526352645265526652675268526952705271527252735274527552765277527852795280528152825283528452855286528752885289529052915292529352945295529652975298529953005301530253035304530553065307530853095310531153125313531453155316531753185319532053215322532353245325532653275328532953305331533253335334533553365337533853395340534153425343534453455346534753485349535053515352535353545355535653575358535953605361536253635364536553665367536853695370537153725373537453755376537753785379538053815382538353845385538653875388538953905391539253935394539553965397539853995400540154025403540454055406540754085409541054115412541354145415541654175418541954205421542254235424542554265427542854295430543154325433543454355436543754385439544054415442544354445445544654475448544954505451545254535454545554565457545854595460546154625463546454655466546754685469547054715472547354745475547654775478547954805481548254835484548554865487548854895490549154925493549454955496549754985499550055015502550355045505550655075508550955105511551255135514551555165517551855195520552155225523552455255526552755285529553055315532553355345535553655375538553955405541554255435544554555465547554855495550555155525553555455555556555755585559556055615562556355645565556655675568556955705571557255735574557555765577557855795580558155825583558455855586558755885589559055915592559355945595559655975598559956005601560256035604560556065607560856095610561156125613561456155616561756185619562056215622562356245625562656275628562956305631563256335634563556365637563856395640564156425643564456455646564756485649565056515652565356545655565656575658565956605661566256635664566556665667566856695670567156725673567456755676567756785679
							# ------------------------------------------------------------------------
# Copyright 2020-2022, Harald Lieder, mailto:harald.lieder@outlook.com
# License: GNU AFFERO GPL 3.0, https://www.gnu.org/licenses/agpl-3.0.html
#
# Part of "PyMuPDF", a Python binding for "MuPDF" (http://mupdf.com), a
# lightweight PDF, XPS, and E-book viewer, renderer and toolkit which is
# maintained and developed by Artifex Software, Inc. https://artifex.com.
# ------------------------------------------------------------------------
import io
import math
import os
import typing
import weakref

try:
    from . import pymupdf
except Exception:
    import pymupdf
try:
    from . import mupdf
except Exception:
    import mupdf

_format_g = pymupdf.format_g

g_exceptions_verbose = pymupdf.g_exceptions_verbose

point_like = "point_like"
rect_like = "rect_like"
matrix_like = "matrix_like"
quad_like = "quad_like"

# ByteString is gone from typing in 3.14.
# collections.abc.Buffer available from 3.12 only
try:
    ByteString = typing.ByteString
except AttributeError:
    # pylint: disable=unsupported-binary-operation
    ByteString = bytes | bytearray | memoryview

AnyType = typing.Any
OptInt = typing.Union[int, None]
OptFloat = typing.Optional[float]
OptStr = typing.Optional[str]
OptDict = typing.Optional[dict]
OptBytes = typing.Optional[ByteString]
OptSeq = typing.Optional[typing.Sequence]

"""
This is a collection of functions to extend PyMupdf.
"""


def write_text(
        page: pymupdf.Page,
        rect=None,
        writers=None,
        overlay=True,
        color=None,
        opacity=None,
        keep_proportion=True,
        rotate=0,
        oc=0,
        ) -> None:
    """Write the text of one or more pymupdf.TextWriter objects.

    Args:
        rect: target rectangle. If None, the union of the text writers is used.
        writers: one or more pymupdf.TextWriter objects.
        overlay: put in foreground or background.
        keep_proportion: maintain aspect ratio of rectangle sides.
        rotate: arbitrary rotation angle.
        oc: the xref of an optional content object
    """
    assert isinstance(page, pymupdf.Page)
    if not writers:
        raise ValueError("need at least one pymupdf.TextWriter")
    if type(writers) is pymupdf.TextWriter:
        if rotate == 0 and rect is None:
            writers.write_text(page, opacity=opacity, color=color, overlay=overlay)
            return None
        else:
            writers = (writers,)
    clip = writers[0].text_rect
    textdoc = pymupdf.Document()
    tpage = textdoc.new_page(width=page.rect.width, height=page.rect.height)
    for writer in writers:
        clip |= writer.text_rect
        writer.write_text(tpage, opacity=opacity, color=color)
    if rect is None:
        rect = clip
    page.show_pdf_page(
        rect,
        textdoc,
        0,
        overlay=overlay,
        keep_proportion=keep_proportion,
        rotate=rotate,
        clip=clip,
        oc=oc,
    )
    textdoc = None
    tpage = None


def show_pdf_page(
        page,
        rect,
        docsrc,
        pno=0,
        keep_proportion=True,
        overlay=True,
        oc=0,
        rotate=0,
        clip=None,
        ) -> int:
    """Show page number 'pno' of PDF 'docsrc' in rectangle 'rect'.

    Args:
        rect: (rect-like) where to place the source image
        docsrc: (document) source PDF
        pno: (int) source page number
        keep_proportion: (bool) do not change width-height-ratio
        overlay: (bool) put in foreground
        oc: (xref) make visibility dependent on this OCG / OCMD (which must be defined in the target PDF)
        rotate: (int) degrees (multiple of 90)
        clip: (rect-like) part of source page rectangle
    Returns:
        xref of inserted object (for reuse)
    """
    def calc_matrix(sr, tr, keep=True, rotate=0):
        """Calculate transformation matrix from source to target rect.

        Notes:
            The product of four matrices in this sequence: (1) translate correct
            source corner to origin, (2) rotate, (3) scale, (4) translate to
            target's top-left corner.
        Args:
            sr: source rect in PDF (!) coordinate system
            tr: target rect in PDF coordinate system
            keep: whether to keep source ratio of width to height
            rotate: rotation angle in degrees
        Returns:
            Transformation matrix.
        """
        # calc center point of source rect
        smp = (sr.tl + sr.br) / 2.0
        # calc center point of target rect
        tmp = (tr.tl + tr.br) / 2.0

        # m moves to (0, 0), then rotates
        m = pymupdf.Matrix(1, 0, 0, 1, -smp.x, -smp.y) * pymupdf.Matrix(rotate)

        sr1 = sr * m  # resulting source rect to calculate scale factors

        fw = tr.width / sr1.width  # scale the width
        fh = tr.height / sr1.height  # scale the height
        if keep:
            fw = fh = min(fw, fh)  # take min if keeping aspect ratio

        m *= pymupdf.Matrix(fw, fh)  # concat scale matrix
        m *= pymupdf.Matrix(1, 0, 0, 1, tmp.x, tmp.y)  # concat move to target center
        return pymupdf.JM_TUPLE(m)

    pymupdf.CheckParent(page)
    doc = page.parent

    if not doc.is_pdf or not docsrc.is_pdf:
        raise ValueError("is no PDF")

    if rect.is_empty or rect.is_infinite:
        raise ValueError("rect must be finite and not empty")

    while pno < 0:  # support negative page numbers
        pno += docsrc.page_count
    src_page = docsrc[pno]  # load source page

    tar_rect = rect * ~page.transformation_matrix  # target rect in PDF coordinates

    src_rect = src_page.rect if not clip else src_page.rect & clip  # source rect
    if src_rect.is_empty or src_rect.is_infinite:
        raise ValueError("clip must be finite and not empty")
    src_rect = src_rect * ~src_page.transformation_matrix  # ... in PDF coord

    matrix = calc_matrix(src_rect, tar_rect, keep=keep_proportion, rotate=rotate)

    # list of existing /Form /XObjects
    ilst = [i[1] for i in doc.get_page_xobjects(page.number)]
    ilst += [i[7] for i in doc.get_page_images(page.number)]
    ilst += [i[4] for i in doc.get_page_fonts(page.number)]

    # create a name not in that list
    n = "fzFrm"
    i = 0
    _imgname = n + "0"
    while _imgname in ilst:
        i += 1
        _imgname = n + str(i)

    isrc = docsrc._graft_id  # used as key for graftmaps
    if doc._graft_id == isrc:
        raise ValueError("source document must not equal target")

    # retrieve / make pymupdf.Graftmap for source PDF
    gmap = doc.Graftmaps.get(isrc, None)
    if gmap is None:
        gmap = pymupdf.Graftmap(doc)
        doc.Graftmaps[isrc] = gmap

    # take note of generated xref for automatic reuse
    pno_id = (isrc, pno)  # id of docsrc[pno]
    xref = doc.ShownPages.get(pno_id, 0)

    if overlay:
        page.wrap_contents()  # ensure a balanced graphics state
    xref = page._show_pdf_page(
        src_page,
        overlay=overlay,
        matrix=matrix,
        xref=xref,
        oc=oc,
        clip=src_rect,
        graftmap=gmap,
        _imgname=_imgname,
    )
    doc.ShownPages[pno_id] = xref

    return xref


def replace_image(page: pymupdf.Page, xref: int, *, filename=None, pixmap=None, stream=None):
    """Replace the image referred to by xref.

    Replace the image by changing the object definition stored under xref. This
    will leave the pages appearance instructions intact, so the new image is
    being displayed with the same bbox, rotation etc.
    By providing a small fully transparent image, an effect as if the image had
    been deleted can be achieved.
    A typical use may include replacing large images by a smaller version,
    e.g. with a lower resolution or graylevel instead of colored.

    Args:
        xref: the xref of the image to replace.
        filename, pixmap, stream: exactly one of these must be provided. The
            meaning being the same as in Page.insert_image.
    """
    doc = page.parent  # the owning document
    if not doc.xref_is_image(xref):
        raise ValueError("xref not an image")  # insert new image anywhere in page
    if bool(filename) + bool(stream) + bool(pixmap) != 1:
        raise ValueError("Exactly one of filename/stream/pixmap must be given")
    new_xref = page.insert_image(
        page.rect, filename=filename, stream=stream, pixmap=pixmap
    )
    doc.xref_copy(new_xref, xref)  # copy over new to old
    last_contents_xref = page.get_contents()[-1]
    # new image insertion has created a new /Contents source,
    # which we will set to spaces now
    doc.update_stream(last_contents_xref, b" ")
    page._image_info = None  # clear cache of extracted image information


def delete_image(page: pymupdf.Page, xref: int):
    """Delete the image referred to by xef.

    Actually replaces by a small transparent Pixmap using method Page.replace_image.

    Args:
        xref: xref of the image to delete.
    """
    # make a small 100% transparent pixmap (of just any dimension)
    pix = pymupdf.Pixmap(pymupdf.csGRAY, (0, 0, 1, 1), 1)
    pix.clear_with()  # clear all samples bytes to 0x00
    page.replace_image(xref, pixmap=pix)


def insert_image(
        page,
        rect,
        *,
        alpha=-1,
        filename=None,
        height=0,
        keep_proportion=True,
        mask=None,
        oc=0,
        overlay=True,
        pixmap=None,
        rotate=0,
        stream=None,
        width=0,
        xref=0,
        ):
    """Insert an image for display in a rectangle.

    Args:
        rect: (rect_like) position of image on the page.
        alpha: (int, optional) set to 0 if image has no transparency.
        filename: (str, Path, file object) image filename.
        height: (int)
        keep_proportion: (bool) keep width / height ratio (default).
        mask: (bytes, optional) image consisting of alpha values to use.
        oc: (int) xref of OCG or OCMD to declare as Optional Content.
        overlay: (bool) put in foreground (default) or background.
        pixmap: (pymupdf.Pixmap) use this as image.
        rotate: (int) rotate by 0, 90, 180 or 270 degrees.
        stream: (bytes) use this as image.
        width: (int)
        xref: (int) use this as image.

    'page' and 'rect' are positional, all other parameters are keywords.

    If 'xref' is given, that image is used. Other input options are ignored.
    Else, exactly one of pixmap, stream or filename must be given.

    'alpha=0' for non-transparent images improves performance significantly.
    Affects stream and filename only.

    Optimum transparent insertions are possible by using filename / stream in
    conjunction with a 'mask' image of alpha values.

    Returns:
        xref (int) of inserted image. Re-use as argument for multiple insertions.
    """
    pymupdf.CheckParent(page)
    doc = page.parent
    if not doc.is_pdf:
        raise ValueError("is no PDF")

    if xref == 0 and (bool(filename) + bool(stream) + bool(pixmap) != 1):
        raise ValueError("xref=0 needs exactly one of filename, pixmap, stream")

    if filename:
        if type(filename) is str:
            pass
        elif hasattr(filename, "absolute"):
            filename = str(filename)
        elif hasattr(filename, "name"):
            filename = filename.name
        else:
            raise ValueError("bad filename")

    if filename and not os.path.exists(filename):
        raise FileNotFoundError("No such file: '%s'" % filename)
    elif stream and type(stream) not in (bytes, bytearray, io.BytesIO):
        raise ValueError("stream must be bytes-like / BytesIO")
    elif pixmap and type(pixmap) is not pymupdf.Pixmap:
        raise ValueError("pixmap must be a pymupdf.Pixmap")
    if mask and not (stream or filename):
        raise ValueError("mask requires stream or filename")
    if mask and type(mask) not in (bytes, bytearray, io.BytesIO):
        raise ValueError("mask must be bytes-like / BytesIO")
    while rotate < 0:
        rotate += 360
    while rotate >= 360:
        rotate -= 360
    if rotate not in (0, 90, 180, 270):
        raise ValueError("bad rotate value")

    r = pymupdf.Rect(rect)
    if r.is_empty or r.is_infinite:
        raise ValueError("rect must be finite and not empty")
    clip = r * ~page.transformation_matrix

    # Create a unique image reference name.
    ilst = [i[7] for i in doc.get_page_images(page.number)]
    ilst += [i[1] for i in doc.get_page_xobjects(page.number)]
    ilst += [i[4] for i in doc.get_page_fonts(page.number)]
    n = "fzImg"  # 'pymupdf image'
    i = 0
    _imgname = n + "0"  # first name candidate
    while _imgname in ilst:
        i += 1
        _imgname = n + str(i)  # try new name

    if overlay:
        page.wrap_contents()  # ensure a balanced graphics state
    digests = doc.InsertedImages
    xref, digests = page._insert_image(
        filename=filename,
        pixmap=pixmap,
        stream=stream,
        imask=mask,
        clip=clip,
        overlay=overlay,
        oc=oc,
        xref=xref,
        rotate=rotate,
        keep_proportion=keep_proportion,
        width=width,
        height=height,
        alpha=alpha,
        _imgname=_imgname,
        digests=digests,
    )
    if digests is not None:
        doc.InsertedImages = digests

    return xref


def search_for(
        page,
        text,
        *,
        clip=None,
        quads=False,
        flags=pymupdf.TEXT_DEHYPHENATE
            | pymupdf.TEXT_PRESERVE_WHITESPACE
            | pymupdf.TEXT_PRESERVE_LIGATURES
            | pymupdf.TEXT_MEDIABOX_CLIP
            ,
        textpage=None,
        ) -> list:
    """Search for a string on a page.

    Args:
        text: string to be searched for
        clip: restrict search to this rectangle
        quads: (bool) return quads instead of rectangles
        flags: bit switches, default: join hyphened words
        textpage: a pre-created pymupdf.TextPage
    Returns:
        a list of rectangles or quads, each containing one occurrence.
    """
    if clip is not None:
        clip = pymupdf.Rect(clip)

    pymupdf.CheckParent(page)
    tp = textpage
    if tp is None:
        tp = page.get_textpage(clip=clip, flags=flags)  # create pymupdf.TextPage
    elif getattr(tp, "parent") != page:
        raise ValueError("not a textpage of this page")
    rlist = tp.search(text, quads=quads)
    if textpage is None:
        del tp
    return rlist


def search_page_for(
    doc: pymupdf.Document,
    pno: int,
    text: str,
    quads: bool = False,
    clip: rect_like = None,
    flags: int = pymupdf.TEXT_DEHYPHENATE
            | pymupdf.TEXT_PRESERVE_LIGATURES
            | pymupdf.TEXT_PRESERVE_WHITESPACE
            | pymupdf.TEXT_MEDIABOX_CLIP
            ,
    textpage: pymupdf.TextPage = None,
) -> list:
    """Search for a string on a page.

    Args:
        pno: page number
        text: string to be searched for
        clip: restrict search to this rectangle
        quads: (bool) return quads instead of rectangles
        flags: bit switches, default: join hyphened words
        textpage: reuse a prepared textpage
    Returns:
        a list of rectangles or quads, each containing an occurrence.
    """

    return doc[pno].search_for(
        text,
        quads=quads,
        clip=clip,
        flags=flags,
        textpage=textpage,
    )


def get_text_blocks(
    page: pymupdf.Page,
    clip: rect_like = None,
    flags: OptInt = None,
    textpage: pymupdf.TextPage = None,
    sort: bool = False,
) -> list:
    """Return the text blocks on a page.

    Notes:
        Lines in a block are concatenated with line breaks.
    Args:
        flags: (int) control the amount of data parsed into the textpage.
    Returns:
        A list of the blocks. Each item contains the containing rectangle
        coordinates, text lines, running block number and block type.
    """
    pymupdf.CheckParent(page)
    if flags is None:
        flags = pymupdf.TEXTFLAGS_BLOCKS
    tp = textpage
    if tp is None:
        tp = page.get_textpage(clip=clip, flags=flags)
    elif getattr(tp, "parent") != page:
        raise ValueError("not a textpage of this page")

    blocks = tp.extractBLOCKS()
    if textpage is None:
        del tp
    if sort:
        blocks.sort(key=lambda b: (b[3], b[0]))
    return blocks


def get_text_words(
    page: pymupdf.Page,
    clip: rect_like = None,
    flags: OptInt = None,
    textpage: pymupdf.TextPage = None,
    sort: bool = False,
    delimiters=None,
    tolerance=3,
) -> list:
    """Return the text words as a list with the bbox for each word.

    Args:
        page: pymupdf.Page
        clip: (rect-like) area on page to consider
        flags: (int) control the amount of data parsed into the textpage.
        textpage: (pymupdf.TextPage) either passed-in or None.
        sort: (bool) sort the words in reading sequence.
        delimiters: (str,list) characters to use as word delimiters.
        tolerance: (float) consider words to be part of the same line if
            top or bottom coordinate are not larger than this. Relevant
            only if sort=True.

    Returns:
        Word tuples (x0, y0, x1, y1, "word", bno, lno, wno).
    """

    def sort_words(words):
        """Sort words line-wise, forgiving small deviations."""
        words.sort(key=lambda w: (w[3], w[0]))
        nwords = []  # final word list
        line = [words[0]]  # collects words roughly in same line
        lrect = pymupdf.Rect(words[0][:4])  # start the line rectangle
        for w in words[1:]:
            wrect = pymupdf.Rect(w[:4])
            if (
                abs(wrect.y0 - lrect.y0) <= tolerance
                or abs(wrect.y1 - lrect.y1) <= tolerance
            ):
                line.append(w)
                lrect |= wrect
            else:
                line.sort(key=lambda w: w[0])  # sort words in line l-t-r
                nwords.extend(line)  # append to final words list
                line = [w]  # start next line
                lrect = wrect  # start next line rect

        line.sort(key=lambda w: w[0])  # sort words in line l-t-r
        nwords.extend(line)  # append to final words list

        return nwords

    pymupdf.CheckParent(page)
    if flags is None:
        flags = pymupdf.TEXTFLAGS_WORDS
    tp = textpage
    if tp is None:
        tp = page.get_textpage(clip=clip, flags=flags)
    elif getattr(tp, "parent") != page:
        raise ValueError("not a textpage of this page")

    words = tp.extractWORDS(delimiters)

    # if textpage was given, we subselect the words in clip
    if textpage is not None and clip is not None:
        # sub-select words contained in clip
        clip = pymupdf.Rect(clip)
        words = [
            w for w in words if abs(clip & w[:4]) >= 0.5 * abs(pymupdf.Rect(w[:4]))
        ]

    if textpage is None:
        del tp
    if words and sort:
        # advanced sort if any words found
        words = sort_words(words)

    return words


def get_sorted_text(
    page: pymupdf.Page,
    clip: rect_like = None,
    flags: OptInt = None,
    textpage: pymupdf.TextPage = None,
    tolerance=3,
) -> str:
    """Extract plain text avoiding unacceptable line breaks.

    Text contained in clip will be sorted in reading sequence. Some effort
    is also spent to simulate layout vertically and horizontally.

    Args:
        page: pymupdf.Page
        clip: (rect-like) only consider text inside
        flags: (int) text extraction flags
        textpage: pymupdf.TextPage
        tolerance: (float) consider words to be on the same line if their top
            or bottom coordinates do not differ more than this.

    Notes:
        If a TextPage is provided, all text is checked for being inside clip
        with at least 50% of its bbox.
        This allows to use some "global" TextPage in conjunction with sub-
        selecting words in parts of the defined TextPage rectangle.

    Returns:
        A text string in reading sequence. Left indentation of each line,
        inter-line and inter-word distances strive to reflect the layout.
    """

    def line_text(clip, line):
        """Create the string of one text line.

        We are trying to simulate some horizontal layout here, too.

        Args:
            clip: (pymupdf.Rect) the area from which all text is being read.
            line: (list) word tuples (rect, text) contained in the line
        Returns:
            Text in this line. Generated from words in 'line'. Distance from
            predecessor is translated to multiple spaces, thus simulating
            text indentations and large horizontal distances.
        """
        line.sort(key=lambda w: w[0].x0)
        ltext = ""  # text in the line
        x1 = clip.x0  # end coordinate of ltext
        lrect = pymupdf.EMPTY_RECT()  # bbox of this line
        for r, t in line:
            lrect |= r  # update line bbox
            # convert distance to previous word to multiple spaces
            dist = max(
                int(round((r.x0 - x1) / r.width * len(t))),
                0 if (x1 == clip.x0 or r.x0 <= x1) else 1,
            )  # number of space characters

            ltext += " " * dist + t  # append word string
            x1 = r.x1  # update new end position
        return ltext

    # Extract words in correct sequence first.
    words = [
        (pymupdf.Rect(w[:4]), w[4])
        for w in get_text_words(
            page,
            clip=clip,
            flags=flags,
            textpage=textpage,
            sort=True,
            tolerance=tolerance,
        )
    ]

    if not words:  # no text present
        return ""
    totalbox = pymupdf.EMPTY_RECT()  # area covering all text
    for wr, text in words:
        totalbox |= wr

    lines = []  # list of reconstituted lines
    line = [words[0]]  # current line
    lrect = words[0][0]  # the line's rectangle

    # walk through the words
    for wr, text in words[1:]:  # start with second word
        w0r, _ = line[-1]  # read previous word in current line

        # if this word matches top or bottom of the line, append it
        if abs(lrect.y0 - wr.y0) <= tolerance or abs(lrect.y1 - wr.y1) <= tolerance:
            line.append((wr, text))
            lrect |= wr
        else:
            # output current line and re-initialize
            ltext = line_text(totalbox, line)
            lines.append((lrect, ltext))
            line = [(wr, text)]
            lrect = wr

    # also append unfinished last line
    ltext = line_text(totalbox, line)
    lines.append((lrect, ltext))

    # sort all lines vertically
    lines.sort(key=lambda l: (l[0].y1))

    text = lines[0][1]  # text of first line
    y1 = lines[0][0].y1  # its bottom coordinate
    for lrect, ltext in lines[1:]:
        distance = min(int(round((lrect.y0 - y1) / lrect.height)), 5)
        breaks = "\n" * (distance + 1)
        text += breaks + ltext
        y1 = lrect.y1

    # return text in clip
    return text


def get_textbox(
    page: pymupdf.Page,
    rect: rect_like,
    textpage: pymupdf.TextPage = None,
) -> str:
    tp = textpage
    if tp is None:
        tp = page.get_textpage()
    elif getattr(tp, "parent") != page:
        raise ValueError("not a textpage of this page")
    rc = tp.extractTextbox(rect)
    if textpage is None:
        del tp
    return rc


def get_text_selection(
    page: pymupdf.Page,
    p1: point_like,
    p2: point_like,
    clip: rect_like = None,
    textpage: pymupdf.TextPage = None,
):
    pymupdf.CheckParent(page)
    tp = textpage
    if tp is None:
        tp = page.get_textpage(clip=clip, flags=pymupdf.TEXT_DEHYPHENATE)
    elif getattr(tp, "parent") != page:
        raise ValueError("not a textpage of this page")
    rc = tp.extractSelection(p1, p2)
    if textpage is None:
        del tp
    return rc


def get_textpage_ocr(
    page: pymupdf.Page,
    flags: int = 0,
    language: str = "eng",
    dpi: int = 72,
    full: bool = False,
    tessdata: str = None,
) -> pymupdf.TextPage:
    """Create a Textpage from combined results of normal and OCR text parsing.

    Args:
        flags: (int) control content becoming part of the result.
        language: (str) specify expected language(s). Default is "eng" (English).
        dpi: (int) resolution in dpi, default 72.
        full: (bool) whether to OCR the full page image, or only its images (default)
    """
    pymupdf.CheckParent(page)
    tessdata = pymupdf.get_tessdata(tessdata)

    def full_ocr(page, dpi, language, flags):
        zoom = dpi / 72
        mat = pymupdf.Matrix(zoom, zoom)
        pix = page.get_pixmap(matrix=mat)
        ocr_pdf = pymupdf.Document(
                "pdf",
                pix.pdfocr_tobytes(
                    compress=False,
                    language=language,
                    tessdata=tessdata,
                    ),
                )
        ocr_page = ocr_pdf.load_page(0)
        unzoom = page.rect.width / ocr_page.rect.width
        ctm = pymupdf.Matrix(unzoom, unzoom) * page.derotation_matrix
        tpage = ocr_page.get_textpage(flags=flags, matrix=ctm)
        ocr_pdf.close()
        pix = None
        tpage.parent = weakref.proxy(page)
        return tpage

    # if OCR for the full page, OCR its pixmap @ desired dpi
    if full:
        return full_ocr(page, dpi, language, flags)

    # For partial OCR, make a normal textpage, then extend it with text that
    # is OCRed from each image.
    # Because of this, we need the images flag bit set ON.
    tpage = page.get_textpage(flags=flags)
    for block in page.get_text("dict", flags=pymupdf.TEXT_PRESERVE_IMAGES)["blocks"]:
        if block["type"] != 1:  # only look at images
            continue
        bbox = pymupdf.Rect(block["bbox"])
        if bbox.width <= 3 or bbox.height <= 3:  # ignore tiny stuff
            continue
        try:
            pix = pymupdf.Pixmap(block["image"])  # get image pixmap
            if pix.n - pix.alpha != 3:  # we need to convert this to RGB!
                pix = pymupdf.Pixmap(pymupdf.csRGB, pix)
            if pix.alpha:  # must remove alpha channel
                pix = pymupdf.Pixmap(pix, 0)
            imgdoc = pymupdf.Document(
                    "pdf",
                    pix.pdfocr_tobytes(language=language, tessdata=tessdata),
                    )  # pdf with OCRed page
            imgpage = imgdoc.load_page(0)  # read image as a page
            pix = None
            # compute matrix to transform coordinates back to that of 'page'
            imgrect = imgpage.rect  # page size of image PDF
            shrink = pymupdf.Matrix(1 / imgrect.width, 1 / imgrect.height)
            mat = shrink * block["transform"]
            imgpage.extend_textpage(tpage, flags=0, matrix=mat)
            imgdoc.close()
        except (RuntimeError, mupdf.FzErrorBase):
            if 0 and g_exceptions_verbose:
                # Don't show exception info here because it can happen in
                # normal operation (see test_3842b).
                pymupdf.exception_info()
            tpage = None
            pymupdf.message("Falling back to full page OCR")
            return full_ocr(page, dpi, language, flags)

    return tpage


def get_image_info(page: pymupdf.Page, hashes: bool = False, xrefs: bool = False) -> list:
    """Extract image information only from a pymupdf.TextPage.

    Args:
        hashes: (bool) include MD5 hash for each image.
        xrefs: (bool) try to find the xref for each image. Sets hashes to true.
    """
    doc = page.parent
    if xrefs and doc.is_pdf:
        hashes = True
    if not doc.is_pdf:
        xrefs = False
    imginfo = getattr(page, "_image_info", None)
    if imginfo and not xrefs:
        return imginfo
    if not imginfo:
        tp = page.get_textpage(flags=pymupdf.TEXT_PRESERVE_IMAGES)
        imginfo = tp.extractIMGINFO(hashes=hashes)
        del tp
        if hashes:
            page._image_info = imginfo
    if not xrefs or not doc.is_pdf:
        return imginfo
    imglist = page.get_images()
    digests = {}
    for item in imglist:
        xref = item[0]
        pix = pymupdf.Pixmap(doc, xref)
        digests[pix.digest] = xref
        del pix
    for i in range(len(imginfo)):
        item = imginfo[i]
        xref = digests.get(item["digest"], 0)
        item["xref"] = xref
        imginfo[i] = item
    return imginfo


def get_image_rects(page: pymupdf.Page, name, transform=False) -> list:
    """Return list of image positions on a page.

    Args:
        name: (str, list, int) image identification. May be reference name, an
              item of the page's image list or an xref.
        transform: (bool) whether to also return the transformation matrix.
    Returns:
        A list of pymupdf.Rect objects or tuples of (pymupdf.Rect, pymupdf.Matrix)
        for all image locations on the page.
    """
    if type(name) in (list, tuple):
        xref = name[0]
    elif type(name) is int:
        xref = name
    else:
        imglist = [i for i in page.get_images() if i[7] == name]
        if imglist == []:
            raise ValueError("bad image name")
        elif len(imglist) != 1:
            raise ValueError("multiple image names found")
        xref = imglist[0][0]
    pix = pymupdf.Pixmap(page.parent, xref)  # make pixmap of the image to compute MD5
    digest = pix.digest
    del pix
    infos = page.get_image_info(hashes=True)
    if not transform:
        bboxes = [pymupdf.Rect(im["bbox"]) for im in infos if im["digest"] == digest]
    else:
        bboxes = [
            (pymupdf.Rect(im["bbox"]), pymupdf.Matrix(im["transform"]))
            for im in infos
            if im["digest"] == digest
        ]
    return bboxes


def get_text(
    page: pymupdf.Page,
    option: str = "text",
    *,
    clip: rect_like = None,
    flags: OptInt = None,
    textpage: pymupdf.TextPage = None,
    sort: bool = False,
    delimiters=None,
    tolerance=3,
):
    """Extract text from a page or an annotation.

    This is a unifying wrapper for various methods of the pymupdf.TextPage class.

    Args:
        option: (str) text, words, blocks, html, dict, json, rawdict, xhtml or xml.
        clip: (rect-like) restrict output to this area.
        flags: bit switches to e.g. exclude images or decompose ligatures.
        textpage: reuse this pymupdf.TextPage and make no new one. If specified,
            'flags' and 'clip' are ignored.

    Returns:
        the output of methods get_text_words / get_text_blocks or pymupdf.TextPage
        methods extractText, extractHTML, extractDICT, extractJSON, extractRAWDICT,
        extractXHTML or etractXML respectively.
        Default and misspelling choice is "text".
    """
    formats = {
        "text": pymupdf.TEXTFLAGS_TEXT,
        "html": pymupdf.TEXTFLAGS_HTML,
        "json": pymupdf.TEXTFLAGS_DICT,
        "rawjson": pymupdf.TEXTFLAGS_RAWDICT,
        "xml": pymupdf.TEXTFLAGS_XML,
        "xhtml": pymupdf.TEXTFLAGS_XHTML,
        "dict": pymupdf.TEXTFLAGS_DICT,
        "rawdict": pymupdf.TEXTFLAGS_RAWDICT,
        "words": pymupdf.TEXTFLAGS_WORDS,
        "blocks": pymupdf.TEXTFLAGS_BLOCKS,
    }
    option = option.lower()
    assert option in formats
    if option not in formats:
        option = "text"
    if flags is None:
        flags = formats[option]

    if option == "words":
        return get_text_words(
            page,
            clip=clip,
            flags=flags,
            textpage=textpage,
            sort=sort,
            delimiters=delimiters,
        )
    if option == "blocks":
        return get_text_blocks(
            page, clip=clip, flags=flags, textpage=textpage, sort=sort
        )

    if option == "text" and sort:
        return get_sorted_text(
            page,
            clip=clip,
            flags=flags,
            textpage=textpage,
            tolerance=tolerance,
        )

    pymupdf.CheckParent(page)
    cb = None
    if option in ("html", "xml", "xhtml"):  # no clipping for MuPDF functions
        clip = page.cropbox
    if clip is not None:
        clip = pymupdf.Rect(clip)
        cb = None
    elif type(page) is pymupdf.Page:
        cb = page.cropbox
    # pymupdf.TextPage with or without images
    tp = textpage
    #pymupdf.exception_info()
    if tp is None:
        tp = page.get_textpage(clip=clip, flags=flags)
    elif getattr(tp, "parent") != page:
        raise ValueError("not a textpage of this page")
    #pymupdf.log( '{option=}')
    if option == "json":
        t = tp.extractJSON(cb=cb, sort=sort)
    elif option == "rawjson":
        t = tp.extractRAWJSON(cb=cb, sort=sort)
    elif option == "dict":
        t = tp.extractDICT(cb=cb, sort=sort)
    elif option == "rawdict":
        t = tp.extractRAWDICT(cb=cb, sort=sort)
    elif option == "html":
        t = tp.extractHTML()
    elif option == "xml":
        t = tp.extractXML()
    elif option == "xhtml":
        t = tp.extractXHTML()
    else:
        t = tp.extractText(sort=sort)

    if textpage is None:
        del tp
    return t


def get_page_text(
    doc: pymupdf.Document,
    pno: int,
    option: str = "text",
    clip: rect_like = None,
    flags: OptInt = None,
    textpage: pymupdf.TextPage = None,
    sort: bool = False,
) -> typing.Any:
    """Extract a document page's text by page number.

    Notes:
        Convenience function calling page.get_text().
    Args:
        pno: page number
        option: (str) text, words, blocks, html, dict, json, rawdict, xhtml or xml.
    Returns:
        output from page.TextPage().
    """
    return doc[pno].get_text(option, clip=clip, flags=flags, sort=sort)

def get_pixmap(
        page: pymupdf.Page,
        *,
        matrix: matrix_like=pymupdf.Identity,
        dpi=None,
        colorspace: pymupdf.Colorspace=pymupdf.csRGB,
        clip: rect_like=None,
        alpha: bool=False,
        annots: bool=True,
        ) -> pymupdf.Pixmap:
    """Create pixmap of page.

    Keyword args:
        matrix: Matrix for transformation (default: Identity).
        dpi: desired dots per inch. If given, matrix is ignored.
        colorspace: (str/Colorspace) cmyk, rgb, gray - case ignored, default csRGB.
        clip: (irect-like) restrict rendering to this area.
        alpha: (bool) whether to include alpha channel
        annots: (bool) whether to also render annotations
    """
    if dpi:
        zoom = dpi / 72
        matrix = pymupdf.Matrix(zoom, zoom)

    if type(colorspace) is str:
        if colorspace.upper() == "GRAY":
            colorspace = pymupdf.csGRAY
        elif colorspace.upper() == "CMYK":
            colorspace = pymupdf.csCMYK
        else:
            colorspace = pymupdf.csRGB
    if colorspace.n not in (1, 3, 4):
        raise ValueError("unsupported colorspace")

    dl = page.get_displaylist(annots=annots)
    pix = dl.get_pixmap(matrix=matrix, colorspace=colorspace, alpha=alpha, clip=clip)
    dl = None
    if dpi:
        pix.set_dpi(dpi, dpi)
    return pix


def get_page_pixmap(
    doc: pymupdf.Document,
    pno: int,
    *,
    matrix: matrix_like = pymupdf.Identity,
    dpi=None,
    colorspace: pymupdf.Colorspace = pymupdf.csRGB,
    clip: rect_like = None,
    alpha: bool = False,
    annots: bool = True,
) -> pymupdf.Pixmap:
    """Create pixmap of document page by page number.

    Notes:
        Convenience function calling page.get_pixmap.
    Args:
        pno: (int) page number
        matrix: pymupdf.Matrix for transformation (default: pymupdf.Identity).
        colorspace: (str,pymupdf.Colorspace) rgb, rgb, gray - case ignored, default csRGB.
        clip: (irect-like) restrict rendering to this area.
        alpha: (bool) include alpha channel
        annots: (bool) also render annotations
    """
    return doc[pno].get_pixmap(
            matrix=matrix,
            dpi=dpi, colorspace=colorspace,
            clip=clip,
            alpha=alpha,
            annots=annots
            )


def getLinkDict(ln, document=None) -> dict:
    if isinstance(ln, pymupdf.Outline):
        dest = ln.destination(document)
    elif isinstance(ln, pymupdf.Link):
        dest = ln.dest
    else:
        assert 0, f'Unexpected {type(ln)=}.'
    nl = {"kind": dest.kind, "xref": 0}
    try:
        if hasattr(ln, 'rect'):
            nl["from"] = ln.rect
    except Exception:
        # This seems to happen quite often in PyMuPDF/tests.
        if g_exceptions_verbose >= 2:   pymupdf.exception_info()
        pass
    pnt = pymupdf.Point(0, 0)
    if dest.flags & pymupdf.LINK_FLAG_L_VALID:
        pnt.x = dest.lt.x
    if dest.flags & pymupdf.LINK_FLAG_T_VALID:
        pnt.y = dest.lt.y

    if dest.kind == pymupdf.LINK_URI:
        nl["uri"] = dest.uri

    elif dest.kind == pymupdf.LINK_GOTO:
        nl["page"] = dest.page
        nl["to"] = pnt
        if dest.flags & pymupdf.LINK_FLAG_R_IS_ZOOM:
            nl["zoom"] = dest.rb.x
        else:
            nl["zoom"] = 0.0

    elif dest.kind == pymupdf.LINK_GOTOR:
        nl["file"] = dest.file_spec.replace("\\", "/")
        nl["page"] = dest.page
        if dest.page < 0:
            nl["to"] = dest.dest
        else:
            nl["to"] = pnt
            if dest.flags & pymupdf.LINK_FLAG_R_IS_ZOOM:
                nl["zoom"] = dest.rb.x
            else:
                nl["zoom"] = 0.0

    elif dest.kind == pymupdf.LINK_LAUNCH:
        nl["file"] = dest.file_spec.replace("\\", "/")

    elif dest.kind == pymupdf.LINK_NAMED:
        # The dicts should not have same key(s).
        assert not (dest.named.keys() & nl.keys())
        nl.update(dest.named)
        if 'to' in nl:
            nl['to'] = pymupdf.Point(nl['to'])

    else:
        nl["page"] = dest.page
    return nl


def get_links(page: pymupdf.Page) -> list:
    """Create a list of all links contained in a PDF page.

    Notes:
        see PyMuPDF ducmentation for details.
    """

    pymupdf.CheckParent(page)
    ln = page.first_link
    links = []
    while ln:
        nl = getLinkDict(ln, page.parent)
        links.append(nl)
        ln = ln.next
    if links != [] and page.parent.is_pdf:
        linkxrefs = [x for x in
                #page.annot_xrefs()
                pymupdf.JM_get_annot_xref_list2(page)
                if x[1] == pymupdf.PDF_ANNOT_LINK  # pylint: disable=no-member
                ]
        if len(linkxrefs) == len(links):
            for i in range(len(linkxrefs)):
                links[i]["xref"] = linkxrefs[i][0]
                links[i]["id"] = linkxrefs[i][2]
    return links


def get_toc(
    doc: pymupdf.Document,
    simple: bool = True,
) -> list:
    """Create a table of contents.

    Args:
        simple: a bool to control output. Returns a list, where each entry consists of outline level, title, page number and link destination (if simple = False). For details see PyMuPDF's documentation.
    """
    def recurse(olItem, liste, lvl):
        """Recursively follow the outline item chain and record item information in a list."""
        while olItem and olItem.this.m_internal:
            if olItem.title:
                title = olItem.title
            else:
                title = " "

            if not olItem.is_external:
                if olItem.uri:
                    if olItem.page == -1:
                        resolve = doc.resolve_link(olItem.uri)
                        page = resolve[0] + 1
                    else:
                        page = olItem.page + 1
                else:
                    page = -1
            else:
                page = -1

            if not simple:
                link = getLinkDict(olItem, doc)
                liste.append([lvl, title, page, link])
            else:
                liste.append([lvl, title, page])

            if olItem.down:
                liste = recurse(olItem.down, liste, lvl + 1)
            olItem = olItem.next
        return liste

    # ensure document is open
    if doc.is_closed:
        raise ValueError("document closed")
    doc.init_doc()
    olItem = doc.outline
    if not olItem:
        return []
    lvl = 1
    liste = []
    toc = recurse(olItem, liste, lvl)
    if doc.is_pdf and not simple:
        doc._extend_toc_items(toc)
    return toc


def del_toc_item(
    doc: pymupdf.Document,
    idx: int,
) -> None:
    """Delete TOC / bookmark item by index."""
    xref = doc.get_outline_xrefs()[idx]
    doc._remove_toc_item(xref)


def set_toc_item(
    doc: pymupdf.Document,
    idx: int,
    dest_dict: OptDict = None,
    kind: OptInt = None,
    pno: OptInt = None,
    uri: OptStr = None,
    title: OptStr = None,
    to: point_like = None,
    filename: OptStr = None,
    zoom: float = 0,
) -> None:
    """Update TOC item by index.

    It allows changing the item's title and link destination.

    Args:
        idx:
            (int) desired index of the TOC list, as created by get_toc.
        dest_dict:
            (dict) destination dictionary as created by get_toc(False).
            Outrules all other parameters. If None, the remaining parameters
            are used to make a dest dictionary.
        kind:
            (int) kind of link (pymupdf.LINK_GOTO, etc.). If None, then only
            the title will be updated. If pymupdf.LINK_NONE, the TOC item will
            be deleted.
        pno:
            (int) page number (1-based like in get_toc). Required if
            pymupdf.LINK_GOTO.
        uri:
            (str) the URL, required if pymupdf.LINK_URI.
        title:
            (str) the new title. No change if None.
        to:
            (point-like) destination on the target page. If omitted, (72, 36)
            will be used as target coordinates.
        filename:
            (str) destination filename, required for pymupdf.LINK_GOTOR and
            pymupdf.LINK_LAUNCH.
        name:
            (str) a destination name for pymupdf.LINK_NAMED.
        zoom:
            (float) a zoom factor for the target location (pymupdf.LINK_GOTO).
    """
    xref = doc.get_outline_xrefs()[idx]
    page_xref = 0
    if type(dest_dict) is dict:
        if dest_dict["kind"] == pymupdf.LINK_GOTO:
            pno = dest_dict["page"]
            page_xref = doc.page_xref(pno)
            page_height = doc.page_cropbox(pno).height
            to = dest_dict.get('to', pymupdf.Point(72, 36))
            to.y = page_height - to.y
            dest_dict["to"] = to
        action = getDestStr(page_xref, dest_dict)
        if not action.startswith("/A"):
            raise ValueError("bad bookmark dest")
        color = dest_dict.get("color")
        if color:
            color = list(map(float, color))
            if len(color) != 3 or min(color) < 0 or max(color) > 1:
                raise ValueError("bad color value")
        bold = dest_dict.get("bold", False)
        italic = dest_dict.get("italic", False)
        flags = italic + 2 * bold
        collapse = dest_dict.get("collapse")
        return doc._update_toc_item(
            xref,
            action=action[2:],
            title=title,
            color=color,
            flags=flags,
            collapse=collapse,
        )

    if kind == pymupdf.LINK_NONE:  # delete bookmark item
        return doc.del_toc_item(idx)
    if kind is None and title is None:  # treat as no-op
        return None
    if kind is None:  # only update title text
        return doc._update_toc_item(xref, action=None, title=title)

    if kind == pymupdf.LINK_GOTO:
        if pno is None or pno not in range(1, doc.page_count + 1):
            raise ValueError("bad page number")
        page_xref = doc.page_xref(pno - 1)
        page_height = doc.page_cropbox(pno - 1).height
        if to is None:
            to = pymupdf.Point(72, page_height - 36)
        else:
            to = pymupdf.Point(to)
            to.y = page_height - to.y

    ddict = {
        "kind": kind,
        "to": to,
        "uri": uri,
        "page": pno,
        "file": filename,
        "zoom": zoom,
    }
    action = getDestStr(page_xref, ddict)
    if action == "" or not action.startswith("/A"):
        raise ValueError("bad bookmark dest")

    return doc._update_toc_item(xref, action=action[2:], title=title)


def get_area(*args) -> float:
    """Calculate area of rectangle.\nparameter is one of 'px' (default), 'in', 'cm', or 'mm'."""
    rect = args[0]
    if len(args) > 1:
        unit = args[1]
    else:
        unit = "px"
    u = {"px": (1, 1), "in": (1.0, 72.0), "cm": (2.54, 72.0), "mm": (25.4, 72.0)}
    f = (u[unit][0] / u[unit][1]) ** 2
    return f * rect.width * rect.height


def set_metadata(doc: pymupdf.Document, m: dict = None) -> None:
    """Update the PDF /Info object.

    Args:
        m: a dictionary like doc.metadata.
    """
    if not doc.is_pdf:
        raise ValueError("is no PDF")
    if doc.is_closed or doc.is_encrypted:
        raise ValueError("document closed or encrypted")
    if m is None:
        m = {}
    elif type(m) is not dict:
        raise ValueError("bad metadata")
    keymap = {
        "author": "Author",
        "producer": "Producer",
        "creator": "Creator",
        "title": "Title",
        "format": None,
        "encryption": None,
        "creationDate": "CreationDate",
        "modDate": "ModDate",
        "subject": "Subject",
        "keywords": "Keywords",
        "trapped": "Trapped",
    }
    valid_keys = set(keymap.keys())
    diff_set = set(m.keys()).difference(valid_keys)
    if diff_set != set():
        msg = "bad dict key(s): %s" % diff_set
        raise ValueError(msg)

    t, temp = doc.xref_get_key(-1, "Info")
    if t != "xref":
        info_xref = 0
    else:
        info_xref = int(temp.replace("0 R", ""))

    if m == {} and info_xref == 0:  # nothing to do
        return

    if info_xref == 0:  # no prev metadata: get new xref
        info_xref = doc.get_new_xref()
        doc.update_object(info_xref, "<<>>")  # fill it with empty object
        doc.xref_set_key(-1, "Info", "%i 0 R" % info_xref)
    elif m == {}:  # remove existing metadata
        doc.xref_set_key(-1, "Info", "null")
        doc.init_doc()
        return

    for key, val in [(k, v) for k, v in m.items() if keymap[k] is not None]:
        pdf_key = keymap[key]
        if not bool(val) or val in ("none", "null"):
            val = "null"
        else:
            val = pymupdf.get_pdf_str(val)
        doc.xref_set_key(info_xref, pdf_key, val)
    doc.init_doc()
    return


def getDestStr(xref: int, ddict: dict) -> str:
    """Calculate the PDF action string.

    Notes:
        Supports Link annotations and outline items (bookmarks).
    """
    if not ddict:
        return ""
    str_goto = lambda a, b, c, d: f"/A<</S/GoTo/D[{a} 0 R/XYZ {_format_g((b, c, d))}]>>"
    str_gotor1 = lambda a, b, c, d, e, f: f"/A<</S/GoToR/D[{a} /XYZ {_format_g((b, c, d))}]/F<</F{e}/UF{f}/Type/Filespec>>>>"
    str_gotor2 = lambda a, b, c: f"/A<</S/GoToR/D{a}/F<</F{b}/UF{c}/Type/Filespec>>>>"
    str_launch = lambda a, b: f"/A<</S/Launch/F<</F{a}/UF{b}/Type/Filespec>>>>"
    str_uri = lambda a: f"/A<</S/URI/URI{a}>>"

    if type(ddict) in (int, float):
        dest = str_goto(xref, 0, ddict, 0)
        return dest
    d_kind = ddict.get("kind", pymupdf.LINK_NONE)

    if d_kind == pymupdf.LINK_NONE:
        return ""

    if ddict["kind"] == pymupdf.LINK_GOTO:
        d_zoom = ddict.get("zoom", 0)
        to = ddict.get("to", pymupdf.Point(0, 0))
        d_left, d_top = to
        dest = str_goto(xref, d_left, d_top, d_zoom)
        return dest

    if ddict["kind"] == pymupdf.LINK_URI:
        dest = str_uri(pymupdf.get_pdf_str(ddict["uri"]),)
        return dest

    if ddict["kind"] == pymupdf.LINK_LAUNCH:
        fspec = pymupdf.get_pdf_str(ddict["file"])
        dest = str_launch(fspec, fspec)
        return dest

    if ddict["kind"] == pymupdf.LINK_GOTOR and ddict["page"] < 0:
        fspec = pymupdf.get_pdf_str(ddict["file"])
        dest = str_gotor2(pymupdf.get_pdf_str(ddict["to"]), fspec, fspec)
        return dest

    if ddict["kind"] == pymupdf.LINK_GOTOR and ddict["page"] >= 0:
        fspec = pymupdf.get_pdf_str(ddict["file"])
        dest = str_gotor1(
            ddict["page"],
            ddict["to"].x,
            ddict["to"].y,
            ddict["zoom"],
            fspec,
            fspec,
        )
        return dest

    return ""


def set_toc(
    doc: pymupdf.Document,
    toc: list,
    collapse: int = 1,
) -> int:
    """Create new outline tree (table of contents, TOC).

    Args:
        toc: (list, tuple) each entry must contain level, title, page and
            optionally top margin on the page. None or '()' remove the TOC.
        collapse: (int) collapses entries beyond this level. Zero or None
            shows all entries unfolded.
    Returns:
        the number of inserted items, or the number of removed items respectively.
    """
    if doc.is_closed or doc.is_encrypted:
        raise ValueError("document closed or encrypted")
    if not doc.is_pdf:
        raise ValueError("is no PDF")
    if not toc:  # remove all entries
        return len(doc._delToC())

    # validity checks --------------------------------------------------------
    if type(toc) not in (list, tuple):
        raise ValueError("'toc' must be list or tuple")
    toclen = len(toc)
    page_count = doc.page_count
    t0 = toc[0]
    if type(t0) not in (list, tuple):
        raise ValueError("items must be sequences of 3 or 4 items")
    if t0[0] != 1:
        raise ValueError("hierarchy level of item 0 must be 1")
    for i in list(range(toclen - 1)):
        t1 = toc[i]
        t2 = toc[i + 1]
        if not -1 <= t1[2] <= page_count:
            raise ValueError("row %i: page number out of range" % i)
        if (type(t2) not in (list, tuple)) or len(t2) not in (3, 4):
            raise ValueError("bad row %i" % (i + 1))
        if (type(t2[0]) is not int) or t2[0] < 1:
            raise ValueError("bad hierarchy level in row %i" % (i + 1))
        if t2[0] > t1[0] + 1:
            raise ValueError("bad hierarchy level in row %i" % (i + 1))
    # no formal errors in toc --------------------------------------------------

    # --------------------------------------------------------------------------
    # make a list of xref numbers, which we can use for our TOC entries
    # --------------------------------------------------------------------------
    old_xrefs = doc._delToC()  # del old outlines, get their xref numbers

    # prepare table of xrefs for new bookmarks
    old_xrefs = []
    xref = [0] + old_xrefs
    xref[0] = doc._getOLRootNumber()  # entry zero is outline root xref number
    if toclen > len(old_xrefs):  # too few old xrefs?
        for i in range((toclen - len(old_xrefs))):
            xref.append(doc.get_new_xref())  # acquire new ones

    lvltab = {0: 0}  # to store last entry per hierarchy level

    # ------------------------------------------------------------------------------
    # contains new outline objects as strings - first one is the outline root
    # ------------------------------------------------------------------------------
    olitems = [{"count": 0, "first": -1, "last": -1, "xref": xref[0]}]
    # ------------------------------------------------------------------------------
    # build olitems as a list of PDF-like connected dictionaries
    # ------------------------------------------------------------------------------
    for i in range(toclen):
        o = toc[i]
        lvl = o[0]  # level
        title = pymupdf.get_pdf_str(o[1])  # title
        pno = min(doc.page_count - 1, max(0, o[2] - 1))  # page number
        page_xref = doc.page_xref(pno)
        page_height = doc.page_cropbox(pno).height
        top = pymupdf.Point(72, page_height - 36)
        dest_dict = {"to": top, "kind": pymupdf.LINK_GOTO}  # fall back target
        if o[2] < 0:
            dest_dict["kind"] = pymupdf.LINK_NONE
        if len(o) > 3:  # some target is specified
            if type(o[3]) in (int, float):  # convert a number to a point
                dest_dict["to"] = pymupdf.Point(72, page_height - o[3])
            else:  # if something else, make sure we have a dict
                # We make a copy of o[3] to avoid modifying our caller's data.
                dest_dict = o[3].copy() if type(o[3]) is dict else dest_dict
                if "to" not in dest_dict:  # target point not in dict?
                    dest_dict["to"] = top  # put default in
                else:  # transform target to PDF coordinates
                    page = doc[pno]
                    point = pymupdf.Point(dest_dict["to"])
                    point.y = page.cropbox.height - point.y
                    point = point * page.rotation_matrix
                    dest_dict["to"] = (point.x, point.y)
        d = {}
        d["first"] = -1
        d["count"] = 0
        d["last"] = -1
        d["prev"] = -1
        d["next"] = -1
        d["dest"] = getDestStr(page_xref, dest_dict)
        d["top"] = dest_dict["to"]
        d["title"] = title
        d["parent"] = lvltab[lvl - 1]
        d["xref"] = xref[i + 1]
        d["color"] = dest_dict.get("color")
        d["flags"] = dest_dict.get("italic", 0) + 2 * dest_dict.get("bold", 0)
        lvltab[lvl] = i + 1
        parent = olitems[lvltab[lvl - 1]]  # the parent entry

        if (
            dest_dict.get("collapse") or collapse and lvl > collapse
        ):  # suppress expansion
            parent["count"] -= 1  # make /Count negative
        else:
            parent["count"] += 1  # positive /Count

        if parent["first"] == -1:
            parent["first"] = i + 1
            parent["last"] = i + 1
        else:
            d["prev"] = parent["last"]
            prev = olitems[parent["last"]]
            prev["next"] = i + 1
            parent["last"] = i + 1
        olitems.append(d)

    # ------------------------------------------------------------------------------
    # now create each outline item as a string and insert it in the PDF
    # ------------------------------------------------------------------------------
    for i, ol in enumerate(olitems):
        txt = "<<"
        if ol["count"] != 0:
            txt += "/Count %i" % ol["count"]
        try:
            txt += ol["dest"]
        except Exception:
            # Verbose in PyMuPDF/tests.
            if g_exceptions_verbose >= 2:   pymupdf.exception_info()
            pass
        try:
            if ol["first"] > -1:
                txt += "/First %i 0 R" % xref[ol["first"]]
        except Exception:
            if g_exceptions_verbose >= 2:   pymupdf.exception_info()
            pass
        try:
            if ol["last"] > -1:
                txt += "/Last %i 0 R" % xref[ol["last"]]
        except Exception:
            if g_exceptions_verbose >= 2:   pymupdf.exception_info()
            pass
        try:
            if ol["next"] > -1:
                txt += "/Next %i 0 R" % xref[ol["next"]]
        except Exception:
            # Verbose in PyMuPDF/tests.
            if g_exceptions_verbose >= 2:   pymupdf.exception_info()
            pass
        try:
            if ol["parent"] > -1:
                txt += "/Parent %i 0 R" % xref[ol["parent"]]
        except Exception:
            # Verbose in PyMuPDF/tests.
            if g_exceptions_verbose >= 2:   pymupdf.exception_info()
            pass
        try:
            if ol["prev"] > -1:
                txt += "/Prev %i 0 R" % xref[ol["prev"]]
        except Exception:
            # Verbose in PyMuPDF/tests.
            if g_exceptions_verbose >= 2:   pymupdf.exception_info()
            pass
        try:
            txt += "/Title" + ol["title"]
        except Exception:
            # Verbose in PyMuPDF/tests.
            if g_exceptions_verbose >= 2:   pymupdf.exception_info()
            pass

        if ol.get("color") and len(ol["color"]) == 3:
            txt += f"/C[ {_format_g(tuple(ol['color']))}]"
        if ol.get("flags", 0) > 0:
            txt += "/F %i" % ol["flags"]

        if i == 0:  # special: this is the outline root
            txt += "/Type/Outlines"  # so add the /Type entry
        txt += ">>"
        doc.update_object(xref[i], txt)  # insert the PDF object

    doc.init_doc()
    return toclen


def do_widgets(
    tar: pymupdf.Document,
    src: pymupdf.Document,
    graftmap,
    from_page: int = -1,
    to_page: int = -1,
    start_at: int = -1,
    join_duplicates=0,
) -> None:
    """Insert widgets of copied page range into target PDF.

    Parameter values **must** equal those of method insert_pdf() which
    must have been previously executed.
    """
    if not src.is_form_pdf:  # nothing to do: source PDF has no fields
        return

    def clean_kid_parents(acro_fields):
        """ Make sure all kids have correct "Parent" pointers."""
        for i in range(acro_fields.pdf_array_len()):
            parent = acro_fields.pdf_array_get(i)
            kids = parent.pdf_dict_get(pymupdf.PDF_NAME("Kids"))
            for j in range(kids.pdf_array_len()):
                kid = kids.pdf_array_get(j)
                kid.pdf_dict_put(pymupdf.PDF_NAME("Parent"), parent)

    def join_widgets(pdf, acro_fields, xref1, xref2, name):
        """Called for each pair of widgets having the same name.

        Args:
            pdf: target MuPDF document
            acro_fields: object Root/AcroForm/Fields
            xref1, xref2: widget xrefs having same names
            name: (str) the name

        Result:
            Defined or updated widget parent that points to both widgets.
        """

        def re_target(pdf, acro_fields, xref1, kids1, xref2, kids2):
            """Merge widget in xref2 into "Kids" list of widget xref1.

            Args:
                xref1, kids1: target widget and its "Kids" array.
                xref2, kids2: source wwidget and its "Kids" array (may be empty).
            """
            # make indirect objects from widgets
            w1_ind = mupdf.pdf_new_indirect(pdf, xref1, 0)
            w2_ind = mupdf.pdf_new_indirect(pdf, xref2, 0)
            # find source widget in "Fields" array
            idx = acro_fields.pdf_array_find(w2_ind)
            acro_fields.pdf_array_delete(idx)

            if not kids2.pdf_is_array():  # source widget has no kids
                widget = mupdf.pdf_load_object(pdf, xref2)

                # delete name from widget and insert target as parent
                widget.pdf_dict_del(pymupdf.PDF_NAME("T"))
                widget.pdf_dict_put(pymupdf.PDF_NAME("Parent"), w1_ind)

                # put in target Kids
                kids1.pdf_array_push(w2_ind)
            else:  # copy source kids to target kids
                for i in range(kids2.pdf_array_len()):
                    kid = kids2.pdf_array_get(i)
                    kid.pdf_dict_put(pymupdf.PDF_NAME("Parent"), w1_ind)
                    kid_ind = mupdf.pdf_new_indirect(pdf, kid.pdf_to_num(), 0)
                    kids1.pdf_array_push(kid_ind)

        def new_target(pdf, acro_fields, xref1, w1, xref2, w2, name):
            """Make new "Parent" for two widgets with same name.

            Args:
                xref1, w1: first widget
                xref2, w2: second widget
                name: field name

            Result:
                Both widgets have no "Kids". We create a new object with the
                name and a "Kids" array containing the widgets.
                Original widgets must be removed from AcroForm/Fields.
            """
            # make new "Parent" object
            new = mupdf.pdf_new_dict(pdf, 5)
            new.pdf_dict_put_text_string(pymupdf.PDF_NAME("T"), name)
            kids = new.pdf_dict_put_array(pymupdf.PDF_NAME("Kids"), 2)
            new_obj = mupdf.pdf_add_object(pdf, new)
            new_obj_xref = new_obj.pdf_to_num()
            new_ind = mupdf.pdf_new_indirect(pdf, new_obj_xref, 0)

            # copy over some required source widget properties
            ft = w1.pdf_dict_get(pymupdf.PDF_NAME("FT"))
            w1.pdf_dict_del(pymupdf.PDF_NAME("FT"))
            new_obj.pdf_dict_put(pymupdf.PDF_NAME("FT"), ft)

            aa = w1.pdf_dict_get(pymupdf.PDF_NAME("AA"))
            w1.pdf_dict_del(pymupdf.PDF_NAME("AA"))
            new_obj.pdf_dict_put(pymupdf.PDF_NAME("AA"), aa)

            # remove name field, insert "Parent" field in source widgets
            w1.pdf_dict_del(pymupdf.PDF_NAME("T"))
            w1.pdf_dict_put(pymupdf.PDF_NAME("Parent"), new_ind)
            w2.pdf_dict_del(pymupdf.PDF_NAME("T"))
            w2.pdf_dict_put(pymupdf.PDF_NAME("Parent"), new_ind)

            # put source widgets in "kids" array
            ind1 = mupdf.pdf_new_indirect(pdf, xref1, 0)
            ind2 = mupdf.pdf_new_indirect(pdf, xref2, 0)
            kids.pdf_array_push(ind1)
            kids.pdf_array_push(ind2)

            # remove source widgets from "AcroForm/Fields"
            idx = acro_fields.pdf_array_find(ind1)
            acro_fields.pdf_array_delete(idx)
            idx = acro_fields.pdf_array_find(ind2)
            acro_fields.pdf_array_delete(idx)

            acro_fields.pdf_array_push(new_ind)

        w1 = mupdf.pdf_load_object(pdf, xref1)
        w2 = mupdf.pdf_load_object(pdf, xref2)
        kids1 = w1.pdf_dict_get(pymupdf.PDF_NAME("Kids"))
        kids2 = w2.pdf_dict_get(pymupdf.PDF_NAME("Kids"))

        # check which widget has a suitable "Kids" array
        if kids1.pdf_is_array():
            re_target(pdf, acro_fields, xref1, kids1, xref2, kids2)  # pylint: disable=arguments-out-of-order
        elif kids2.pdf_is_array():
            re_target(pdf, acro_fields, xref2, kids2, xref1, kids1)  # pylint: disable=arguments-out-of-order
        else:
            new_target(pdf, acro_fields, xref1, w1, xref2, w2, name)  # pylint: disable=arguments-out-of-order

    def get_kids(parent, kids_list):
        """Return xref list of leaf kids for a parent.

        Call with an empty list.
        """
        kids = mupdf.pdf_dict_get(parent, pymupdf.PDF_NAME("Kids"))
        if not kids.pdf_is_array():
            return kids_list
        for i in range(kids.pdf_array_len()):
            kid = kids.pdf_array_get(i)
            if mupdf.pdf_is_dict(mupdf.pdf_dict_get(kid, pymupdf.PDF_NAME("Kids"))):
                kids_list = get_kids(kid, kids_list)
            else:
                kids_list.append(kid.pdf_to_num())
        return kids_list

    def kids_xrefs(widget):
        """Get the xref of top "Parent" and the list of leaf widgets."""
        kids_list = []
        parent = mupdf.pdf_dict_get(widget, pymupdf.PDF_NAME("Parent"))
        parent_xref = parent.pdf_to_num()
        if parent_xref == 0:
            return parent_xref, kids_list
        kids_list = get_kids(parent, kids_list)
        return parent_xref, kids_list

    def deduplicate_names(pdf, acro_fields, join_duplicates=False):
        """Handle any widget name duplicates caused by the merge."""
        names = {}  # key is a widget name, value a list of widgets having it.

        # extract all names and widgets in "AcroForm/Fields"
        for i in range(mupdf.pdf_array_len(acro_fields)):
            wobject = mupdf.pdf_array_get(acro_fields, i)
            xref = wobject.pdf_to_num()

            # extract widget name and collect widget(s) using it
            T = mupdf.pdf_dict_get_text_string(wobject, pymupdf.PDF_NAME("T"))
            xrefs = names.get(T, [])
            xrefs.append(xref)
            names[T] = xrefs

        for name, xrefs in names.items():
            if len(xrefs) < 2:
                continue
            xref0, xref1 = xrefs[:2]  # only exactly 2 should occur!
            if join_duplicates:  # combine fields with equal names
                join_widgets(pdf, acro_fields, xref0, xref1, name)
            else:  # make field names unique
                newname = name + f" [{xref1}]"  # append this to the name
                wobject = mupdf.pdf_load_object(pdf, xref1)
                wobject.pdf_dict_put_text_string(pymupdf.PDF_NAME("T"), newname)

        clean_kid_parents(acro_fields)

    def get_acroform(doc):
        """Retrieve the AcroForm dictionary form a PDF."""
        pdf = mupdf.pdf_document_from_fz_document(doc)
        # AcroForm (= central form field info)
        return mupdf.pdf_dict_getp(mupdf.pdf_trailer(pdf), "Root/AcroForm")

    tarpdf = mupdf.pdf_document_from_fz_document(tar)
    srcpdf = mupdf.pdf_document_from_fz_document(src)

    if tar.is_form_pdf:
        # target is a Form PDF, so use it to include source fields
        acro = get_acroform(tar)
        # Important arrays in AcroForm
        acro_fields = acro.pdf_dict_get(pymupdf.PDF_NAME("Fields"))
        tar_co = acro.pdf_dict_get(pymupdf.PDF_NAME("CO"))
        if not tar_co.pdf_is_array():
            tar_co = acro.pdf_dict_put_array(pymupdf.PDF_NAME("CO"), 5)
    else:
        # target is no Form PDF, so copy over source AcroForm
        acro = mupdf.pdf_deep_copy_obj(get_acroform(src))  # make a copy

        # Clear "Fields" and "CO" arrays: will be populated by page fields.
        # This is required to avoid copying unneeded objects.
        acro.pdf_dict_del(pymupdf.PDF_NAME("Fields"))
        acro.pdf_dict_put_array(pymupdf.PDF_NAME("Fields"), 5)
        acro.pdf_dict_del(pymupdf.PDF_NAME("CO"))
        acro.pdf_dict_put_array(pymupdf.PDF_NAME("CO"), 5)

        # Enrich AcroForm for copying to target
        acro_graft = mupdf.pdf_graft_mapped_object(graftmap, acro)

        # Insert AcroForm into target PDF
        acro_tar = mupdf.pdf_add_object(tarpdf, acro_graft)
        acro_fields = acro_tar.pdf_dict_get(pymupdf.PDF_NAME("Fields"))
        tar_co = acro_tar.pdf_dict_get(pymupdf.PDF_NAME("CO"))

        # get its xref and insert it into target catalog
        tar_xref = acro_tar.pdf_to_num()
        acro_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0)
        root = mupdf.pdf_dict_get(mupdf.pdf_trailer(tarpdf), pymupdf.PDF_NAME("Root"))
        root.pdf_dict_put(pymupdf.PDF_NAME("AcroForm"), acro_tar_ind)

    if from_page <= to_page:
        src_range = range(from_page, to_page + 1)
    else:
        src_range = range(from_page, to_page - 1, -1)

    parents = {}  # information about widget parents

    # remove "P" owning page reference from all widgets of all source pages
    for i in src_range:
        src_page = src[i]
        for xref in [
            xref
            for xref, wtype, _ in src_page.annot_xrefs()
            if wtype == pymupdf.PDF_ANNOT_WIDGET  # pylint: disable=no-member
        ]:
            w_obj = mupdf.pdf_load_object(srcpdf, xref)
            w_obj.pdf_dict_del(pymupdf.PDF_NAME("P"))

            # get the widget's parent structure
            parent_xref, old_kids = kids_xrefs(w_obj)
            if parent_xref:
                parents[parent_xref] = {
                    "new_xref": 0,
                    "old_kids": old_kids,
                    "new_kids": [],
                }
    # Copy over Parent widgets first - they are not page-dependent
    for xref in parents.keys():  # pylint: disable=consider-using-dict-items
        parent = mupdf.pdf_load_object(srcpdf, xref)
        parent_graft = mupdf.pdf_graft_mapped_object(graftmap, parent)
        parent_tar = mupdf.pdf_add_object(tarpdf, parent_graft)
        kids_xrefs_new = get_kids(parent_tar, [])
        parent_xref_new = parent_tar.pdf_to_num()
        parent_ind = mupdf.pdf_new_indirect(tarpdf, parent_xref_new, 0)
        acro_fields.pdf_array_push(parent_ind)
        parents[xref]["new_xref"] = parent_xref_new
        parents[xref]["new_kids"] = kids_xrefs_new

    for i in range(len(src_range)):
        # read first copied over page in target
        tar_page = tar[start_at + i]

        # read the original page in the source PDF
        src_page = src[src_range[i]]

        # now walk through source page widgets and copy over
        w_xrefs = [  # widget xrefs of the source page
            xref
            for xref, wtype, _ in src_page.annot_xrefs()
            if wtype == pymupdf.PDF_ANNOT_WIDGET  # pylint: disable=no-member
        ]
        if not w_xrefs:  # no widgets on this source page
            continue

        # convert to formal PDF page
        tar_page_pdf = mupdf.pdf_page_from_fz_page(tar_page)

        # extract annotations array
        tar_annots = mupdf.pdf_dict_get(tar_page_pdf.obj(), pymupdf.PDF_NAME("Annots"))
        if not mupdf.pdf_is_array(tar_annots):
            tar_annots = mupdf.pdf_dict_put_array(
                tar_page_pdf.obj(), pymupdf.PDF_NAME("Annots"), 5
            )

        for xref in w_xrefs:
            w_obj = mupdf.pdf_load_object(srcpdf, xref)

            # check if field takes part in inter-field validations
            is_aac = mupdf.pdf_is_dict(mupdf.pdf_dict_getp(w_obj, "AA/C"))

            # check if parent of widget already in target
            parent_xref = mupdf.pdf_to_num(
                w_obj.pdf_dict_get(pymupdf.PDF_NAME("Parent"))
            )
            if parent_xref == 0:  # parent not in target yet
                try:
                    w_obj_graft = mupdf.pdf_graft_mapped_object(graftmap, w_obj)
                except Exception as e:
                    pymupdf.message_warning(f"cannot copy widget at {xref=}: {e}")
                    continue
                w_obj_tar = mupdf.pdf_add_object(tarpdf, w_obj_graft)
                tar_xref = w_obj_tar.pdf_to_num()
                w_obj_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0)
                mupdf.pdf_array_push(tar_annots, w_obj_tar_ind)
                mupdf.pdf_array_push(acro_fields, w_obj_tar_ind)
            else:
                parent = parents[parent_xref]
                idx = parent["old_kids"].index(xref)  # search for xref in parent
                tar_xref = parent["new_kids"][idx]
                w_obj_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0)
                mupdf.pdf_array_push(tar_annots, w_obj_tar_ind)

            # Into "AcroForm/CO" if a computation field.
            if is_aac:
                mupdf.pdf_array_push(tar_co, w_obj_tar_ind)

    deduplicate_names(tarpdf, acro_fields, join_duplicates=join_duplicates)

def do_links(
    doc1: pymupdf.Document,
    doc2: pymupdf.Document,
    from_page: int = -1,
    to_page: int = -1,
    start_at: int = -1,
) -> None:
    """Insert links contained in copied page range into destination PDF.

    Parameter values **must** equal those of method insert_pdf(), which must
    have been previously executed.
    """
    #pymupdf.log( 'utils.do_links()')
    # --------------------------------------------------------------------------
    # internal function to create the actual "/Annots" object string
    # --------------------------------------------------------------------------
    def cre_annot(lnk, xref_dst, pno_src, ctm):
        """Create annotation object string for a passed-in link."""

        r = lnk["from"] * ctm  # rect in PDF coordinates
        rect = _format_g(tuple(r))
        if lnk["kind"] == pymupdf.LINK_GOTO:
            txt = pymupdf.annot_skel["goto1"]  # annot_goto
            idx = pno_src.index(lnk["page"])
            p = lnk["to"] * ctm  # target point in PDF coordinates
            annot = txt(xref_dst[idx], p.x, p.y, lnk["zoom"], rect)

        elif lnk["kind"] == pymupdf.LINK_GOTOR:
            if lnk["page"] >= 0:
                txt = pymupdf.annot_skel["gotor1"]  # annot_gotor
                pnt = lnk.get("to", pymupdf.Point(0, 0))  # destination point
                if type(pnt) is not pymupdf.Point:
                    pnt = pymupdf.Point(0, 0)
                annot = txt(
                    lnk["page"],
                    pnt.x,
                    pnt.y,
                    lnk["zoom"],
                    lnk["file"],
                    lnk["file"],
                    rect,
                )
            else:
                txt = pymupdf.annot_skel["gotor2"]  # annot_gotor_n
                to = pymupdf.get_pdf_str(lnk["to"])
                to = to[1:-1]
                f = lnk["file"]
                annot = txt(to, f, rect)

        elif lnk["kind"] == pymupdf.LINK_LAUNCH:
            txt = pymupdf.annot_skel["launch"]  # annot_launch
            annot = txt(lnk["file"], lnk["file"], rect)

        elif lnk["kind"] == pymupdf.LINK_URI:
            txt = pymupdf.annot_skel["uri"]  # annot_uri
            annot = txt(lnk["uri"], rect)

        else:
            annot = ""

        return annot

    # --------------------------------------------------------------------------

    # validate & normalize parameters
    if from_page < 0:
        fp = 0
    elif from_page >= doc2.page_count:
        fp = doc2.page_count - 1
    else:
        fp = from_page

    if to_page < 0 or to_page >= doc2.page_count:
        tp = doc2.page_count - 1
    else:
        tp = to_page

    if start_at < 0:
        raise ValueError("'start_at' must be >= 0")
    sa = start_at

    incr = 1 if fp <= tp else -1  # page range could be reversed

    # lists of source / destination page numbers
    pno_src = list(range(fp, tp + incr, incr))
    pno_dst = [sa + i for i in range(len(pno_src))]

    # lists of source / destination page xrefs
    xref_src = []
    xref_dst = []
    for i in range(len(pno_src)):
        p_src = pno_src[i]
        p_dst = pno_dst[i]
        old_xref = doc2.page_xref(p_src)
        new_xref = doc1.page_xref(p_dst)
        xref_src.append(old_xref)
        xref_dst.append(new_xref)

    # create the links for each copied page in destination PDF
    for i in range(len(xref_src)):
        page_src = doc2[pno_src[i]]  # load source page
        links = page_src.get_links()  # get all its links
        #pymupdf.log( '{pno_src=}')
        #pymupdf.log( '{type(page_src)=}')
        #pymupdf.log( '{page_src=}')
        #pymupdf.log( '{=i len(links)}')
        if len(links) == 0:  # no links there
            page_src = None
            continue
        ctm = ~page_src.transformation_matrix  # calc page transformation matrix
        page_dst = doc1[pno_dst[i]]  # load destination page
        link_tab = []  # store all link definitions here
        for l in links:
            if l["kind"] == pymupdf.LINK_GOTO and (l["page"] not in pno_src):
                continue  # GOTO link target not in copied pages
            annot_text = cre_annot(l, xref_dst, pno_src, ctm)
            if annot_text:
                link_tab.append(annot_text)
        if link_tab != []:
            page_dst._addAnnot_FromString( tuple(link_tab))
    #pymupdf.log( 'utils.do_links() returning.')


def getLinkText(page: pymupdf.Page, lnk: dict) -> str:
    # --------------------------------------------------------------------------
    # define skeletons for /Annots object texts
    # --------------------------------------------------------------------------
    ctm = page.transformation_matrix
    ictm = ~ctm
    r = lnk["from"]
    rect = _format_g(tuple(r * ictm))

    annot = ""
    if lnk["kind"] == pymupdf.LINK_GOTO:
        if lnk["page"] >= 0:
            txt = pymupdf.annot_skel["goto1"]  # annot_goto
            pno = lnk["page"]
            xref = page.parent.page_xref(pno)
            pnt = lnk.get("to", pymupdf.Point(0, 0))  # destination point
            dest_page = page.parent[pno]
            dest_ctm = dest_page.transformation_matrix
            dest_ictm = ~dest_ctm
            ipnt = pnt * dest_ictm
            annot = txt(xref, ipnt.x, ipnt.y, lnk.get("zoom", 0), rect)
        else:
            txt = pymupdf.annot_skel["goto2"]  # annot_goto_n
            annot = txt(pymupdf.get_pdf_str(lnk["to"]), rect)

    elif lnk["kind"] == pymupdf.LINK_GOTOR:
        if lnk["page"] >= 0:
            txt = pymupdf.annot_skel["gotor1"]  # annot_gotor
            pnt = lnk.get("to", pymupdf.Point(0, 0))  # destination point
            if type(pnt) is not pymupdf.Point:
                pnt = pymupdf.Point(0, 0)
            annot = txt(
                lnk["page"],
                pnt.x,
                pnt.y,
                lnk.get("zoom", 0),
                lnk["file"],
                lnk["file"],
                rect,
            )
        else:
            txt = pymupdf.annot_skel["gotor2"]  # annot_gotor_n
            annot = txt(pymupdf.get_pdf_str(lnk["to"]), lnk["file"], rect)

    elif lnk["kind"] == pymupdf.LINK_LAUNCH:
        txt = pymupdf.annot_skel["launch"]  # annot_launch
        annot = txt(lnk["file"], lnk["file"], rect)

    elif lnk["kind"] == pymupdf.LINK_URI:
        txt = pymupdf.annot_skel["uri"]  # txt = annot_uri
        annot = txt(lnk["uri"], rect)

    elif lnk["kind"] == pymupdf.LINK_NAMED:
        txt = pymupdf.annot_skel["named"]  # annot_named
        lname = lnk.get("name")  # check presence of key
        if lname is None:  # if missing, fall back to alternative
            lname = lnk["nameddest"]
        annot = txt(lname, rect)
    if not annot:
        return annot

    # add a /NM PDF key to the object definition
    link_names = dict(  # existing ids and their xref
        [(x[0], x[2]) for x in page.annot_xrefs() if x[1] == pymupdf.PDF_ANNOT_LINK]   # pylint: disable=no-member
    )

    old_name = lnk.get("id", "")  # id value in the argument

    if old_name and (lnk["xref"], old_name) in link_names.items():
        name = old_name  # no new name if this is an update only
    else:
        i = 0
        stem = pymupdf.TOOLS.set_annot_stem() + "-L%i"
        while True:
            name = stem % i
            if name not in link_names.values():
                break
            i += 1
    # add /NM key to object definition
    annot = annot.replace("/Link", "/Link/NM(%s)" % name)
    return annot


def delete_widget(page: pymupdf.Page, widget: pymupdf.Widget) -> pymupdf.Widget:
    """Delete widget from page and return the next one."""
    pymupdf.CheckParent(page)
    annot = getattr(widget, "_annot", None)
    if annot is None:
        raise ValueError("bad type: widget")
    nextwidget = widget.next
    page.delete_annot(annot)
    widget._annot.parent = None
    keylist = list(widget.__dict__.keys())
    for key in keylist:
        del widget.__dict__[key]
    return nextwidget


def update_link(page: pymupdf.Page, lnk: dict) -> None:
    """Update a link on the current page."""
    pymupdf.CheckParent(page)
    annot = getLinkText(page, lnk)
    if annot == "":
        raise ValueError("link kind not supported")

    page.parent.update_object(lnk["xref"], annot, page=page)


def insert_link(page: pymupdf.Page, lnk: dict, mark: bool = True) -> None:
    """Insert a new link for the current page."""
    pymupdf.CheckParent(page)
    annot = getLinkText(page, lnk)
    if annot == "":
        raise ValueError("link kind not supported")
    page._addAnnot_FromString((annot,))


def insert_textbox(
    page: pymupdf.Page,
    rect: rect_like,
    buffer: typing.Union[str, list],
    *,
    fontname: str = "helv",
    fontfile: OptStr = None,
    set_simple: int = 0,
    encoding: int = 0,
    fontsize: float = 11,
    lineheight: OptFloat = None,
    color: OptSeq = None,
    fill: OptSeq = None,
    expandtabs: int = 1,
    align: int = 0,
    rotate: int = 0,
    render_mode: int = 0,
    miter_limit: float = 1,
    border_width: float = 0.05,
    morph: OptSeq = None,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> float:
    """Insert text into a given rectangle.

    Notes:
        Creates a Shape object, uses its same-named method and commits it.
    Parameters:
        rect: (rect-like) area to use for text.
        buffer: text to be inserted
        fontname: a Base-14 font, font name or '/name'
        fontfile: name of a font file
        fontsize: font size
        lineheight: overwrite the font property
        color: RGB color triple
        expandtabs: handles tabulators with string function
        align: left, center, right, justified
        rotate: 0, 90, 180, or 270 degrees
        morph: morph box with a matrix and a fixpoint
        overlay: put text in foreground or background
    Returns:
        unused or deficit rectangle area (float)
    """
    img = page.new_shape()
    rc = img.insert_textbox(
        rect,
        buffer,
        fontsize=fontsize,
        lineheight=lineheight,
        fontname=fontname,
        fontfile=fontfile,
        set_simple=set_simple,
        encoding=encoding,
        color=color,
        fill=fill,
        expandtabs=expandtabs,
        render_mode=render_mode,
        miter_limit=miter_limit,
        border_width=border_width,
        align=align,
        rotate=rotate,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    if rc >= 0:
        img.commit(overlay)
    return rc


def insert_text(
    page: pymupdf.Page,
    point: point_like,
    text: typing.Union[str, list],
    *,
    fontsize: float = 11,
    lineheight: OptFloat = None,
    fontname: str = "helv",
    fontfile: OptStr = None,
    set_simple: int = 0,
    encoding: int = 0,
    color: OptSeq = None,
    fill: OptSeq = None,
    border_width: float = 0.05,
    miter_limit: float = 1,
    render_mode: int = 0,
    rotate: int = 0,
    morph: OptSeq = None,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
):

    img = page.new_shape()
    rc = img.insert_text(
        point,
        text,
        fontsize=fontsize,
        lineheight=lineheight,
        fontname=fontname,
        fontfile=fontfile,
        set_simple=set_simple,
        encoding=encoding,
        color=color,
        fill=fill,
        border_width=border_width,
        render_mode=render_mode,
        miter_limit=miter_limit,
        rotate=rotate,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    if rc >= 0:
        img.commit(overlay)
    return rc


def insert_htmlbox(
    page,
    rect,
    text,
    *,
    css=None,
    scale_low=0,
    archive=None,
    rotate=0,
    oc=0,
    opacity=1,
    overlay=True,
) -> float:
    """Insert text with optional HTML tags and stylings into a rectangle.

    Args:
        rect: (rect-like) rectangle into which the text should be placed.
        text: (str) text with optional HTML tags and stylings.
        css: (str) CSS styling commands.
        scale_low: (float) force-fit content by scaling it down. Must be in
            range [0, 1]. If 1, no scaling will take place. If 0, arbitrary
            down-scaling is acceptable. A value of 0.1 would mean that content
            may be scaled down by at most 90%.
        archive: Archive object pointing to locations of used fonts or images
        rotate: (int) rotate the text in the box by a multiple of 90 degrees.
        oc: (int) the xref of an OCG / OCMD (Optional Content).
        opacity: (float) set opacity of inserted content.
        overlay: (bool) put text on top of page content.
    Returns:
        A tuple of floats (spare_height, scale).
        spare_height: -1 if content did not fit, else >= 0. It is the height of the
               unused (still available) rectangle stripe. Positive only if
               scale_min = 1 (no down scaling).
        scale: downscaling factor, 0 < scale <= 1. Set to 0 if spare_height = -1 (no fit).
    """

    # normalize rotation angle
    if not rotate % 90 == 0:
        raise ValueError("bad rotation angle")
    while rotate < 0:
        rotate += 360
    while rotate >= 360:
        rotate -= 360

    if not 0 <= scale_low <= 1:
        raise ValueError("'scale_low' must be in [0, 1]")

    if css is None:
        css = ""

    rect = pymupdf.Rect(rect)
    if rotate in (90, 270):
        temp_rect = pymupdf.Rect(0, 0, rect.height, rect.width)
    else:
        temp_rect = pymupdf.Rect(0, 0, rect.width, rect.height)

    # use a small border by default
    mycss = "body {margin:1px;}" + css  # append user CSS

    # either make a story, or accept a given one
    if isinstance(text, str):  # if a string, convert to a Story
        story = pymupdf.Story(html=text, user_css=mycss, archive=archive)
    elif isinstance(text, pymupdf.Story):
        story = text
    else:
        raise ValueError("'text' must be a string or a Story")
    # ----------------------------------------------------------------
    # Find a scaling factor that lets our story fit in
    # ----------------------------------------------------------------
    scale_max = None if scale_low == 0 else 1 / scale_low

    fit = story.fit_scale(temp_rect, scale_min=1, scale_max=scale_max)
    if not fit.big_enough:  # there was no fit
        return (-1, scale_low)

    filled = fit.filled
    scale = 1 / fit.parameter  # shrink factor

    spare_height = fit.rect.y1 - filled[3]  # unused room at rectangle bottom
    # Note: due to MuPDF's logic this may be negative even for successful fits.
    if scale != 1 or spare_height < 0:  # if scaling occurred, set spare_height to 0
        spare_height = 0

    def rect_function(*args):
        return fit.rect, fit.rect, pymupdf.Identity

    # draw story on temp PDF page
    doc = story.write_with_links(rect_function)

    # Insert opacity if requested.
    # For this, we prepend a command to the /Contents.
    if 0 <= opacity < 1:
        tpage = doc[0]  # load page
        # generate /ExtGstate for the page
        alp0 = tpage._set_opacity(CA=opacity, ca=opacity)
        s = f"/{alp0} gs\n"  # generate graphic state command
        pymupdf.TOOLS._insert_contents(tpage, s.encode(), 0)

    # put result in target page
    page.show_pdf_page(rect, doc, 0, rotate=rotate, oc=oc, overlay=overlay)

    # -------------------------------------------------------------------------
    # re-insert links in target rect (show_pdf_page cannot copy annotations)
    # -------------------------------------------------------------------------
    # scaled center point of fit.rect
    mp1 = (fit.rect.tl + fit.rect.br) / 2 * scale

    # center point of target rect
    mp2 = (rect.tl + rect.br) / 2

    # compute link positioning matrix:
    # - move center of scaled-down fit.rect to (0,0)
    # - rotate
    # - move (0,0) to center of target rect
    mat = (
        pymupdf.Matrix(scale, 0, 0, scale, -mp1.x, -mp1.y)
        * pymupdf.Matrix(-rotate)
        * pymupdf.Matrix(1, 0, 0, 1, mp2.x, mp2.y)
    )

    # copy over links
    for link in doc[0].get_links():
        link["from"] *= mat
        page.insert_link(link)

    return spare_height, scale


def new_page(
    doc: pymupdf.Document,
    pno: int = -1,
    width: float = 595,
    height: float = 842,
) -> pymupdf.Page:
    """Create and return a new page object.

    Args:
        pno: (int) insert before this page. Default: after last page.
        width: (float) page width in points. Default: 595 (ISO A4 width).
        height: (float) page height in points. Default 842 (ISO A4 height).
    Returns:
        A pymupdf.Page object.
    """
    doc._newPage(pno, width=width, height=height)
    return doc[pno]


def insert_page(
    doc: pymupdf.Document,
    pno: int,
    text: typing.Union[str, list, None] = None,
    fontsize: float = 11,
    width: float = 595,
    height: float = 842,
    fontname: str = "helv",
    fontfile: OptStr = None,
    color: OptSeq = (0,),
) -> int:
    """Create a new PDF page and insert some text.

    Notes:
        Function combining pymupdf.Document.new_page() and pymupdf.Page.insert_text().
        For parameter details see these methods.
    """
    page = doc.new_page(pno=pno, width=width, height=height)
    if not bool(text):
        return 0
    rc = page.insert_text(
        (50, 72),
        text,
        fontsize=fontsize,
        fontname=fontname,
        fontfile=fontfile,
        color=color,
    )
    return rc


def draw_line(
    page: pymupdf.Page,
    p1: point_like,
    p2: point_like,
    color: OptSeq = (0,),
    dashes: OptStr = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    morph: OptSeq = None,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc=0,
) -> pymupdf.Point:
    """Draw a line from point p1 to point p2."""
    img = page.new_shape()
    p = img.draw_line(pymupdf.Point(p1), pymupdf.Point(p2))
    img.finish(
        color=color,
        dashes=dashes,
        width=width,
        closePath=False,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    img.commit(overlay)

    return p


def draw_squiggle(
    page: pymupdf.Page,
    p1: point_like,
    p2: point_like,
    breadth: float = 2,
    color: OptSeq = (0,),
    dashes: OptStr = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    morph: OptSeq = None,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> pymupdf.Point:
    """Draw a squiggly line from point p1 to point p2."""
    img = page.new_shape()
    p = img.draw_squiggle(pymupdf.Point(p1), pymupdf.Point(p2), breadth=breadth)
    img.finish(
        color=color,
        dashes=dashes,
        width=width,
        closePath=False,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    img.commit(overlay)

    return p


def draw_zigzag(
    page: pymupdf.Page,
    p1: point_like,
    p2: point_like,
    breadth: float = 2,
    color: OptSeq = (0,),
    dashes: OptStr = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    morph: OptSeq = None,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> pymupdf.Point:
    """Draw a zigzag line from point p1 to point p2."""
    img = page.new_shape()
    p = img.draw_zigzag(pymupdf.Point(p1), pymupdf.Point(p2), breadth=breadth)
    img.finish(
        color=color,
        dashes=dashes,
        width=width,
        closePath=False,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    img.commit(overlay)

    return p


def draw_rect(
        page: pymupdf.Page,
        rect: rect_like,
        color: OptSeq = (0,),
        fill: OptSeq = None,
        dashes: OptStr = None,
        width: float = 1,
        lineCap: int = 0,
        lineJoin: int = 0,
        morph: OptSeq = None,
        overlay: bool = True,
        stroke_opacity: float = 1,
        fill_opacity: float = 1,
        oc: int = 0,
        radius=None,
        ) -> pymupdf.Point:
    '''
    Draw a rectangle. See Shape class method for details.
    '''
    img = page.new_shape()
    Q = img.draw_rect(pymupdf.Rect(rect), radius=radius)
    img.finish(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    img.commit(overlay)

    return Q


def draw_quad(
    page: pymupdf.Page,
    quad: quad_like,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    morph: OptSeq = None,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> pymupdf.Point:
    """Draw a quadrilateral."""
    img = page.new_shape()
    Q = img.draw_quad(pymupdf.Quad(quad))
    img.finish(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    img.commit(overlay)

    return Q


def draw_polyline(
    page: pymupdf.Page,
    points: list,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    width: float = 1,
    morph: OptSeq = None,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    closePath: bool = False,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> pymupdf.Point:
    """Draw multiple connected line segments."""
    img = page.new_shape()
    Q = img.draw_polyline(points)
    img.finish(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        closePath=closePath,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    img.commit(overlay)

    return Q


def draw_circle(
    page: pymupdf.Page,
    center: point_like,
    radius: float,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    morph: OptSeq = None,
    dashes: OptStr = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> pymupdf.Point:
    """Draw a circle given its center and radius."""
    img = page.new_shape()
    Q = img.draw_circle(pymupdf.Point(center), radius)
    img.finish(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    img.commit(overlay)
    return Q


def draw_oval(
    page: pymupdf.Page,
    rect: typing.Union[rect_like, quad_like],
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    morph: OptSeq = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> pymupdf.Point:
    """Draw an oval given its containing rectangle or quad."""
    img = page.new_shape()
    Q = img.draw_oval(rect)
    img.finish(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    img.commit(overlay)

    return Q


def draw_curve(
    page: pymupdf.Page,
    p1: point_like,
    p2: point_like,
    p3: point_like,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    width: float = 1,
    morph: OptSeq = None,
    closePath: bool = False,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> pymupdf.Point:
    """Draw a special Bezier curve from p1 to p3, generating control points on lines p1 to p2 and p2 to p3."""
    img = page.new_shape()
    Q = img.draw_curve(pymupdf.Point(p1), pymupdf.Point(p2), pymupdf.Point(p3))
    img.finish(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        closePath=closePath,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    img.commit(overlay)

    return Q


def draw_bezier(
    page: pymupdf.Page,
    p1: point_like,
    p2: point_like,
    p3: point_like,
    p4: point_like,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    width: float = 1,
    morph: OptStr = None,
    closePath: bool = False,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> pymupdf.Point:
    """Draw a general cubic Bezier curve from p1 to p4 using control points p2 and p3."""
    img = page.new_shape()
    Q = img.draw_bezier(pymupdf.Point(p1), pymupdf.Point(p2), pymupdf.Point(p3), pymupdf.Point(p4))
    img.finish(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        closePath=closePath,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    img.commit(overlay)

    return Q


def draw_sector(
    page: pymupdf.Page,
    center: point_like,
    point: point_like,
    beta: float,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    fullSector: bool = True,
    morph: OptSeq = None,
    width: float = 1,
    closePath: bool = False,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> pymupdf.Point:
    """Draw a circle sector given circle center, one arc end point and the angle of the arc.

    Parameters:
        center -- center of circle
        point -- arc end point
        beta -- angle of arc (degrees)
        fullSector -- connect arc ends with center
    """
    img = page.new_shape()
    Q = img.draw_sector(pymupdf.Point(center), pymupdf.Point(point), beta, fullSector=fullSector)
    img.finish(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        closePath=closePath,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    img.commit(overlay)

    return Q


# ----------------------------------------------------------------------
# Name:        wx.lib.colourdb.py
# Purpose:     Adds a bunch of colour names and RGB values to the
#              colour database so they can be found by name
#
# Author:      Robin Dunn
#
# Created:     13-March-2001
# Copyright:   (c) 2001-2017 by Total Control Software
# Licence:     wxWindows license
# Tags:        phoenix-port, unittest, documented
# ----------------------------------------------------------------------


def getColorList() -> list:
    """
    Returns a list of upper-case colour names.
    :rtype: list of strings
    """
    return [name for name, r, g, b in pymupdf.colors_wx_list()]


def getColorInfoList() -> list:
    """
    Returns list of (name, red, gree, blue) tuples, where:
        name: upper-case color name.
        read, green, blue: integers in range 0..255.
    :rtype: list of tuples
    """
    return pymupdf.colors_wx_list()


def getColor(name: str) -> tuple:
    """Retrieve RGB color in PDF format by name.

    Returns:
        a triple of floats in range 0 to 1. In case of name-not-found, "white" is returned.
    """
    return pymupdf.colors_pdf_dict().get(name.lower(), (1, 1, 1))


def getColorHSV(name: str) -> tuple:
    """Retrieve the hue, saturation, value triple of a color name.

    Returns:
        a triple (degree, percent, percent). If not found (-1, -1, -1) is returned.
    """
    try:
        x = getColorInfoList()[getColorList().index(name.upper())]
    except Exception:
        if g_exceptions_verbose:    pymupdf.exception_info()
        return (-1, -1, -1)

    r = x[1] / 255.0
    g = x[2] / 255.0
    b = x[3] / 255.0
    cmax = max(r, g, b)
    V = round(cmax * 100, 1)
    cmin = min(r, g, b)
    delta = cmax - cmin
    if delta == 0:
        hue = 0
    elif cmax == r:
        hue = 60.0 * (((g - b) / delta) % 6)
    elif cmax == g:
        hue = 60.0 * (((b - r) / delta) + 2)
    else:
        hue = 60.0 * (((r - g) / delta) + 4)

    H = int(round(hue))

    if cmax == 0:
        sat = 0
    else:
        sat = delta / cmax
    S = int(round(sat * 100))

    return (H, S, V)


def _get_font_properties(doc: pymupdf.Document, xref: int) -> tuple:
    fontname, ext, stype, buffer = doc.extract_font(xref)
    asc = 0.8
    dsc = -0.2
    if ext == "":
        return fontname, ext, stype, asc, dsc

    if buffer:
        try:
            font = pymupdf.Font(fontbuffer=buffer)
            asc = font.ascender
            dsc = font.descender
            bbox = font.bbox
            if asc - dsc < 1:
                if bbox.y0 < dsc:
                    dsc = bbox.y0
                asc = 1 - dsc
        except Exception:
            pymupdf.exception_info()
            asc *= 1.2
            dsc *= 1.2
        return fontname, ext, stype, asc, dsc
    if ext != "n/a":
        try:
            font = pymupdf.Font(fontname)
            asc = font.ascender
            dsc = font.descender
        except Exception:
            pymupdf.exception_info()
            asc *= 1.2
            dsc *= 1.2
    else:
        asc *= 1.2
        dsc *= 1.2
    return fontname, ext, stype, asc, dsc


def get_char_widths(
    doc: pymupdf.Document, xref: int, limit: int = 256, idx: int = 0, fontdict: OptDict = None
) -> list:
    """Get list of glyph information of a font.

    Notes:
        Must be provided by its XREF number. If we already dealt with the
        font, it will be recorded in doc.FontInfos. Otherwise we insert an
        entry there.
        Finally we return the glyphs for the font. This is a list of
        (glyph, width) where glyph is an integer controlling the char
        appearance, and width is a float controlling the char's spacing:
        width * fontsize is the actual space.
        For 'simple' fonts, glyph == ord(char) will usually be true.
        Exceptions are 'Symbol' and 'ZapfDingbats'. We are providing data for these directly here.
    """
    fontinfo = pymupdf.CheckFontInfo(doc, xref)
    if fontinfo is None:  # not recorded yet: create it
        if fontdict is None:
            name, ext, stype, asc, dsc = _get_font_properties(doc, xref)
            fontdict = {
                "name": name,
                "type": stype,
                "ext": ext,
                "ascender": asc,
                "descender": dsc,
            }
        else:
            name = fontdict["name"]
            ext = fontdict["ext"]
            stype = fontdict["type"]
            ordering = fontdict["ordering"]
            simple = fontdict["simple"]

        if ext == "":
            raise ValueError("xref is not a font")

        # check for 'simple' fonts
        if stype in ("Type1", "MMType1", "TrueType"):
            simple = True
        else:
            simple = False

        # check for CJK fonts
        if name in ("Fangti", "Ming"):
            ordering = 0
        elif name in ("Heiti", "Song"):
            ordering = 1
        elif name in ("Gothic", "Mincho"):
            ordering = 2
        elif name in ("Dotum", "Batang"):
            ordering = 3
        else:
            ordering = -1

        fontdict["simple"] = simple

        if name == "ZapfDingbats":
            glyphs = pymupdf.zapf_glyphs
        elif name == "Symbol":
            glyphs = pymupdf.symbol_glyphs
        else:
            glyphs = None

        fontdict["glyphs"] = glyphs
        fontdict["ordering"] = ordering
        fontinfo = [xref, fontdict]
        doc.FontInfos.append(fontinfo)
    else:
        fontdict = fontinfo[1]
        glyphs = fontdict["glyphs"]
        simple = fontdict["simple"]
        ordering = fontdict["ordering"]

    if glyphs is None:
        oldlimit = 0
    else:
        oldlimit = len(glyphs)

    mylimit = max(256, limit)

    if mylimit <= oldlimit:
        return glyphs

    if ordering < 0:  # not a CJK font
        glyphs = doc._get_char_widths(
            xref, fontdict["name"], fontdict["ext"], fontdict["ordering"], mylimit, idx
        )
    else:  # CJK fonts use char codes and width = 1
        glyphs = None

    fontdict["glyphs"] = glyphs
    fontinfo[1] = fontdict
    pymupdf.UpdateFontInfo(doc, fontinfo)

    return glyphs


class Shape:
    """Create a new shape."""

    @staticmethod
    def horizontal_angle(C, P):
        """Return the angle to the horizontal for the connection from C to P.
        This uses the arcus sine function and resolves its inherent ambiguity by
        looking up in which quadrant vector S = P - C is located.
        """
        S = pymupdf.Point(P - C).unit  # unit vector 'C' -> 'P'
        alfa = math.asin(abs(S.y))  # absolute angle from horizontal
        if S.x < 0:  # make arcsin result unique
            if S.y <= 0:  # bottom-left
                alfa = -(math.pi - alfa)
            else:  # top-left
                alfa = math.pi - alfa
        else:
            if S.y >= 0:  # top-right
                pass
            else:  # bottom-right
                alfa = -alfa
        return alfa

    def __init__(self, page: pymupdf.Page):
        pymupdf.CheckParent(page)
        self.page = page
        self.doc = page.parent
        if not self.doc.is_pdf:
            raise ValueError("is no PDF")
        self.height = page.mediabox_size.y
        self.width = page.mediabox_size.x
        self.x = page.cropbox_position.x
        self.y = page.cropbox_position.y

        self.pctm = page.transformation_matrix  # page transf. matrix
        self.ipctm = ~self.pctm  # inverted transf. matrix

        self.draw_cont = ""
        self.text_cont = ""
        self.totalcont = ""
        self.last_point = None
        self.rect = None

    def updateRect(self, x):
        if self.rect is None:
            if len(x) == 2:
                self.rect = pymupdf.Rect(x, x)
            else:
                self.rect = pymupdf.Rect(x)

        else:
            if len(x) == 2:
                x = pymupdf.Point(x)
                self.rect.x0 = min(self.rect.x0, x.x)
                self.rect.y0 = min(self.rect.y0, x.y)
                self.rect.x1 = max(self.rect.x1, x.x)
                self.rect.y1 = max(self.rect.y1, x.y)
            else:
                x = pymupdf.Rect(x)
                self.rect.x0 = min(self.rect.x0, x.x0)
                self.rect.y0 = min(self.rect.y0, x.y0)
                self.rect.x1 = max(self.rect.x1, x.x1)
                self.rect.y1 = max(self.rect.y1, x.y1)

    def draw_line(self, p1: point_like, p2: point_like) -> pymupdf.Point:
        """Draw a line between two points."""
        p1 = pymupdf.Point(p1)
        p2 = pymupdf.Point(p2)
        if not (self.last_point == p1):
            self.draw_cont += _format_g(pymupdf.JM_TUPLE(p1 * self.ipctm)) + " m\n"
            self.last_point = p1
            self.updateRect(p1)

        self.draw_cont += _format_g(pymupdf.JM_TUPLE(p2 * self.ipctm)) + " l\n"
        self.updateRect(p2)
        self.last_point = p2
        return self.last_point

    def draw_polyline(self, points: list) -> pymupdf.Point:
        """Draw several connected line segments."""
        for i, p in enumerate(points):
            if i == 0:
                if not (self.last_point == pymupdf.Point(p)):
                    self.draw_cont += _format_g(pymupdf.JM_TUPLE(pymupdf.Point(p) * self.ipctm)) + " m\n"
                    self.last_point = pymupdf.Point(p)
            else:
                self.draw_cont += _format_g(pymupdf.JM_TUPLE(pymupdf.Point(p) * self.ipctm)) + " l\n"
            self.updateRect(p)

        self.last_point = pymupdf.Point(points[-1])
        return self.last_point

    def draw_bezier(
        self,
        p1: point_like,
        p2: point_like,
        p3: point_like,
        p4: point_like,
    ) -> pymupdf.Point:
        """Draw a standard cubic Bezier curve."""
        p1 = pymupdf.Point(p1)
        p2 = pymupdf.Point(p2)
        p3 = pymupdf.Point(p3)
        p4 = pymupdf.Point(p4)
        if not (self.last_point == p1):
            self.draw_cont += _format_g(pymupdf.JM_TUPLE(p1 * self.ipctm)) + " m\n"
        args = pymupdf.JM_TUPLE(list(p2 * self.ipctm) + list(p3 * self.ipctm) + list(p4 * self.ipctm))
        self.draw_cont += _format_g(args) + " c\n"
        self.updateRect(p1)
        self.updateRect(p2)
        self.updateRect(p3)
        self.updateRect(p4)
        self.last_point = p4
        return self.last_point

    def draw_oval(self, tetra: typing.Union[quad_like, rect_like]) -> pymupdf.Point:
        """Draw an ellipse inside a tetrapod."""
        if len(tetra) != 4:
            raise ValueError("invalid arg length")
        if hasattr(tetra[0], "__float__"):
            q = pymupdf.Rect(tetra).quad
        else:
            q = pymupdf.Quad(tetra)

        mt = q.ul + (q.ur - q.ul) * 0.5
        mr = q.ur + (q.lr - q.ur) * 0.5
        mb = q.ll + (q.lr - q.ll) * 0.5
        ml = q.ul + (q.ll - q.ul) * 0.5
        if not (self.last_point == ml):
            self.draw_cont += _format_g(pymupdf.JM_TUPLE(ml * self.ipctm)) + " m\n"
            self.last_point = ml
        self.draw_curve(ml, q.ll, mb)
        self.draw_curve(mb, q.lr, mr)
        self.draw_curve(mr, q.ur, mt)
        self.draw_curve(mt, q.ul, ml)
        self.updateRect(q.rect)
        self.last_point = ml
        return self.last_point

    def draw_circle(self, center: point_like, radius: float) -> pymupdf.Point:
        """Draw a circle given its center and radius."""
        if not radius > pymupdf.EPSILON:
            raise ValueError("radius must be positive")
        center = pymupdf.Point(center)
        p1 = center - (radius, 0)
        return self.draw_sector(center, p1, 360, fullSector=False)

    def draw_curve(
        self,
        p1: point_like,
        p2: point_like,
        p3: point_like,
    ) -> pymupdf.Point:
        """Draw a curve between points using one control point."""
        kappa = 0.55228474983
        p1 = pymupdf.Point(p1)
        p2 = pymupdf.Point(p2)
        p3 = pymupdf.Point(p3)
        k1 = p1 + (p2 - p1) * kappa
        k2 = p3 + (p2 - p3) * kappa
        return self.draw_bezier(p1, k1, k2, p3)

    def draw_sector(
        self,
        center: point_like,
        point: point_like,
        beta: float,
        fullSector: bool = True,
    ) -> pymupdf.Point:
        """Draw a circle sector."""
        center = pymupdf.Point(center)
        point = pymupdf.Point(point)
        l3 = lambda a, b: _format_g((a, b)) + " m\n"
        l4 = lambda a, b, c, d, e, f: _format_g((a, b, c, d, e, f)) + " c\n"
        l5 = lambda a, b: _format_g((a, b)) + " l\n"
        betar = math.radians(-beta)
        w360 = math.radians(math.copysign(360, betar)) * (-1)
        w90 = math.radians(math.copysign(90, betar))
        w45 = w90 / 2
        while abs(betar) > 2 * math.pi:
            betar += w360  # bring angle below 360 degrees
        if not (self.last_point == point):
            self.draw_cont += l3(*pymupdf.JM_TUPLE(point * self.ipctm))
            self.last_point = point
        Q = pymupdf.Point(0, 0)  # just make sure it exists
        C = center
        P = point
        S = P - C  # vector 'center' -> 'point'
        rad = abs(S)  # circle radius

        if not rad > pymupdf.EPSILON:
            raise ValueError("radius must be positive")

        alfa = self.horizontal_angle(center, point)
        while abs(betar) > abs(w90):  # draw 90 degree arcs
            q1 = C.x + math.cos(alfa + w90) * rad
            q2 = C.y + math.sin(alfa + w90) * rad
            Q = pymupdf.Point(q1, q2)  # the arc's end point
            r1 = C.x + math.cos(alfa + w45) * rad / math.cos(w45)
            r2 = C.y + math.sin(alfa + w45) * rad / math.cos(w45)
            R = pymupdf.Point(r1, r2)  # crossing point of tangents
            kappah = (1 - math.cos(w45)) * 4 / 3 / abs(R - Q)
            kappa = kappah * abs(P - Q)
            cp1 = P + (R - P) * kappa  # control point 1
            cp2 = Q + (R - Q) * kappa  # control point 2
            self.draw_cont += l4(*pymupdf.JM_TUPLE(
                list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm)
            ))

            betar -= w90  # reduce param angle by 90 deg
            alfa += w90  # advance start angle by 90 deg
            P = Q  # advance to arc end point
        # draw (remaining) arc
        if abs(betar) > 1e-3:  # significant degrees left?
            beta2 = betar / 2
            q1 = C.x + math.cos(alfa + betar) * rad
            q2 = C.y + math.sin(alfa + betar) * rad
            Q = pymupdf.Point(q1, q2)  # the arc's end point
            r1 = C.x + math.cos(alfa + beta2) * rad / math.cos(beta2)
            r2 = C.y + math.sin(alfa + beta2) * rad / math.cos(beta2)
            R = pymupdf.Point(r1, r2)  # crossing point of tangents
            # kappa height is 4/3 of segment height
            kappah = (1 - math.cos(beta2)) * 4 / 3 / abs(R - Q)  # kappa height
            kappa = kappah * abs(P - Q) / (1 - math.cos(betar))
            cp1 = P + (R - P) * kappa  # control point 1
            cp2 = Q + (R - Q) * kappa  # control point 2
            self.draw_cont += l4(*pymupdf.JM_TUPLE(
                list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm)
            ))
        if fullSector:
            self.draw_cont += l3(*pymupdf.JM_TUPLE(point * self.ipctm))
            self.draw_cont += l5(*pymupdf.JM_TUPLE(center * self.ipctm))
            self.draw_cont += l5(*pymupdf.JM_TUPLE(Q * self.ipctm))
        self.last_point = Q
        return self.last_point

    def draw_rect(self, rect: rect_like, *, radius=None) -> pymupdf.Point:
        """Draw a rectangle.

        Args:
            radius: if not None, the rectangle will have rounded corners.
                This is the radius of the curvature, given as percentage of
                the rectangle width or height. Valid are values 0 < v <= 0.5.
                For a sequence of two values, the corners will have different
                radii. Otherwise, the percentage will be computed from the
                shorter side. A value of (0.5, 0.5) will draw an ellipse.
        """
        r = pymupdf.Rect(rect)
        if radius is None:  # standard rectangle
            self.draw_cont += _format_g(pymupdf.JM_TUPLE(
                list(r.bl * self.ipctm) + [r.width, r.height]
            )) + " re\n"
            self.updateRect(r)
            self.last_point = r.tl
            return self.last_point
        # rounded corners requested. This requires 1 or 2 values, each
        # with 0 < value <= 0.5
        if hasattr(radius, "__float__"):
            if radius <= 0 or radius > 0.5:
                raise ValueError(f"bad radius value {radius}.")
            d = min(r.width, r.height) * radius
            px = (d, 0)
            py = (0, d)
        elif hasattr(radius, "__len__") and len(radius) == 2:
            rx, ry = radius
            px = (rx * r.width, 0)
            py = (0, ry * r.height)
            if min(rx, ry) <= 0 or max(rx, ry) > 0.5:
                raise ValueError(f"bad radius value {radius}.")
        else:
            raise ValueError(f"bad radius value {radius}.")

        lp = self.draw_line(r.tl + py, r.bl - py)
        lp = self.draw_curve(lp, r.bl, r.bl + px)

        lp = self.draw_line(lp, r.br - px)
        lp = self.draw_curve(lp, r.br, r.br - py)

        lp = self.draw_line(lp, r.tr + py)
        lp = self.draw_curve(lp, r.tr, r.tr - px)

        lp = self.draw_line(lp, r.tl + px)
        self.last_point = self.draw_curve(lp, r.tl, r.tl + py)

        self.updateRect(r)
        return self.last_point

    def draw_quad(self, quad: quad_like) -> pymupdf.Point:
        """Draw a Quad."""
        q = pymupdf.Quad(quad)
        return self.draw_polyline([q.ul, q.ll, q.lr, q.ur, q.ul])

    def draw_zigzag(
        self,
        p1: point_like,
        p2: point_like,
        breadth: float = 2,
    ) -> pymupdf.Point:
        """Draw a zig-zagged line from p1 to p2."""
        p1 = pymupdf.Point(p1)
        p2 = pymupdf.Point(p2)
        S = p2 - p1  # vector start - end
        rad = abs(S)  # distance of points
        cnt = 4 * int(round(rad / (4 * breadth), 0))  # always take full phases
        if cnt < 4:
            raise ValueError("points too close")
        mb = rad / cnt  # revised breadth
        matrix = pymupdf.Matrix(pymupdf.util_hor_matrix(p1, p2))  # normalize line to x-axis
        i_mat = ~matrix  # get original position
        points = []  # stores edges
        for i in range(1, cnt):
            if i % 4 == 1:  # point "above" connection
                p = pymupdf.Point(i, -1) * mb
            elif i % 4 == 3:  # point "below" connection
                p = pymupdf.Point(i, 1) * mb
            else:  # ignore others
                continue
            points.append(p * i_mat)
        self.draw_polyline([p1] + points + [p2])  # add start and end points
        return p2

    def draw_squiggle(
        self,
        p1: point_like,
        p2: point_like,
        breadth=2,
    ) -> pymupdf.Point:
        """Draw a squiggly line from p1 to p2."""
        p1 = pymupdf.Point(p1)
        p2 = pymupdf.Point(p2)
        S = p2 - p1  # vector start - end
        rad = abs(S)  # distance of points
        cnt = 4 * int(round(rad / (4 * breadth), 0))  # always take full phases
        if cnt < 4:
            raise ValueError("points too close")
        mb = rad / cnt  # revised breadth
        matrix = pymupdf.Matrix(pymupdf.util_hor_matrix(p1, p2))  # normalize line to x-axis
        i_mat = ~matrix  # get original position
        k = 2.4142135623765633  # y of draw_curve helper point

        points = []  # stores edges
        for i in range(1, cnt):
            if i % 4 == 1:  # point "above" connection
                p = pymupdf.Point(i, -k) * mb
            elif i % 4 == 3:  # point "below" connection
                p = pymupdf.Point(i, k) * mb
            else:  # else on connection line
                p = pymupdf.Point(i, 0) * mb
            points.append(p * i_mat)

        points = [p1] + points + [p2]
        cnt = len(points)
        i = 0
        while i + 2 < cnt:
            self.draw_curve(points[i], points[i + 1], points[i + 2])
            i += 2
        return p2

    # ==============================================================================
    # Shape.insert_text
    # ==============================================================================
    def insert_text(
        self,
        point: point_like,
        buffer: typing.Union[str, list],
        *,
        fontsize: float = 11,
        lineheight: OptFloat = None,
        fontname: str = "helv",
        fontfile: OptStr = None,
        set_simple: bool = 0,
        encoding: int = 0,
        color: OptSeq = None,
        fill: OptSeq = None,
        render_mode: int = 0,
        border_width: float = 0.05,
        miter_limit: float = 1,
        rotate: int = 0,
        morph: OptSeq = None,
        stroke_opacity: float = 1,
        fill_opacity: float = 1,
        oc: int = 0,
    ) -> int:

        # ensure 'text' is a list of strings, worth dealing with
        if not bool(buffer):
            return 0

        if type(buffer) not in (list, tuple):
            text = buffer.splitlines()
        else:
            text = buffer

        if not len(text) > 0:
            return 0

        point = pymupdf.Point(point)
        try:
            maxcode = max([ord(c) for c in " ".join(text)])
        except Exception:
            pymupdf.exception_info()
            return 0

        # ensure valid 'fontname'
        fname = fontname
        if fname.startswith("/"):
            fname = fname[1:]

        xref = self.page.insert_font(
            fontname=fname, fontfile=fontfile, encoding=encoding, set_simple=set_simple
        )
        fontinfo = pymupdf.CheckFontInfo(self.doc, xref)

        fontdict = fontinfo[1]
        ordering = fontdict["ordering"]
        simple = fontdict["simple"]
        bfname = fontdict["name"]
        ascender = fontdict["ascender"]
        descender = fontdict["descender"]
        if lineheight:
            lheight = fontsize * lineheight
        elif ascender - descender <= 1:
            lheight = fontsize * 1.2
        else:
            lheight = fontsize * (ascender - descender)

        if maxcode > 255:
            glyphs = self.doc.get_char_widths(xref, maxcode + 1)
        else:
            glyphs = fontdict["glyphs"]

        tab = []
        for t in text:
            if simple and bfname not in ("Symbol", "ZapfDingbats"):
                g = None
            else:
                g = glyphs
            tab.append(pymupdf.getTJstr(t, g, simple, ordering))
        text = tab

        color_str = pymupdf.ColorCode(color, "c")
        fill_str = pymupdf.ColorCode(fill, "f")
        if not fill and render_mode == 0:  # ensure fill color when 0 Tr
            fill = color
            fill_str = pymupdf.ColorCode(color, "f")

        morphing = pymupdf.CheckMorph(morph)
        rot = rotate
        if rot % 90 != 0:
            raise ValueError("bad rotate value")

        while rot < 0:
            rot += 360
        rot = rot % 360  # text rotate = 0, 90, 270, 180

        templ1 = lambda a, b, c, d, e, f, g: f"\nq\n{a}{b}BT\n{c}1 0 0 1 {_format_g((d, e))} Tm\n/{f} {_format_g(g)} Tf "
        templ2 = lambda a: f"TJ\n0 -{_format_g(a)} TD\n"
        cmp90 = "0 1 -1 0 0 0 cm\n"  # rotates 90 deg counter-clockwise
        cmm90 = "0 -1 1 0 0 0 cm\n"  # rotates 90 deg clockwise
        cm180 = "-1 0 0 -1 0 0 cm\n"  # rotates by 180 deg.
        height = self.height
        width = self.width

        # setting up for standard rotation directions
        # case rotate = 0
        if morphing:
            m1 = pymupdf.Matrix(1, 0, 0, 1, morph[0].x + self.x, height - morph[0].y - self.y)
            mat = ~m1 * morph[1] * m1
            cm = _format_g(pymupdf.JM_TUPLE(mat)) + " cm\n"
        else:
            cm = ""
        top = height - point.y - self.y  # start of 1st char
        left = point.x + self.x  # start of 1. char
        space = top  # space available
        #headroom = point.y + self.y  # distance to page border
        if rot == 90:
            left = height - point.y - self.y
            top = -point.x - self.x
            cm += cmp90
            space = width - abs(top)
            #headroom = point.x + self.x

        elif rot == 270:
            left = -height + point.y + self.y
            top = point.x + self.x
            cm += cmm90
            space = abs(top)
            #headroom = width - point.x - self.x

        elif rot == 180:
            left = -point.x - self.x
            top = -height + point.y + self.y
            cm += cm180
            space = abs(point.y + self.y)
            #headroom = height - point.y - self.y

        optcont = self.page._get_optional_content(oc)
        if optcont is not None:
            bdc = "/OC /%s BDC\n" % optcont
            emc = "EMC\n"
        else:
            bdc = emc = ""

        alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
        if alpha is None:
            alpha = ""
        else:
            alpha = "/%s gs\n" % alpha
        nres = templ1(bdc, alpha, cm, left, top, fname, fontsize)

        if render_mode > 0:
            nres += "%i Tr " % render_mode
            nres += _format_g(border_width * fontsize) + " w "
            if miter_limit is not None:
                nres += _format_g(miter_limit) + " M "
        if color is not None:
            nres += color_str
        if fill is not None:
            nres += fill_str

        # =========================================================================
        #   start text insertion
        # =========================================================================
        nres += text[0]
        nlines = 1  # set output line counter
        if len(text) > 1:
            nres += templ2(lheight)  # line 1
        else:
            nres += 'TJ'
        for i in range(1, len(text)):
            if space < lheight:
                break  # no space left on page
            if i > 1:
                nres += "\nT* "
            nres += text[i] + 'TJ'
            space -= lheight
            nlines += 1

        nres += "\nET\n%sQ\n" % emc

        # =========================================================================
        #   end of text insertion
        # =========================================================================
        # update the /Contents object
        self.text_cont += nres
        return nlines

    # ==============================================================================
    # Shape.insert_textbox
    # ==============================================================================
    def insert_textbox(
        self,
        rect: rect_like,
        buffer: typing.Union[str, list],
        *,
        fontname: OptStr = "helv",
        fontfile: OptStr = None,
        fontsize: float = 11,
        lineheight: OptFloat = None,
        set_simple: bool = 0,
        encoding: int = 0,
        color: OptSeq = None,
        fill: OptSeq = None,
        expandtabs: int = 1,
        border_width: float = 0.05,
        miter_limit: float = 1,
        align: int = 0,
        render_mode: int = 0,
        rotate: int = 0,
        morph: OptSeq = None,
        stroke_opacity: float = 1,
        fill_opacity: float = 1,
        oc: int = 0,
    ) -> float:
        """Insert text into a given rectangle.

        Args:
            rect -- the textbox to fill
            buffer -- text to be inserted
            fontname -- a Base-14 font, font name or '/name'
            fontfile -- name of a font file
            fontsize -- font size
            lineheight -- overwrite the font property
            color -- RGB stroke color triple
            fill -- RGB fill color triple
            render_mode -- text rendering control
            border_width -- thickness of glyph borders as percentage of fontsize
            expandtabs -- handles tabulators with string function
            align -- left, center, right, justified
            rotate -- 0, 90, 180, or 270 degrees
            morph -- morph box with a matrix and a fixpoint
        Returns:
            unused or deficit rectangle area (float)
        """
        rect = pymupdf.Rect(rect)
        if rect.is_empty or rect.is_infinite:
            raise ValueError("text box must be finite and not empty")

        color_str = pymupdf.ColorCode(color, "c")
        fill_str = pymupdf.ColorCode(fill, "f")
        if fill is None and render_mode == 0:  # ensure fill color for 0 Tr
            fill = color
            fill_str = pymupdf.ColorCode(color, "f")

        optcont = self.page._get_optional_content(oc)
        if optcont is not None:
            bdc = "/OC /%s BDC\n" % optcont
            emc = "EMC\n"
        else:
            bdc = emc = ""

        # determine opacity / transparency
        alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
        if alpha is None:
            alpha = ""
        else:
            alpha = "/%s gs\n" % alpha

        if rotate % 90 != 0:
            raise ValueError("rotate must be multiple of 90")

        rot = rotate
        while rot < 0:
            rot += 360
        rot = rot % 360

        # is buffer worth of dealing with?
        if not bool(buffer):
            return rect.height if rot in (0, 180) else rect.width

        cmp90 = "0 1 -1 0 0 0 cm\n"  # rotates counter-clockwise
        cmm90 = "0 -1 1 0 0 0 cm\n"  # rotates clockwise
        cm180 = "-1 0 0 -1 0 0 cm\n"  # rotates by 180 deg.
        height = self.height

        fname = fontname
        if fname.startswith("/"):
            fname = fname[1:]

        xref = self.page.insert_font(
            fontname=fname, fontfile=fontfile, encoding=encoding, set_simple=set_simple
        )
        fontinfo = pymupdf.CheckFontInfo(self.doc, xref)

        fontdict = fontinfo[1]
        ordering = fontdict["ordering"]
        simple = fontdict["simple"]
        glyphs = fontdict["glyphs"]
        bfname = fontdict["name"]
        ascender = fontdict["ascender"]
        descender = fontdict["descender"]

        if lineheight:
            lheight_factor = lineheight
        elif ascender - descender <= 1:
            lheight_factor = 1.2
        else:
            lheight_factor = ascender - descender
        lheight = fontsize * lheight_factor

        # create a list from buffer, split into its lines
        if type(buffer) in (list, tuple):
            t0 = "\n".join(buffer)
        else:
            t0 = buffer

        maxcode = max([ord(c) for c in t0])
        # replace invalid char codes for simple fonts
        if simple and maxcode > 255:
            t0 = "".join([c if ord(c) < 256 else "?" for c in t0])

        t0 = t0.splitlines()

        glyphs = self.doc.get_char_widths(xref, maxcode + 1)
        if simple and bfname not in ("Symbol", "ZapfDingbats"):
            tj_glyphs = None
        else:
            tj_glyphs = glyphs

        # ----------------------------------------------------------------------
        # calculate pixel length of a string
        # ----------------------------------------------------------------------
        def pixlen(x):
            """Calculate pixel length of x."""
            if ordering < 0:
                return sum([glyphs[ord(c)][1] for c in x]) * fontsize
            else:
                return len(x) * fontsize

        # ---------------------------------------------------------------------

        if ordering < 0:
            blen = glyphs[32][1] * fontsize  # pixel size of space character
        else:
            blen = fontsize

        text = ""  # output buffer

        if pymupdf.CheckMorph(morph):
            m1 = pymupdf.Matrix(
                1, 0, 0, 1, morph[0].x + self.x, self.height - morph[0].y - self.y
            )
            mat = ~m1 * morph[1] * m1
            cm = _format_g(pymupdf.JM_TUPLE(mat)) + " cm\n"
        else:
            cm = ""

        # ---------------------------------------------------------------------
        # adjust for text orientation / rotation
        # ---------------------------------------------------------------------
        progr = 1  # direction of line progress
        c_pnt = pymupdf.Point(0, fontsize * ascender)  # used for line progress
        if rot == 0:  # normal orientation
            point = rect.tl + c_pnt  # line 1 is 'lheight' below top
            maxwidth = rect.width  # pixels available in one line
            maxheight = rect.height  # available text height

        elif rot == 90:  # rotate counter clockwise
            c_pnt = pymupdf.Point(fontsize * ascender, 0)  # progress in x-direction
            point = rect.bl + c_pnt  # line 1 'lheight' away from left
            maxwidth = rect.height  # pixels available in one line
            maxheight = rect.width  # available text height
            cm += cmp90

        elif rot == 180:  # text upside down
            # progress upwards in y direction
            c_pnt = -pymupdf.Point(0, fontsize * ascender)
            point = rect.br + c_pnt  # line 1 'lheight' above bottom
            maxwidth = rect.width  # pixels available in one line
            progr = -1  # subtract lheight for next line
            maxheight =rect.height  # available text height
            cm += cm180

        else:  # rotate clockwise (270 or -90)
            # progress from right to left
            c_pnt = -pymupdf.Point(fontsize * ascender, 0)
            point = rect.tr + c_pnt  # line 1 'lheight' left of right
            maxwidth = rect.height  # pixels available in one line
            progr = -1  # subtract lheight for next line
            maxheight = rect.width  # available text height
            cm += cmm90

        # =====================================================================
        # line loop
        # =====================================================================
        just_tab = []  # 'justify' indicators per line

        for i, line in enumerate(t0):
            line_t = line.expandtabs(expandtabs).split(" ")  # split into words
            num_words = len(line_t)
            lbuff = ""  # init line buffer
            rest = maxwidth  # available line pixels
            # =================================================================
            # word loop
            # =================================================================
            for j in range(num_words):
                word = line_t[j]
                pl_w = pixlen(word)  # pixel len of word
                if rest >= pl_w:  # does it fit on the line?
                    lbuff += word + " "  # yes, append word
                    rest -= pl_w + blen  # update available line space
                    continue  # next word

                # word doesn't fit - output line (if not empty)
                if lbuff:
                    lbuff = lbuff.rstrip() + "\n"  # line full, append line break
                    text += lbuff  # append to total text
                    just_tab.append(True)  # can align-justify

                lbuff = ""  # re-init line buffer
                rest = maxwidth  # re-init avail. space

                if pl_w <= maxwidth:  # word shorter than 1 line?
                    lbuff = word + " "  # start the line with it
                    rest = maxwidth - pl_w - blen  # update free space
                    continue

                # long word: split across multiple lines - char by char ...
                if len(just_tab) > 0:
                    just_tab[-1] = False  # cannot align-justify
                for c in word:
                    if pixlen(lbuff) <= maxwidth - pixlen(c):
                        lbuff += c
                    else:  # line full
                        lbuff += "\n"  # close line
                        text += lbuff  # append to text
                        just_tab.append(False)  # cannot align-justify
                        lbuff = c  # start new line with this char

                lbuff += " "  # finish long word
                rest = maxwidth - pixlen(lbuff)  # long word stored

            if lbuff:  # unprocessed line content?
                text += lbuff.rstrip()  # append to text
                just_tab.append(False)  # cannot align-justify

            if i < len(t0) - 1:  # not the last line?
                text += "\n"  # insert line break

        # compute used part of the textbox
        if text.endswith("\n"):
            text = text[:-1]
        lb_count = text.count("\n") + 1  # number of lines written

        # text height = line count * line height plus one descender value
        text_height = lheight * lb_count - descender * fontsize

        more = text_height - maxheight  # difference to height limit
        if more > pymupdf.EPSILON:  # landed too much outside rect
            return (-1) * more  # return deficit, don't output

        more = abs(more)
        if more < pymupdf.EPSILON:
            more = 0  # don't bother with epsilons
        nres = "\nq\n%s%sBT\n" % (bdc, alpha) + cm  # initialize output buffer
        templ = lambda a, b, c, d: f"1 0 0 1 {_format_g((a, b))} Tm /{c} {_format_g(d)} Tf "
        # center, right, justify: output each line with its own specifics
        text_t = text.splitlines()  # split text in lines again
        just_tab[-1] = False  # never justify last line
        for i, t in enumerate(text_t):
            spacing = 0
            pl = maxwidth - pixlen(t)  # length of empty line part
            pnt = point + c_pnt * (i * lheight_factor)  # text start of line
            if align == 1:  # center: right shift by half width
                if rot in (0, 180):
                    pnt = pnt + pymupdf.Point(pl / 2, 0) * progr
                else:
                    pnt = pnt - pymupdf.Point(0, pl / 2) * progr
            elif align == 2:  # right: right shift by full width
                if rot in (0, 180):
                    pnt = pnt + pymupdf.Point(pl, 0) * progr
                else:
                    pnt = pnt - pymupdf.Point(0, pl) * progr
            elif align == 3:  # justify
                spaces = t.count(" ")  # number of spaces in line
                if spaces > 0 and just_tab[i]:  # if any, and we may justify
                    spacing = pl / spaces  # make every space this much larger
                else:
                    spacing = 0  # keep normal space length
            top = height - pnt.y - self.y
            left = pnt.x + self.x
            if rot == 90:
                left = height - pnt.y - self.y
                top = -pnt.x - self.x
            elif rot == 270:
                left = -height + pnt.y + self.y
                top = pnt.x + self.x
            elif rot == 180:
                left = -pnt.x - self.x
                top = -height + pnt.y + self.y

            nres += templ(left, top, fname, fontsize)

            if render_mode > 0:
                nres += "%i Tr " % render_mode
                nres += _format_g(border_width * fontsize) + " w "
                if miter_limit is not None:
                    nres += _format_g(miter_limit) + " M "

            if align == 3:
                nres += _format_g(spacing) + " Tw "

            if color is not None:
                nres += color_str
            if fill is not None:
                nres += fill_str
            nres += "%sTJ\n" % pymupdf.getTJstr(t, tj_glyphs, simple, ordering)

        nres += "ET\n%sQ\n" % emc

        self.text_cont += nres
        self.updateRect(rect)
        return more

    def finish(
        self,
        width: float = 1,
        color: OptSeq = (0,),
        fill: OptSeq = None,
        lineCap: int = 0,
        lineJoin: int = 0,
        dashes: OptStr = None,
        even_odd: bool = False,
        morph: OptSeq = None,
        closePath: bool = True,
        fill_opacity: float = 1,
        stroke_opacity: float = 1,
        oc: int = 0,
    ) -> None:
        """Finish the current drawing segment.

        Notes:
            Apply colors, opacity, dashes, line style and width, or
            morphing. Also whether to close the path
            by connecting last to first point.
        """
        if self.draw_cont == "":  # treat empty contents as no-op
            return

        if width == 0:  # border color makes no sense then
            color = None
        elif color is None:  # vice versa
            width = 0
        # if color == None and fill == None:
        #     raise ValueError("at least one of 'color' or 'fill' must be given")
        color_str = pymupdf.ColorCode(color, "c")  # ensure proper color string
        fill_str = pymupdf.ColorCode(fill, "f")  # ensure proper fill string

        optcont = self.page._get_optional_content(oc)
        if optcont is not None:
            self.draw_cont = "/OC /%s BDC\n" % optcont + self.draw_cont
            emc = "EMC\n"
        else:
            emc = ""

        alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
        if alpha is not None:
            self.draw_cont = "/%s gs\n" % alpha + self.draw_cont

        if width != 1 and width != 0:
            self.draw_cont += _format_g(width) + " w\n"

        if lineCap != 0:
            self.draw_cont = "%i J\n" % lineCap + self.draw_cont
        if lineJoin != 0:
            self.draw_cont = "%i j\n" % lineJoin + self.draw_cont

        if dashes not in (None, "", "[] 0"):
            self.draw_cont = "%s d\n" % dashes + self.draw_cont

        if closePath:
            self.draw_cont += "h\n"
            self.last_point = None

        if color is not None:
            self.draw_cont += color_str

        if fill is not None:
            self.draw_cont += fill_str
            if color is not None:
                if not even_odd:
                    self.draw_cont += "B\n"
                else:
                    self.draw_cont += "B*\n"
            else:
                if not even_odd:
                    self.draw_cont += "f\n"
                else:
                    self.draw_cont += "f*\n"
        else:
            self.draw_cont += "S\n"

        self.draw_cont += emc
        if pymupdf.CheckMorph(morph):
            m1 = pymupdf.Matrix(
                1, 0, 0, 1, morph[0].x + self.x, self.height - morph[0].y - self.y
            )
            mat = ~m1 * morph[1] * m1
            self.draw_cont = _format_g(pymupdf.JM_TUPLE(mat)) + " cm\n" + self.draw_cont

        self.totalcont += "\nq\n" + self.draw_cont + "Q\n"
        self.draw_cont = ""
        self.last_point = None
        return

    def commit(self, overlay: bool = True) -> None:
        """Update the page's /Contents object with Shape data.

        The argument controls whether data appear in foreground (default)
        or background.
        """
        pymupdf.CheckParent(self.page)  # doc may have died meanwhile
        self.totalcont += self.text_cont
        self.totalcont = self.totalcont.encode()

        if self.totalcont:
            if overlay:
                self.page.wrap_contents()  # ensure a balanced graphics state
            # make /Contents object with dummy stream
            xref = pymupdf.TOOLS._insert_contents(self.page, b" ", overlay)
            # update it with potential compression
            self.doc.update_stream(xref, self.totalcont)

        self.last_point = None  # clean up ...
        self.rect = None  #
        self.draw_cont = ""  # for potential ...
        self.text_cont = ""  # ...
        self.totalcont = ""  # re-use


def apply_redactions(
    page: pymupdf.Page, images: int = 2, graphics: int = 1, text: int = 0
) -> bool:
    """Apply the redaction annotations of the page.

    Args:
        page: the PDF page.
        images:
              0 - ignore images
              1 - remove all overlapping images
              2 - blank out overlapping image parts
              3 - remove image unless invisible
        graphics:
              0 - ignore graphics
              1 - remove graphics if contained in rectangle
              2 - remove all overlapping graphics
        text:
              0 - remove text
              1 - ignore text
    """

    def center_rect(annot_rect, new_text, font, fsize):
        """Calculate minimal sub-rectangle for the overlay text.

        Notes:
            Because 'insert_textbox' supports no vertical text centering,
            we calculate an approximate number of lines here and return a
            sub-rect with smaller height, which should still be sufficient.
        Args:
            annot_rect: the annotation rectangle
            new_text: the text to insert.
            font: the fontname. Must be one of the CJK or Base-14 set, else
                the rectangle is returned unchanged.
            fsize: the fontsize
        Returns:
            A rectangle to use instead of the annot rectangle.
        """
        if not new_text or annot_rect.width <= pymupdf.EPSILON:
            return annot_rect
        try:
            text_width = pymupdf.get_text_length(new_text, font, fsize)
        except (ValueError, mupdf.FzErrorBase):  # unsupported font
            if g_exceptions_verbose:
                pymupdf.exception_info()
            return annot_rect
        line_height = fsize * 1.2
        limit = annot_rect.width
        h = math.ceil(text_width / limit) * line_height  # estimate rect height
        if h >= annot_rect.height:
            return annot_rect
        r = annot_rect
        y = (annot_rect.tl.y + annot_rect.bl.y - h) * 0.5
        r.y0 = y
        return r

    pymupdf.CheckParent(page)
    doc = page.parent
    if doc.is_encrypted or doc.is_closed:
        raise ValueError("document closed or encrypted")
    if not doc.is_pdf:
        raise ValueError("is no PDF")

    redact_annots = []  # storage of annot values
    for annot in page.annots(
        types=(pymupdf.PDF_ANNOT_REDACT,)  # pylint: disable=no-member
    ):
        # loop redactions
        redact_annots.append(annot._get_redact_values())  # save annot values

    if redact_annots == []:  # any redactions on this page?
        return False  # no redactions

    rc = page._apply_redactions(text, images, graphics)  # call MuPDF
    if not rc:  # should not happen really
        raise ValueError("Error applying redactions.")

    # now write replacement text in old redact rectangles
    shape = page.new_shape()
    for redact in redact_annots:
        annot_rect = redact["rect"]
        fill = redact["fill"]
        if fill:
            shape.draw_rect(annot_rect)  # colorize the rect background
            shape.finish(fill=fill, color=fill)
        if "text" in redact.keys():  # if we also have text
            new_text = redact["text"]
            align = redact.get("align", 0)
            fname = redact["fontname"]
            fsize = redact["fontsize"]
            color = redact["text_color"]
            # try finding vertical centered sub-rect
            trect = center_rect(annot_rect, new_text, fname, fsize)

            rc = -1
            while rc < 0 and fsize >= 4:  # while not enough room
                # (re-) try insertion
                rc = shape.insert_textbox(
                    trect,
                    new_text,
                    fontname=fname,
                    fontsize=fsize,
                    color=color,
                    align=align,
                )
                fsize -= 0.5  # reduce font if unsuccessful
    shape.commit()  # append new contents object
    return True


# ------------------------------------------------------------------------------
# Remove potentially sensitive data from a PDF. Similar to the Adobe
# Acrobat 'sanitize' function
# ------------------------------------------------------------------------------
def scrub(
    doc: pymupdf.Document,
    attached_files: bool = True,
    clean_pages: bool = True,
    embedded_files: bool = True,
    hidden_text: bool = True,
    javascript: bool = True,
    metadata: bool = True,
    redactions: bool = True,
    redact_images: int = 0,
    remove_links: bool = True,
    reset_fields: bool = True,
    reset_responses: bool = True,
    thumbnails: bool = True,
    xml_metadata: bool = True,
) -> None:
    def remove_hidden(cont_lines):
        """Remove hidden text from a PDF page.

        Args:
            cont_lines: list of lines with /Contents content. Should have status
                from after page.cleanContents().

        Returns:
            List of /Contents lines from which hidden text has been removed.

        Notes:
            The input must have been created after the page's /Contents object(s)
            have been cleaned with page.cleanContents(). This ensures a standard
            formatting: one command per line, single spaces between operators.
            This allows for drastic simplification of this code.
        """
        out_lines = []  # will return this
        in_text = False  # indicate if within BT/ET object
        suppress = False  # indicate text suppression active
        make_return = False
        for line in cont_lines:
            if line == b"BT":  # start of text object
                in_text = True  # switch on
                out_lines.append(line)  # output it
                continue
            if line == b"ET":  # end of text object
                in_text = False  # switch off
                out_lines.append(line)  # output it
                continue
            if line == b"3 Tr":  # text suppression operator
                suppress = True  # switch on
                make_return = True
                continue
            if line[-2:] == b"Tr" and line[0] != b"3":
                suppress = False  # text rendering changed
                out_lines.append(line)
                continue
            if line == b"Q":  # unstack command also switches off
                suppress = False
                out_lines.append(line)
                continue
            if suppress and in_text:  # suppress hidden lines
                continue
            out_lines.append(line)
        if make_return:
            return out_lines
        else:
            return None

    if not doc.is_pdf:  # only works for PDF
        raise ValueError("is no PDF")
    if doc.is_encrypted or doc.is_closed:
        raise ValueError("closed or encrypted doc")

    if not clean_pages:
        hidden_text = False
        redactions = False

    if metadata:
        doc.set_metadata({})  # remove standard metadata

    for page in doc:
        if reset_fields:
            # reset form fields (widgets)
            for widget in page.widgets():
                widget.reset()

        if remove_links:
            links = page.get_links()  # list of all links on page
            for link in links:  # remove all links
                page.delete_link(link)

        found_redacts = False
        for annot in page.annots():
            if annot.type[0] == mupdf.PDF_ANNOT_FILE_ATTACHMENT and attached_files:
                annot.update_file(buffer_=b" ")  # set file content to empty
            if reset_responses:
                annot.delete_responses()
            if annot.type[0] == pymupdf.PDF_ANNOT_REDACT:  # pylint: disable=no-member
                found_redacts = True

        if redactions and found_redacts:
            page.apply_redactions(images=redact_images)

        if not (clean_pages or hidden_text):
            continue  # done with the page

        page.clean_contents()
        if not page.get_contents():
            continue
        if hidden_text:
            xref = page.get_contents()[0]  # only one b/o cleaning!
            cont = doc.xref_stream(xref)
            cont_lines = remove_hidden(cont.splitlines())  # remove hidden text
            if cont_lines:  # something was actually removed
                cont = b"\n".join(cont_lines)
                doc.update_stream(xref, cont)  # rewrite the page /Contents

        if thumbnails:  # remove page thumbnails?
            if doc.xref_get_key(page.xref, "Thumb")[0] != "null":
                doc.xref_set_key(page.xref, "Thumb", "null")

    # pages are scrubbed, now perform document-wide scrubbing
    # remove embedded files
    if embedded_files:
        for name in doc.embfile_names():
            doc.embfile_del(name)

    if xml_metadata:
        doc.del_xml_metadata()
    if not (xml_metadata or javascript):
        xref_limit = 0
    else:
        xref_limit = doc.xref_length()
    for xref in range(1, xref_limit):
        if not doc.xref_object(xref):
            msg = "bad xref %i - clean PDF before scrubbing" % xref
            raise ValueError(msg)
        if javascript and doc.xref_get_key(xref, "S")[1] == "/JavaScript":
            # a /JavaScript action object
            obj = "<</S/JavaScript/JS()>>"  # replace with a null JavaScript
            doc.update_object(xref, obj)  # update this object
            continue  # no further handling

        if not xml_metadata:
            continue

        if doc.xref_get_key(xref, "Type")[1] == "/Metadata":
            # delete any metadata object directly
            doc.update_object(xref, "<<>>")
            doc.update_stream(xref, b"deleted", new=True)
            continue

        if doc.xref_get_key(xref, "Metadata")[0] != "null":
            doc.xref_set_key(xref, "Metadata", "null")


def _show_fz_text( text):
    #if mupdf_cppyy:
    #    assert isinstance( text, cppyy.gbl.mupdf.Text)
    #else:
    #    assert isinstance( text, mupdf.Text)
    num_spans = 0
    num_chars = 0
    span = text.m_internal.head
    while 1:
        if not span:
            break
        num_spans += 1
        num_chars += span.len
        span = span.next
    return f'num_spans={num_spans} num_chars={num_chars}'

def fill_textbox(
    writer: pymupdf.TextWriter,
    rect: rect_like,
    text: typing.Union[str, list],
    pos: point_like = None,
    font: typing.Optional[pymupdf.Font] = None,
    fontsize: float = 11,
    lineheight: OptFloat = None,
    align: int = 0,
    warn: bool = None,
    right_to_left: bool = False,
    small_caps: bool = False,
) -> tuple:
    """Fill a rectangle with text.

    Args:
        writer: pymupdf.TextWriter object (= "self")
        rect: rect-like to receive the text.
        text: string or list/tuple of strings.
        pos: point-like start position of first word.
        font: pymupdf.Font object (default pymupdf.Font('helv')).
        fontsize: the fontsize.
        lineheight: overwrite the font property
        align: (int) 0 = left, 1 = center, 2 = right, 3 = justify
        warn: (bool) text overflow action: none, warn, or exception
        right_to_left: (bool) indicate right-to-left language.
    """
    rect = pymupdf.Rect(rect)
    if rect.is_empty:
        raise ValueError("fill rect must not empty.")
    if type(font) is not pymupdf.Font:
        font = pymupdf.Font("helv")

    def textlen(x):
        """Return length of a string."""
        return font.text_length(
            x, fontsize=fontsize, small_caps=small_caps
        )  # abbreviation

    def char_lengths(x):
        """Return list of single character lengths for a string."""
        return font.char_lengths(x, fontsize=fontsize, small_caps=small_caps)

    def append_this(pos, text):
        ret = writer.append(
                pos, text, font=font, fontsize=fontsize, small_caps=small_caps
                )
        return ret

    tolerance = fontsize * 0.2  # extra distance to left border
    space_len = textlen(" ")
    std_width = rect.width - tolerance
    std_start = rect.x0 + tolerance

    def norm_words(width, words):
        """Cut any word in pieces no longer than 'width'."""
        nwords = []
        word_lengths = []
        for w in words:
            wl_lst = char_lengths(w)
            wl = sum(wl_lst)
            if wl <= width:  # nothing to do - copy over
                nwords.append(w)
                word_lengths.append(wl)
                continue

            # word longer than rect width - split it in parts
            n = len(wl_lst)
            while n > 0:
                wl = sum(wl_lst[:n])
                if wl <= width:
                    nwords.append(w[:n])
                    word_lengths.append(wl)
                    w = w[n:]
                    wl_lst = wl_lst[n:]
                    n = len(wl_lst)
                else:
                    n -= 1
        return nwords, word_lengths

    def output_justify(start, line):
        """Justified output of a line."""
        # ignore leading / trailing / multiple spaces
        words = [w for w in line.split(" ") if w != ""]
        nwords = len(words)
        if nwords == 0:
            return
        if nwords == 1:  # single word cannot be justified
            append_this(start, words[0])
            return
        tl = sum([textlen(w) for w in words])  # total word lengths
        gaps = nwords - 1  # number of word gaps
        gapl = (std_width - tl) / gaps  # width of each gap
        for w in words:
            _, lp = append_this(start, w)  # output one word
            start.x = lp.x + gapl  # next start at word end plus gap
        return

    asc = font.ascender
    dsc = font.descender
    if not lineheight:
        if asc - dsc <= 1:
            lheight = 1.2
        else:
            lheight = asc - dsc
    else:
        lheight = lineheight

    LINEHEIGHT = fontsize * lheight  # effective line height
    width = std_width  # available horizontal space

    # starting point of text
    if pos is not None:
        pos = pymupdf.Point(pos)
    else:  # default is just below rect top-left
        pos = rect.tl + (tolerance, fontsize * asc)
    if pos not in rect:
        raise ValueError("Text must start in rectangle.")

    # calculate displacement factor for alignment
    if align == pymupdf.TEXT_ALIGN_CENTER:
        factor = 0.5
    elif align == pymupdf.TEXT_ALIGN_RIGHT:
        factor = 1.0
    else:
        factor = 0

    # split in lines if just a string was given
    if type(text) is str:
        textlines = text.splitlines()
    else:
        textlines = []
        for line in text:
            textlines.extend(line.splitlines())

    max_lines = int((rect.y1 - pos.y) / LINEHEIGHT) + 1

    new_lines = []  # the final list of textbox lines
    no_justify = []  # no justify for these line numbers
    for i, line in enumerate(textlines):
        if line in ("", " "):
            new_lines.append((line, space_len))
            width = rect.width - tolerance
            no_justify.append((len(new_lines) - 1))
            continue
        if i == 0:
            width = rect.x1 - pos.x
        else:
            width = rect.width - tolerance

        if right_to_left:  # reverses Arabic / Hebrew text front to back
            line = writer.clean_rtl(line)
        tl = textlen(line)
        if tl <= width:  # line short enough
            new_lines.append((line, tl))
            no_justify.append((len(new_lines) - 1))
            continue

        # we need to split the line in fitting parts
        words = line.split(" ")  # the words in the line

        # cut in parts any words that are longer than rect width
        words, word_lengths = norm_words(width, words)

        n = len(words)
        while True:
            line0 = " ".join(words[:n])
            wl = sum(word_lengths[:n]) + space_len * (n - 1)
            if wl <= width:
                new_lines.append((line0, wl))
                words = words[n:]
                word_lengths = word_lengths[n:]
                n = len(words)
                line0 = None
            else:
                n -= 1

            if len(words) == 0:
                break
            assert n

    # -------------------------------------------------------------------------
    # List of lines created. Each item is (text, tl), where 'tl' is the PDF
    # output length (float) and 'text' is the text. Except for justified text,
    # this is output-ready.
    # -------------------------------------------------------------------------
    nlines = len(new_lines)
    if nlines > max_lines:
        msg = "Only fitting %i of %i lines." % (max_lines, nlines)
        if warn is None:
            pass
        elif warn:
            pymupdf.message("Warning: " + msg)
        else:
            raise ValueError(msg)

    start = pymupdf.Point()
    no_justify += [len(new_lines) - 1]  # no justifying of last line
    for i in range(max_lines):
        try:
            line, tl = new_lines.pop(0)
        except IndexError:
            if g_exceptions_verbose >= 2:   pymupdf.exception_info()
            break

        if right_to_left:  # Arabic, Hebrew
            line = "".join(reversed(line))

        if i == 0:  # may have different start for first line
            start = pos

        if align == pymupdf.TEXT_ALIGN_JUSTIFY and i not in no_justify and tl < std_width:
            output_justify(start, line)
            start.x = std_start
            start.y += LINEHEIGHT
            continue

        if i > 0 or pos.x == std_start:  # left, center, right alignments
            start.x += (width - tl) * factor

        append_this(start, line)
        start.x = std_start
        start.y += LINEHEIGHT

    return new_lines  # return non-written lines


# ------------------------------------------------------------------------
# Optional Content functions
# ------------------------------------------------------------------------
def get_oc(doc: pymupdf.Document, xref: int) -> int:
    """Return optional content object xref for an image or form xobject.

    Args:
        xref: (int) xref number of an image or form xobject.
    """
    if doc.is_closed or doc.is_encrypted:
        raise ValueError("document close or encrypted")
    t, name = doc.xref_get_key(xref, "Subtype")
    if t != "name" or name not in ("/Image", "/Form"):
        raise ValueError("bad object type at xref %i" % xref)
    t, oc = doc.xref_get_key(xref, "OC")
    if t != "xref":
        return 0
    rc = int(oc.replace("0 R", ""))
    return rc


def set_oc(doc: pymupdf.Document, xref: int, oc: int) -> None:
    """Attach optional content object to image or form xobject.

    Args:
        xref: (int) xref number of an image or form xobject
        oc: (int) xref number of an OCG or OCMD
    """
    if doc.is_closed or doc.is_encrypted:
        raise ValueError("document close or encrypted")
    t, name = doc.xref_get_key(xref, "Subtype")
    if t != "name" or name not in ("/Image", "/Form"):
        raise ValueError("bad object type at xref %i" % xref)
    if oc > 0:
        t, name = doc.xref_get_key(oc, "Type")
        if t != "name" or name not in ("/OCG", "/OCMD"):
            raise ValueError("bad object type at xref %i" % oc)
    if oc == 0 and "OC" in doc.xref_get_keys(xref):
        doc.xref_set_key(xref, "OC", "null")
        return None
    doc.xref_set_key(xref, "OC", "%i 0 R" % oc)
    return None


def set_ocmd(
    doc: pymupdf.Document,
    xref: int = 0,
    ocgs: typing.Union[list, None] = None,
    policy: OptStr = None,
    ve: typing.Union[list, None] = None,
) -> int:
    """Create or update an OCMD object in a PDF document.

    Args:
        xref: (int) 0 for creating a new object, otherwise update existing one.
        ocgs: (list) OCG xref numbers, which shall be subject to 'policy'.
        policy: one of 'AllOn', 'AllOff', 'AnyOn', 'AnyOff' (any casing).
        ve: (list) visibility expression. Use instead of 'ocgs' with 'policy'.

    Returns:
        Xref of the created or updated OCMD.
    """

    all_ocgs = set(doc.get_ocgs().keys())

    def ve_maker(ve):
        if type(ve) not in (list, tuple) or len(ve) < 2:
            raise ValueError("bad 've' format: %s" % ve)
        if ve[0].lower() not in ("and", "or", "not"):
            raise ValueError("bad operand: %s" % ve[0])
        if ve[0].lower() == "not" and len(ve) != 2:
            raise ValueError("bad 've' format: %s" % ve)
        item = "[/%s" % ve[0].title()
        for x in ve[1:]:
            if type(x) is int:
                if x not in all_ocgs:
                    raise ValueError("bad OCG %i" % x)
                item += " %i 0 R" % x
            else:
                item += " %s" % ve_maker(x)
        item += "]"
        return item

    text = "<</Type/OCMD"

    if ocgs and type(ocgs) in (list, tuple):  # some OCGs are provided
        s = set(ocgs).difference(all_ocgs)  # contains illegal xrefs
        if s != set():
            msg = "bad OCGs: %s" % s
            raise ValueError(msg)
        text += "/OCGs[" + " ".join(map(lambda x: "%i 0 R" % x, ocgs)) + "]"

    if policy:
        policy = str(policy).lower()
        pols = {
            "anyon": "AnyOn",
            "allon": "AllOn",
            "anyoff": "AnyOff",
            "alloff": "AllOff",
        }
        if policy not in ("anyon", "allon", "anyoff", "alloff"):
            raise ValueError("bad policy: %s" % policy)
        text += "/P/%s" % pols[policy]

    if ve:
        text += "/VE%s" % ve_maker(ve)

    text += ">>"

    # make new object or replace old OCMD (check type first)
    if xref == 0:
        xref = doc.get_new_xref()
    elif "/Type/OCMD" not in doc.xref_object(xref, compressed=True):
        raise ValueError("bad xref or not an OCMD")
    doc.update_object(xref, text)
    return xref


def get_ocmd(doc: pymupdf.Document, xref: int) -> dict:
    """Return the definition of an OCMD (optional content membership dictionary).

    Recognizes PDF dict keys /OCGs (PDF array of OCGs), /P (policy string) and
    /VE (visibility expression, PDF array). Via string manipulation, this
    info is converted to a Python dictionary with keys "xref", "ocgs", "policy"
    and "ve" - ready to recycle as input for 'set_ocmd()'.
    """

    if xref not in range(doc.xref_length()):
        raise ValueError("bad xref")
    text = doc.xref_object(xref, compressed=True)
    if "/Type/OCMD" not in text:
        raise ValueError("bad object type")
    textlen = len(text)

    p0 = text.find("/OCGs[")  # look for /OCGs key
    p1 = text.find("]", p0)
    if p0 < 0 or p1 < 0:  # no OCGs found
        ocgs = None
    else:
        ocgs = text[p0 + 6 : p1].replace("0 R", " ").split()
        ocgs = list(map(int, ocgs))

    p0 = text.find("/P/")  # look for /P policy key
    if p0 < 0:
        policy = None
    else:
        p1 = text.find("ff", p0)
        if p1 < 0:
            p1 = text.find("on", p0)
        if p1 < 0:  # some irregular syntax
            raise ValueError("bad object at xref")
        else:
            policy = text[p0 + 3 : p1 + 2]

    p0 = text.find("/VE[")  # look for /VE visibility expression key
    if p0 < 0:  # no visibility expression found
        ve = None
    else:
        lp = rp = 0  # find end of /VE by finding last ']'.
        p1 = p0
        while lp < 1 or lp != rp:
            p1 += 1
            if not p1 < textlen:  # some irregular syntax
                raise ValueError("bad object at xref")
            if text[p1] == "[":
                lp += 1
            if text[p1] == "]":
                rp += 1
        # p1 now positioned at the last "]"
        ve = text[p0 + 3 : p1 + 1]  # the PDF /VE array
        ve = (
            ve.replace("/And", '"and",')
            .replace("/Not", '"not",')
            .replace("/Or", '"or",')
        )
        ve = ve.replace(" 0 R]", "]").replace(" 0 R", ",").replace("][", "],[")
        import json
        try:
            ve = json.loads(ve)
        except Exception:
            pymupdf.exception_info()
            pymupdf.message(f"bad /VE key: {ve!r}")
            raise
    return {"xref": xref, "ocgs": ocgs, "policy": policy, "ve": ve}


"""
Handle page labels for PDF documents.

Reading
-------
* compute the label of a page
* find page number(s) having the given label.

Writing
-------
Supports setting (defining) page labels for PDF documents.

A big Thank You goes to WILLIAM CHAPMAN who contributed the idea and
significant parts of the following code during late December 2020
through early January 2021.
"""


def rule_dict(item):
    """Make a Python dict from a PDF page label rule.

    Args:
        item -- a tuple (pno, rule) with the start page number and the rule
                string like <</S/D...>>.
    Returns:
        A dict like
        {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int}.
    """
    # Jorj McKie, 2021-01-06

    pno, rule = item
    rule = rule[2:-2].split("/")[1:]  # strip "<<" and ">>"
    d = {"startpage": pno, "prefix": "", "firstpagenum": 1}
    skip = False
    for i, item in enumerate(rule): # pylint: disable=redefined-argument-from-local
        if skip:  # this item has already been processed
            skip = False  # deactivate skipping again
            continue
        if item == "S":  # style specification
            d["style"] = rule[i + 1]  # next item has the style
            skip = True  # do not process next item again
            continue
        if item.startswith("P"):  # prefix specification: extract the string
            x = item[1:].replace("(", "").replace(")", "")
            d["prefix"] = x
            continue
        if item.startswith("St"):  # start page number specification
            x = int(item[2:])
            d["firstpagenum"] = x
    return d


def get_label_pno(pgNo, labels):
    """Return the label for this page number.

    Args:
        pgNo: page number, 0-based.
        labels: result of doc._get_page_labels().
    Returns:
        The label (str) of the page number. Errors return an empty string.
    """
    # Jorj McKie, 2021-01-06

    item = [x for x in labels if x[0] <= pgNo][-1]
    rule = rule_dict(item)
    prefix = rule.get("prefix", "")
    style = rule.get("style", "")
    # make sure we start at 0 when enumerating the alphabet
    delta = -1 if style in ("a", "A") else 0
    pagenumber = pgNo - rule["startpage"] + rule["firstpagenum"] + delta
    return construct_label(style, prefix, pagenumber)


def get_label(page):
    """Return the label for this PDF page.

    Args:
        page: page object.
    Returns:
        The label (str) of the page. Errors return an empty string.
    """
    # Jorj McKie, 2021-01-06

    labels = page.parent._get_page_labels()
    if not labels:
        return ""
    labels.sort()
    return get_label_pno(page.number, labels)


def get_page_numbers(doc, label, only_one=False):
    """Return a list of page numbers with the given label.

    Args:
        doc: PDF document object (resp. 'self').
        label: (str) label.
        only_one: (bool) stop searching after first hit.
    Returns:
        List of page numbers having this label.
    """
    # Jorj McKie, 2021-01-06

    numbers = []
    if not label:
        return numbers
    labels = doc._get_page_labels()
    if labels == []:
        return numbers
    for i in range(doc.page_count):
        plabel = get_label_pno(i, labels)
        if plabel == label:
            numbers.append(i)
            if only_one:
                break
    return numbers


def construct_label(style, prefix, pno) -> str:
    """Construct a label based on style, prefix and page number."""
    # William Chapman, 2021-01-06

    n_str = ""
    if style == "D":
        n_str = str(pno)
    elif style == "r":
        n_str = integerToRoman(pno).lower()
    elif style == "R":
        n_str = integerToRoman(pno).upper()
    elif style == "a":
        n_str = integerToLetter(pno).lower()
    elif style == "A":
        n_str = integerToLetter(pno).upper()
    result = prefix + n_str
    return result


def integerToLetter(i) -> str:
    """Returns letter sequence string for integer i."""
    # William Chapman, Jorj McKie, 2021-01-06
    import string
    ls = string.ascii_uppercase
    n, a = 1, i
    while pow(26, n) <= a:
        a -= int(math.pow(26, n))
        n += 1

    str_t = ""
    for j in reversed(range(n)):
        f, g = divmod(a, int(math.pow(26, j)))
        str_t += ls[f]
        a = g
    return str_t


def integerToRoman(num: int) -> str:
    """Return roman numeral for an integer."""
    # William Chapman, Jorj McKie, 2021-01-06

    roman = (
        (1000, "M"),
        (900, "CM"),
        (500, "D"),
        (400, "CD"),
        (100, "C"),
        (90, "XC"),
        (50, "L"),
        (40, "XL"),
        (10, "X"),
        (9, "IX"),
        (5, "V"),
        (4, "IV"),
        (1, "I"),
    )

    def roman_num(num):
        for r, ltr in roman:
            x, _ = divmod(num, r)
            yield ltr * x
            num -= r * x
            if num <= 0:
                break

    return "".join([a for a in roman_num(num)])


def get_page_labels(doc):
    """Return page label definitions in PDF document.

    Args:
        doc: PDF document (resp. 'self').
    Returns:
        A list of dictionaries with the following format:
        {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int}.
    """
    # Jorj McKie, 2021-01-10
    return [rule_dict(item) for item in doc._get_page_labels()]


def set_page_labels(doc, labels):
    """Add / replace page label definitions in PDF document.

    Args:
        doc: PDF document (resp. 'self').
        labels: list of label dictionaries like:
        {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int},
        as returned by get_page_labels().
    """
    # William Chapman, 2021-01-06

    def create_label_str(label):
        """Convert Python label dict to corresponding PDF rule string.

        Args:
            label: (dict) build rule for the label.
        Returns:
            PDF label rule string wrapped in "<<", ">>".
        """
        s = "%i<<" % label["startpage"]
        if label.get("prefix", "") != "":
            s += "/P(%s)" % label["prefix"]
        if label.get("style", "") != "":
            s += "/S/%s" % label["style"]
        if label.get("firstpagenum", 1) > 1:
            s += "/St %i" % label["firstpagenum"]
        s += ">>"
        return s

    def create_nums(labels):
        """Return concatenated string of all labels rules.

        Args:
            labels: (list) dictionaries as created by function 'rule_dict'.
        Returns:
            PDF compatible string for page label definitions, ready to be
            enclosed in PDF array 'Nums[...]'.
        """
        labels.sort(key=lambda x: x["startpage"])
        s = "".join([create_label_str(label) for label in labels])
        return s

    doc._set_page_labels(create_nums(labels))


# End of Page Label Code -------------------------------------------------


def has_links(doc: pymupdf.Document) -> bool:
    """Check whether there are links on any page."""
    if doc.is_closed:
        raise ValueError("document closed")
    if not doc.is_pdf:
        raise ValueError("is no PDF")
    for i in range(doc.page_count):
        for item in doc.page_annot_xrefs(i):
            if item[1] == pymupdf.PDF_ANNOT_LINK:  # pylint: disable=no-member
                return True
    return False


def has_annots(doc: pymupdf.Document) -> bool:
    """Check whether there are annotations on any page."""
    if doc.is_closed:
        raise ValueError("document closed")
    if not doc.is_pdf:
        raise ValueError("is no PDF")
    for i in range(doc.page_count):
        for item in doc.page_annot_xrefs(i):
            # pylint: disable=no-member
            if not (item[1] == pymupdf.PDF_ANNOT_LINK or item[1] == pymupdf.PDF_ANNOT_WIDGET):  # pylint: disable=no-member
                return True
    return False


# -------------------------------------------------------------------
# Functions to recover the quad contained in a text extraction bbox
# -------------------------------------------------------------------
def recover_bbox_quad(line_dir: tuple, span: dict, bbox: tuple) -> pymupdf.Quad:
    """Compute the quad located inside the bbox.

    The bbox may be any of the resp. tuples occurring inside the given span.

    Args:
        line_dir: (tuple) 'line["dir"]' of the owning line or None.
        span: (dict) the span. May be from get_texttrace() method.
        bbox: (tuple) the bbox of the span or any of its characters.
    Returns:
        The quad which is wrapped by the bbox.
    """
    if line_dir is None:
        line_dir = span["dir"]
    cos, sin = line_dir
    bbox = pymupdf.Rect(bbox)  # make it a rect
    if pymupdf.TOOLS.set_small_glyph_heights():  # ==> just fontsize as height
        d = 1
    else:
        d = span["ascender"] - span["descender"]

    height = d * span["size"]  # the quad's rectangle height
    # The following are distances from the bbox corners, at which we find the
    # respective quad points. The computation depends on in which quadrant the
    # text writing angle is located.
    hs = height * sin
    hc = height * cos
    if hc >= 0 and hs <= 0:  # quadrant 1
        ul = bbox.bl - (0, hc)
        ur = bbox.tr + (hs, 0)
        ll = bbox.bl - (hs, 0)
        lr = bbox.tr + (0, hc)
    elif hc <= 0 and hs <= 0:  # quadrant 2
        ul = bbox.br + (hs, 0)
        ur = bbox.tl - (0, hc)
        ll = bbox.br + (0, hc)
        lr = bbox.tl - (hs, 0)
    elif hc <= 0 and hs >= 0:  # quadrant 3
        ul = bbox.tr - (0, hc)
        ur = bbox.bl + (hs, 0)
        ll = bbox.tr - (hs, 0)
        lr = bbox.bl + (0, hc)
    else:  # quadrant 4
        ul = bbox.tl + (hs, 0)
        ur = bbox.br - (0, hc)
        ll = bbox.tl + (0, hc)
        lr = bbox.br - (hs, 0)
    return pymupdf.Quad(ul, ur, ll, lr)


def recover_quad(line_dir: tuple, span: dict) -> pymupdf.Quad:
    """Recover the quadrilateral of a text span.

    Args:
        line_dir: (tuple) 'line["dir"]' of the owning line.
        span: the span.
    Returns:
        The quadrilateral enveloping the span's text.
    """
    if type(line_dir) is not tuple or len(line_dir) != 2:
        raise ValueError("bad line dir argument")
    if type(span) is not dict:
        raise ValueError("bad span argument")
    return recover_bbox_quad(line_dir, span, span["bbox"])


def recover_line_quad(line: dict, spans: list = None) -> pymupdf.Quad:
    """Calculate the line quad for 'dict' / 'rawdict' text extractions.

    The lower quad points are those of the first, resp. last span quad.
    The upper points are determined by the maximum span quad height.
    From this, compute a rect with bottom-left in (0, 0), convert this to a
    quad and rotate and shift back to cover the text of the spans.

    Args:
        spans: (list, optional) sub-list of spans to consider.
    Returns:
        pymupdf.Quad covering selected spans.
    """
    if spans is None:  # no sub-selection
        spans = line["spans"]  # all spans
    if len(spans) == 0:
        raise ValueError("bad span list")
    line_dir = line["dir"]  # text direction
    cos, sin = line_dir
    q0 = recover_quad(line_dir, spans[0])  # quad of first span
    if len(spans) > 1:  # get quad of last span
        q1 = recover_quad(line_dir, spans[-1])
    else:
        q1 = q0  # last = first

    line_ll = q0.ll  # lower-left of line quad
    line_lr = q1.lr  # lower-right of line quad

    mat0 = pymupdf.planish_line(line_ll, line_lr)

    # map base line to x-axis such that line_ll goes to (0, 0)
    x_lr = line_lr * mat0

    small = pymupdf.TOOLS.set_small_glyph_heights()  # small glyph heights?

    h = max(
        [s["size"] * (1 if small else (s["ascender"] - s["descender"])) for s in spans]
    )

    line_rect = pymupdf.Rect(0, -h, x_lr.x, 0)  # line rectangle
    line_quad = line_rect.quad  # make it a quad and:
    line_quad *= ~mat0
    return line_quad


def recover_span_quad(line_dir: tuple, span: dict, chars: list = None) -> pymupdf.Quad:
    """Calculate the span quad for 'dict' / 'rawdict' text extractions.

    Notes:
        There are two execution paths:
        1. For the full span quad, the result of 'recover_quad' is returned.
        2. For the quad of a sub-list of characters, the char quads are
           computed and joined. This is only supported for the "rawdict"
           extraction option.

    Args:
        line_dir: (tuple) 'line["dir"]' of the owning line.
        span: (dict) the span.
        chars: (list, optional) sub-list of characters to consider.
    Returns:
        pymupdf.Quad covering selected characters.
    """
    if line_dir is None:  # must be a span from get_texttrace()
        line_dir = span["dir"]
    if chars is None:  # no sub-selection
        return recover_quad(line_dir, span)
    if "chars" not in span.keys():
        raise ValueError("need 'rawdict' option to sub-select chars")

    q0 = recover_char_quad(line_dir, span, chars[0])  # quad of first char
    if len(chars) > 1:  # get quad of last char
        q1 = recover_char_quad(line_dir, span, chars[-1])
    else:
        q1 = q0  # last = first

    span_ll = q0.ll  # lower-left of span quad
    span_lr = q1.lr  # lower-right of span quad
    mat0 = pymupdf.planish_line(span_ll, span_lr)
    # map base line to x-axis such that span_ll goes to (0, 0)
    x_lr = span_lr * mat0

    small = pymupdf.TOOLS.set_small_glyph_heights()  # small glyph heights?
    h = span["size"] * (1 if small else (span["ascender"] - span["descender"]))

    span_rect = pymupdf.Rect(0, -h, x_lr.x, 0)  # line rectangle
    span_quad = span_rect.quad  # make it a quad and:
    span_quad *= ~mat0  # rotate back and shift back
    return span_quad


def recover_char_quad(line_dir: tuple, span: dict, char: dict) -> pymupdf.Quad:
    """Recover the quadrilateral of a text character.

    This requires the "rawdict" option of text extraction.

    Args:
        line_dir: (tuple) 'line["dir"]' of the span's line.
        span: (dict) the span dict.
        char: (dict) the character dict.
    Returns:
        The quadrilateral enveloping the character.
    """
    if line_dir is None:
        line_dir = span["dir"]
    if type(line_dir) is not tuple or len(line_dir) != 2:
        raise ValueError("bad line dir argument")
    if type(span) is not dict:
        raise ValueError("bad span argument")
    if type(char) is dict:
        bbox = pymupdf.Rect(char["bbox"])
    elif type(char) is tuple:
        bbox = pymupdf.Rect(char[3])
    else:
        raise ValueError("bad span argument")

    return recover_bbox_quad(line_dir, span, bbox)


# -------------------------------------------------------------------
# Building font subsets using fontTools
# -------------------------------------------------------------------
def subset_fonts(doc: pymupdf.Document, verbose: bool = False, fallback: bool = False) -> OptInt:
    """Build font subsets in a PDF.

    Eligible fonts are potentially replaced by smaller versions. Page text is
    NOT rewritten and thus should retain properties like being hidden or
    controlled by optional content.

    This method by default uses MuPDF's own internal feature to create subset
    fonts. As this is a new function, errors may still occur. In this case,
    please fall back to using the previous version by using "fallback=True".
    Fallback mode requires the external package 'fontTools'.

    Args:
        fallback: use the older deprecated implementation.
        verbose: only used by fallback mode.

    Returns:
        The new MuPDF-based code returns None.  The deprecated fallback
        mode returns 0 if there are no fonts to subset.  Otherwise, it
        returns the decrease in fontsize (the difference in fontsize),
        measured in bytes.
    """
    # Font binaries: -  "buffer" -> (names, xrefs, (unicodes, glyphs))
    # An embedded font is uniquely defined by its fontbuffer only. It may have
    # multiple names and xrefs.
    # Once the sets of used unicodes and glyphs are known, we compute a
    # smaller version of the buffer user package fontTools.

    if not fallback:  # by default use MuPDF function
        pdf = mupdf.pdf_document_from_fz_document(doc)
        mupdf.pdf_subset_fonts2(pdf, list(range(doc.page_count)))
        return

    font_buffers = {}

    def get_old_widths(xref):
        """Retrieve old font '/W' and '/DW' values."""
        df = doc.xref_get_key(xref, "DescendantFonts")
        if df[0] != "array":  # only handle xref specifications
            return None, None
        df_xref = int(df[1][1:-1].replace("0 R", ""))
        widths = doc.xref_get_key(df_xref, "W")
        if widths[0] != "array":  # no widths key found
            widths = None
        else:
            widths = widths[1]
        dwidths = doc.xref_get_key(df_xref, "DW")
        if dwidths[0] != "int":
            dwidths = None
        else:
            dwidths = dwidths[1]
        return widths, dwidths

    def set_old_widths(xref, widths, dwidths):
        """Restore the old '/W' and '/DW' in subsetted font.

        If either parameter is None or evaluates to False, the corresponding
        dictionary key will be set to null.
        """
        df = doc.xref_get_key(xref, "DescendantFonts")
        if df[0] != "array":  # only handle xref specs
            return None
        df_xref = int(df[1][1:-1].replace("0 R", ""))
        if (type(widths) is not str or not widths) and doc.xref_get_key(df_xref, "W")[
            0
        ] != "null":
            doc.xref_set_key(df_xref, "W", "null")
        else:
            doc.xref_set_key(df_xref, "W", widths)
        if (type(dwidths) is not str or not dwidths) and doc.xref_get_key(
            df_xref, "DW"
        )[0] != "null":
            doc.xref_set_key(df_xref, "DW", "null")
        else:
            doc.xref_set_key(df_xref, "DW", dwidths)
        return None

    def set_subset_fontname(new_xref):
        """Generate a name prefix to tag a font as subset.

        We use a random generator to select 6 upper case ASCII characters.
        The prefixed name must be put in the font xref as the "/BaseFont" value
        and in the FontDescriptor object as the '/FontName' value.
        """
        # The following generates a prefix like 'ABCDEF+'
        import random
        import string
        prefix = "".join(random.choices(tuple(string.ascii_uppercase), k=6)) + "+"
        font_str = doc.xref_object(new_xref, compressed=True)
        font_str = font_str.replace("/BaseFont/", "/BaseFont/" + prefix)
        df = doc.xref_get_key(new_xref, "DescendantFonts")
        if df[0] == "array":
            df_xref = int(df[1][1:-1].replace("0 R", ""))
            fd = doc.xref_get_key(df_xref, "FontDescriptor")
            if fd[0] == "xref":
                fd_xref = int(fd[1].replace("0 R", ""))
                fd_str = doc.xref_object(fd_xref, compressed=True)
                fd_str = fd_str.replace("/FontName/", "/FontName/" + prefix)
                doc.update_object(fd_xref, fd_str)
        doc.update_object(new_xref, font_str)

    def build_subset(buffer, unc_set, gid_set):
        """Build font subset using fontTools.

        Args:
            buffer: (bytes) the font given as a binary buffer.
            unc_set: (set) required glyph ids.
        Returns:
            Either None if subsetting is unsuccessful or the subset font buffer.
        """
        try:
            import fontTools.subset as fts
        except ImportError:
            if g_exceptions_verbose:    pymupdf.exception_info()
            pymupdf.message("This method requires fontTools to be installed.")
            raise
        import tempfile
        with tempfile.TemporaryDirectory() as tmp_dir:
            oldfont_path = f"{tmp_dir}/oldfont.ttf"
            newfont_path = f"{tmp_dir}/newfont.ttf"
            uncfile_path = f"{tmp_dir}/uncfile.txt"
            args = [
                oldfont_path,
                "--retain-gids",
                f"--output-file={newfont_path}",
                "--layout-features=*",
                "--passthrough-tables",
                "--ignore-missing-glyphs",
                "--ignore-missing-unicodes",
                "--symbol-cmap",
            ]

            # store glyph ids or unicodes as file
            with open(f"{tmp_dir}/uncfile.txt", "w", encoding='utf8') as unc_file:
                if 0xFFFD in unc_set:  # error unicode exists -> use glyphs
                    args.append(f"--gids-file={uncfile_path}")
                    gid_set.add(189)
                    unc_list = list(gid_set)
                    for unc in unc_list:
                        unc_file.write("%i\n" % unc)
                else:
                    args.append(f"--unicodes-file={uncfile_path}")
                    unc_set.add(255)
                    unc_list = list(unc_set)
                    for unc in unc_list:
                        unc_file.write("%04x\n" % unc)

            # store fontbuffer as a file
            with open(oldfont_path, "wb") as fontfile:
                fontfile.write(buffer)
            try:
                os.remove(newfont_path)  # remove old file
            except Exception:
                pass
            try:  # invoke fontTools subsetter
                fts.main(args)
                font = pymupdf.Font(fontfile=newfont_path)
                new_buffer = font.buffer  # subset font binary
                if font.glyph_count == 0:  # intercept empty font
                    new_buffer = None
            except Exception:
                pymupdf.exception_info()
                new_buffer = None
        return new_buffer

    def repl_fontnames(doc):
        """Populate 'font_buffers'.

        For each font candidate, store its xref and the list of names
        by which PDF text may refer to it (there may be multiple).
        """

        def norm_name(name):
            """Recreate font name that contains PDF hex codes.

            E.g. #20 -> space, chr(32)
            """
            while "#" in name:
                p = name.find("#")
                c = int(name[p + 1 : p + 3], 16)
                name = name.replace(name[p : p + 3], chr(c))
            return name

        def get_fontnames(doc, item):
            """Return a list of fontnames for an item of page.get_fonts().

            There may be multiple names e.g. for Type0 fonts.
            """
            fontname = item[3]
            names = [fontname]
            fontname = doc.xref_get_key(item[0], "BaseFont")[1][1:]
            fontname = norm_name(fontname)
            if fontname not in names:
                names.append(fontname)
            descendents = doc.xref_get_key(item[0], "DescendantFonts")
            if descendents[0] != "array":
                return names
            descendents = descendents[1][1:-1]
            if descendents.endswith(" 0 R"):
                xref = int(descendents[:-4])
                descendents = doc.xref_object(xref, compressed=True)
            p1 = descendents.find("/BaseFont")
            if p1 >= 0:
                p2 = descendents.find("/", p1 + 1)
                p1 = min(descendents.find("/", p2 + 1), descendents.find(">>", p2 + 1))
                fontname = descendents[p2 + 1 : p1]
                fontname = norm_name(fontname)
                if fontname not in names:
                    names.append(fontname)
            return names

        for i in range(doc.page_count):
            for f in doc.get_page_fonts(i, full=True):
                font_xref = f[0]  # font xref
                font_ext = f[1]  # font file extension
                basename = f[3]  # font basename

                if font_ext not in (  # skip if not supported by fontTools
                    "otf",
                    "ttf",
                    "woff",
                    "woff2",
                ):
                    continue
                # skip fonts which already are subsets
                if len(basename) > 6 and basename[6] == "+":
                    continue

                extr = doc.extract_font(font_xref)
                fontbuffer = extr[-1]
                names = get_fontnames(doc, f)
                name_set, xref_set, subsets = font_buffers.get(
                    fontbuffer, (set(), set(), (set(), set()))
                )
                xref_set.add(font_xref)
                for name in names:
                    name_set.add(name)
                font = pymupdf.Font(fontbuffer=fontbuffer)
                name_set.add(font.name)
                del font
                font_buffers[fontbuffer] = (name_set, xref_set, subsets)

    def find_buffer_by_name(name):
        for buffer, (name_set, _, _) in font_buffers.items():
            if name in name_set:
                return buffer
        return None

    # -----------------
    # main function
    # -----------------
    repl_fontnames(doc)  # populate font information
    if not font_buffers:  # nothing found to do
        if verbose:
            pymupdf.message(f'No fonts to subset.')
        return 0

    old_fontsize = 0
    new_fontsize = 0
    for fontbuffer in font_buffers.keys():
        old_fontsize += len(fontbuffer)

    # Scan page text for usage of subsettable fonts
    for page in doc:
        # go through the text and extend set of used glyphs by font
        # we use a modified MuPDF trace device, which delivers us glyph ids.
        for span in page.get_texttrace():
            if type(span) is not dict:  # skip useless information
                continue
            fontname = span["font"][:33]  # fontname for the span
            buffer = find_buffer_by_name(fontname)
            if buffer is None:
                continue
            name_set, xref_set, (set_ucs, set_gid) = font_buffers[buffer]
            for c in span["chars"]:
                set_ucs.add(c[0])  # unicode
                set_gid.add(c[1])  # glyph id
            font_buffers[buffer] = (name_set, xref_set, (set_ucs, set_gid))

    # build the font subsets
    for old_buffer, (name_set, xref_set, subsets) in font_buffers.items():
        new_buffer = build_subset(old_buffer, subsets[0], subsets[1])
        fontname = list(name_set)[0]
        if new_buffer is None or len(new_buffer) >= len(old_buffer):
            # subset was not created or did not get smaller
            if verbose:
                pymupdf.message(f'Cannot subset {fontname!r}.')
            continue
        if verbose:
            pymupdf.message(f"Built subset of font {fontname!r}.")
        val = doc._insert_font(fontbuffer=new_buffer)  # store subset font in PDF
        new_xref = val[0]  # get its xref
        set_subset_fontname(new_xref)  # tag fontname as subset font
        font_str = doc.xref_object(  # get its object definition
            new_xref,
            compressed=True,
        )
        # walk through the original font xrefs and replace each by the subset def
        for font_xref in xref_set:
            # we need the original '/W' and '/DW' width values
            width_table, def_width = get_old_widths(font_xref)
            # ... and replace original font definition at xref with it
            doc.update_object(font_xref, font_str)
            # now copy over old '/W' and '/DW' values
            if width_table or def_width:
                set_old_widths(font_xref, width_table, def_width)
        # 'new_xref' remains unused in the PDF and must be removed
        # by garbage collection.
        new_fontsize += len(new_buffer)

    return old_fontsize - new_fontsize


# -------------------------------------------------------------------
# Copy XREF object to another XREF
# -------------------------------------------------------------------
def xref_copy(doc: pymupdf.Document, source: int, target: int, *, keep: list = None) -> None:
    """Copy a PDF dictionary object to another one given their xref numbers.

    Args:
        doc: PDF document object
        source: source xref number
        target: target xref number, the xref must already exist
        keep: an optional list of 1st level keys in target that should not be
              removed before copying.
    Notes:
        This works similar to the copy() method of dictionaries in Python. The
        source may be a stream object.
    """
    if doc.xref_is_stream(source):
        # read new xref stream, maintaining compression
        stream = doc.xref_stream_raw(source)
        doc.update_stream(
            target,
            stream,
            compress=False,  # keeps source compression
            new=True,  # in case target is no stream
        )

    # empty the target completely, observe exceptions
    if keep is None:
        keep = []
    for key in doc.xref_get_keys(target):
        if key in keep:
            continue
        doc.xref_set_key(target, key, "null")
    # copy over all source dict items
    for key in doc.xref_get_keys(source):
        item = doc.xref_get_key(source, key)
        doc.xref_set_key(target, key, item[1])