PhD

The LaTeX sources of my Ph.D. thesis
git clone https://esimon.eu/repos/PhD.git
Log | Files | Refs | README | LICENSE

commit fe467663d24b839b04b54668c7a27de70f94be95
parent 94bc74b6ce08b0e8b2028f81a5bd9010d9676d03
Author: Étienne Simon <esimon@esimon.eu>
Date:   Sun, 10 Jul 2022 12:00:00 +0200

Version archives

Diffstat:
MREADME | 13++++++++++++-
Mbackmatter/colophon.tex | 2+-
Mfrontmatter/acknowledgements.tex | 201+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
Mfrontmatter/title.tex | 6+++---
Mlatexmkrc | 12+++++++++++-
Mlib/render.lua | 37+++++++++++++++++++++----------------
Mmainmatter/fitb/problem 1.tex | 22++++++++++++++++------
Mmainmatter/fitb/problem 2.tex | 28++++++++++++++++++++--------
Amainmatter/graph/Wasserstein.tex | 45+++++++++++++++++++++++++++++++++++++++++++++
Mmainmatter/graph/approach.tex | 15++++++++++-----
Mmainmatter/graph/encoding.tex | 9+--------
Amainmatter/graph/mtb graph.tex | 8++++++++
Mmainmatter/relation extraction/emes.tex | 6++++++
Mmainmatter/relation extraction/marcheggiani plate.tex | 15++++++++++++---
Mmainmatter/relation extraction/pcnn.tex | 12+++++++++---
Mmainmatter/relation extraction/rellda plate.tex | 19+++++++++++++------
Mmainmatter/relation extraction/unsupervised.tex | 1+
Apresentation.tex | 23+++++++++++++++++++++++
Apresentation/beamercolorthemethesis.sty | 32++++++++++++++++++++++++++++++++
Apresentation/beamerfontthemethesis.sty | 76++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apresentation/beamerinnerthemethesis.sty | 86+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apresentation/beamerouterthemethesis.sty | 23+++++++++++++++++++++++
Apresentation/beamerthemethesis.sty | 35+++++++++++++++++++++++++++++++++++
Apresentation/conclusion/contributions.tex | 24++++++++++++++++++++++++
Apresentation/conclusion/perspectives.tex | 21+++++++++++++++++++++
Apresentation/conclusion/questions.tex | 10++++++++++
Apresentation/conclusion/section.tex | 6++++++
Apresentation/context/clustering.tex | 47+++++++++++++++++++++++++++++++++++++++++++++++
Apresentation/context/contributions.tex | 41+++++++++++++++++++++++++++++++++++++++++
Apresentation/context/history.tex | 28++++++++++++++++++++++++++++
Apresentation/context/otter inside box.jpg | 0
Apresentation/context/otter1.jpg | 0
Apresentation/context/otter2.jpg | 0
Apresentation/context/person inside room.jpg | 0
Apresentation/context/section.tex | 6++++++
Apresentation/context/similarity function.tex | 49+++++++++++++++++++++++++++++++++++++++++++++++++
Apresentation/context/symbolic.tex | 27+++++++++++++++++++++++++++
Apresentation/context/task.tex | 24++++++++++++++++++++++++
Apresentation/fitb/classifier.tex | 11+++++++++++
Apresentation/fitb/clustering.tex | 30++++++++++++++++++++++++++++++
Apresentation/fitb/conclusion.tex | 17+++++++++++++++++
Apresentation/fitb/deep fail.tex | 55+++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apresentation/fitb/distribution distance.tex | 19+++++++++++++++++++
Apresentation/fitb/entity predictor.tex | 39+++++++++++++++++++++++++++++++++++++++
Apresentation/fitb/marcheggiani.tex | 36++++++++++++++++++++++++++++++++++++
Apresentation/fitb/negative sampling.tex | 56++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apresentation/fitb/pcnn.tex | 8++++++++
Apresentation/fitb/plan.tex | 10++++++++++
Apresentation/fitb/problems.tex | 54++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apresentation/fitb/qualitative.tex | 52++++++++++++++++++++++++++++++++++++++++++++++++++++
Apresentation/fitb/quantitative.tex | 39+++++++++++++++++++++++++++++++++++++++
Apresentation/fitb/rellda.tex | 40++++++++++++++++++++++++++++++++++++++++
Apresentation/fitb/section.tex | 19+++++++++++++++++++
Apresentation/fitb/skewness.tex | 19+++++++++++++++++++
Apresentation/fitb/surrogate.tex | 32++++++++++++++++++++++++++++++++
Apresentation/fitb/teaser.tex | 7+++++++
Apresentation/graph/basic approaches.tex | 20++++++++++++++++++++
Apresentation/graph/conclusion.tex | 16++++++++++++++++
Apresentation/graph/counting.tex | 18++++++++++++++++++
Apresentation/graph/encoding.tex | 70++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apresentation/graph/gcn.tex | 31+++++++++++++++++++++++++++++++
Apresentation/graph/introduction.tex | 22++++++++++++++++++++++
Apresentation/graph/mtb.tex | 32++++++++++++++++++++++++++++++++
Apresentation/graph/quantitative.tex | 51+++++++++++++++++++++++++++++++++++++++++++++++++++
Apresentation/graph/section.tex | 15+++++++++++++++
Apresentation/graph/similarity function.tex | 45+++++++++++++++++++++++++++++++++++++++++++++
Apresentation/graph/statistics.tex | 52++++++++++++++++++++++++++++++++++++++++++++++++++++
Apresentation/graph/topological similarity.tex | 21+++++++++++++++++++++
Apresentation/graph/triplet loss.tex | 52++++++++++++++++++++++++++++++++++++++++++++++++++++
Apresentation/graph/wasserstein.tex | 49+++++++++++++++++++++++++++++++++++++++++++++++++
Apresentation/graph/wl.tex | 28++++++++++++++++++++++++++++
Apresentation/supplementary/alignsep.tex | 16++++++++++++++++
Apresentation/supplementary/distant.tex | 23+++++++++++++++++++++++
Apresentation/supplementary/features.tex | 12++++++++++++
Apresentation/supplementary/gumbel.tex | 19+++++++++++++++++++
Apresentation/supplementary/line graph.tex | 4++++
Apresentation/supplementary/metrics.tex | 31+++++++++++++++++++++++++++++++
Apresentation/supplementary/otter.jpg | 0
Apresentation/supplementary/otter.tex | 4++++
Apresentation/supplementary/section.tex | 10++++++++++
Apresentation/supplementary/spectral.tex | 17+++++++++++++++++
Mthesis.bib | 10+++++++++-
Mthesis.cls | 170+++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
Mthesis.sty | 68++++++++++++++++++++++++++++++++++++++++++++++++++++----------------
Mthesis.tex | 2+-
85 files changed, 2320 insertions(+), 140 deletions(-)

diff --git a/README b/README @@ -1,8 +1,19 @@ To compile this thesis run: $ latexmk thesis.tex +You can also compile french\ summary.tex to get the state-mandated summary in French as a standalone document and presentation.tex to generate my defense's slides. + This was only tested using TeX Live 2021 and might need some work to compile with future versions. +The fonts I'm using are: + - Latin Modern (part of TeX Live) + - XITS Math (part of TeX Live) + - TeX Gyre Bonum (part of TeX Live) + - GFS Didot Classic (part of TeX Live) + - EB Garamond + - I.Ming + - IPA Mincho + - Futura Now (defense slides only) Compiled versions are available at https://esimon.eu/PhD -The sources are not perfect but I had to graduate at some point. :) +The sources are not perfect but I had to defend at some point. :) diff --git a/backmatter/colophon.tex b/backmatter/colophon.tex @@ -10,7 +10,7 @@ This document is written in Lua\LaTeX{} using \textsc{pgf}/Ti\emph{k}Z and \textsc{pgfplots} for figures. Most of the text and math are typeset in Latin Modern, while \textsc{eb} Garamond is used for titles. A small amount of characters are from the \TeX{} Gyre Bonum and \textsc{xits} fonts. -Greek words are typeset in the Greek Font Society's Didot Classic, while Chinese excerpts are in the \textsc{i}.Ming font. +Greek words are typeset in the Greek Font Society's Didot Classic, Chinese excerpts are in the \textsc{i}.Ming font and Japanese text is in \textsc{ipa} Mincho. Finally, the word ``\textsc{thèse}'' on the title page comes from a vectorization of Auguste Boulanger's Ph.D.\ theses (\cite*{these_boulanger}). 
\bigskip diff --git a/frontmatter/acknowledgements.tex b/frontmatter/acknowledgements.tex @@ -1,11 +1,192 @@ \chapter{Acknowledgements} -\e{Code couleur des commentaires:} -\begin{itemize} - \item \e{Moi} - \item \benj{Benjamin} - \item \vinc{Vincent} - \item \reu{Réunion} - \item \syr{Syrielle} -\end{itemize} - -\e{Trois autres couleurs sont régulièrement utilisées dans des figures: \tikz{\path[fill=Dark2-A] (0,0) rectangle (1.5mm,2mm);\path[fill=Dark2-B] (1.5mm,0) rectangle (3mm,2mm);\path[fill=Dark2-C] (3mm,0) rectangle (4.5mm,2mm);}, ces éléments ont vocation à être permanents.} +%%%%%%%%%%%%%%%%% +%%% The world %%% +%%%%%%%%%%%%%%%%% +I'm not sure how to write these acknowledgements who to thank how to thank them there are so many people who contributed to the completion of this thesis +maybe I should thank my optician who enabled me to read so many papers +but why stop there I'm sure my glasses were made using some kind of polishing machine with bolts made by a worker supported through their childhood by a sweetheart now long forgotten +I'm however deeply thankful to the boltmaker's childhood sweetheart for bringing them a bit of warmth that might not have been strictly necessary to the conception of the metal fasteners indirectly ensuring my optical prowesses but that I want to celebrate anyway +it might be easier to list people I don't want to thank +no that would be ill-disposed +I won't go into woollen coats +%%%%%%%%%%%%%%%% +%%% The jury %%% +%%%%%%%%%%%%%%%% +I'll focus on people more closely related to my doctoral endeavors +first of which are my supervisors +thank you Benjamin Piwowarski for providing key insight on the information extraction field the gentle encouragements and for the guidance through the years +thank you Vincent Guigue for your vast knowledge of how things are done and how they are not I hope you do not disapprove too much of the otters in my defense's slides +I'm also grateful to other members of the jury for examining my thesis 
in particular I would like to thank the reviewers for thoroughly reading my dissertation I hope it was as pleasant to read as it was to write I should elucidate that this is not a curse I quite enjoyed taking the time to put in writing what I learned over the last few years +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% Carl Marletti digression %%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +I shall also thank Carl Marletti creator of the Lily Valley with its indecently delicious or maybe deliciously indecent association of sugar fat and violets into a means of reaching my goal of contracting diabetes on my deathbed +note however that since it was contracted through the ingestion of violets it can surely be analyzed as poetical diabetes although still classified as type~2 by medical professionals +Carl Your glory in what was an undeniable and mouthwatering emotional support shall not be forgotten +I was furthermore corrupted into the sin of gluttony by Sadaharu Aoki Toque Cuivrée and Merveilleux de chez Fred +I also enjoy Baillardran despite its overpricing and overtly bourgeois styling I should therefore not be too vocal about it in case liberation precedes alopecia +I feel like I'm digressing about boltmakers again +%%%%%%%%%%%%%%%%%%%%%% +%%% Angelic beings %%% +%%%%%%%%%%%%%%%%%%%%%% +no if I have to thank someone for emotional support first and foremost that would be Raphaëlle Labarrière Syrielle Montariol and Billur Sezgin +thank you for the numerous evenings and nights spent rebuilding the world and ourselves with an excessive amount of wine +thank you for making me discover things that I enjoy about life +%%%%%%%%%%%%%%%%%% +%%% Literature %%% +%%%%%%%%%%%%%%%%%% +in particular thank you for sharing a copious amount of literary works +I particularly enjoy reading classical authors whose remains are in various states of decomposition between the retirement home and the Panthéon I had the pleasure of snatching several volumes from their skeletal hands thank you Raphaëlle 
for providing flawless directions in grave robbery +%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% Informal Reviewers %%% +%%%%%%%%%%%%%%%%%%%%%%%%%% +I further thank Jill-Jênn Vie and once again Syrielle Montariol and Arij Riabi for proofreading my dissertation +you did a great job correcting me and oh boy did I need it thankfully the first version was not witnessed by the official reviewers +that's what's called taking one for the team +%%%%%%%%%%%%%%%%%%% +%%% Digressions %%% +%%%%%%%%%%%%%%%%%%% +% Tomatoes +next in order I would like to thank tomatoes in all their shapes and forms +thank you for helping me keep a semblance of health +for giving a bit more of a culinary personality beyond that of a sugar-addict even though I still have a kind of obsessive relationship with you +I'm not ready to deconstruct it yet +I wrote you a poem it goes like this +Oh tomato Ô tomato {\japaneseFont 御}tomato +% Dr Bouteille +speaking of health thank you Dr Bouteille for taking care of me and having a funny name +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% Too nice for this world %%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +and now speaking of bottle I would like to thank sweet-tooth Syrielle Montariol and mixologist Kenza Jernite for being among the most admirable people I know +the world is kinder with you in it +I enjoyed sharing liquorous wine and cocktails with you +and I have faith we will be getting diabetes together +with moderation +% Just bitchy enough for this world +because on the other side Billur is accompanying me beyond moderation on cathartic disdain +I can only hope we'll be getting to hell together +%%%%%%%%%%%%%%%%%%% +%%% Clinophilia %%% +%%%%%%%%%%%%%%%%%%% +but for now waiting for this unavoidable and deserved end I would like to thank my bed for being the nicest place on earth and not only my bed all the beds that sheltered me in the past and all those that will accommodate me in the future +I shall even include couches or any surfaces appropriate for napping +I yearn the days we 
were forced to sleep in kindergarten +and I can only hope this will be generalized to later stages of life +as I'm sleep-deprivedly writing these acknowledgements the night before my defense +%%%%%%%%%%%%%%%% +%%% Plushies %%% +%%%%%%%%%%%%%%%% +you might think this clinophilia is revealing of an underlying pathology but fear not as I enjoy all soft things made from fabric +I take the opportunity to thank the kind overseeing of my work by communist \textsc{bert} and the transitional support of Waza Brick Oishii et al +despite your stoic airs I know how you feel deep inside +%%%%%%%%%%%% +%%% Cats %%% +%%%%%%%%%%%% +I should also thank several members of the Felidae family for providing purring +especially Ozwin you're a coward but you're cute you're a cute coward +you're worth your weight of catnip +when you get the chance of meeting her in a courageous disposition you can hold her little furry paws in your hand feel the softness of this unique specimen +how nice yes +at times though her dark side overwhelms her and wounds you while you're in the most playful mood she nonchalantly writes a Greek tragedy in the thunderous strength of her claws yes yes but cutely though +%%%%%%%%%%%%%% +%%% Family %%% +%%%%%%%%%%%%%% +speaking of family I should thank mine which is boringly Hominidae the end of the genealogy tree looks like +\begin{tikzpicture}[person/.style={circle, fill=black, minimum width=0.5mm, inner sep=0}] + \node[person] (root) at ( 0mm, 0mm) {}; + \node[person] (l) at (-2mm, 1mm) {}; + \node[person] (r) at ( 2mm, 1mm) {}; + \node[person] (ll) at (-3mm, 2mm) {}; + \node[person] (lr) at (-1mm, 2mm) {}; + \node[person] (rl) at ( 1mm, 2mm) {}; + \node[person] (rr) at ( 3mm, 2mm) {}; + \draw (root) -- (l); + \draw (root) -- (r); + \draw (l) -- (ll); + \draw (l) -- (lr); + \draw (r) -- (rl); + \draw (r) -- (rr); +\end{tikzpicture} +yes at depth 2 it's still a tree that was an intelligent decision by my ancestors +and though I don't remember everything I am 
thankful for your support early in life +%%%%%%%%%%%%%%%%%%%%%%%% +%%% Action de groupe %%% +%%%%%%%%%%%%%%%%%%%%%%%% +I would also like to thank the action de groupe and its {\japaneseFont 群作用} extension +in particular thank you Jill-Jênn for jill-jênning all along +thank you Shloub for laughing at my poorly crafted puns or at anything really you're too good of a public +thank you Tito for indirectly teaching me more about machine learning than what you yourself know +thank you Alex for teaching me more about machine learning than what you wish you knew +thank you Link Mauve for showing me you can sluggishly not care about unimportant things +thank you Ryan for the gentle squabble +and thank you again Tito Alex and Link Mauve for your substantiated subversion +%%%%%%%%%%%%%%%%%%% +%%% Open source %%% +%%%%%%%%%%%%%%%%%%% +speaking of subversion I would like to thank the whole open source community +and more broadly organizations encouraging the sharing of information with as many people as possible such as Wikipedia and Sci-Hub +%%%%%%%%%%%%%%%%%% +%%% Laboratory %%% +%%%%%%%%%%%%%%%%%% +but I don't need to go this far for finding people sharing ideas +I would like to thank all members of the \textsc{mlia} team +in particular a huge thank you to the people who participated in our reading groups those were legitimately the best work-related moments during my time in the lab +if we go into non-work-related we might end up in some incongruous raclette--karaoke night +I would also like to thank the people in the bestest office ever 26--00/534 thank you Marie for your strong laughter Agnès for your strong chill Tristan for your strong flow and Jean-Yves for your strong temporal consistency I'm sure the time at which you leave for your afternoon collation could have been used to calibrate atomic clocks thus providing an unyielding beacon of stability in research's messy life +finally thank you Christophe Bouder for dealing with deep learning ludicrous computational 
requirements and for providing a serious challenge in foosball +%%%%%%%%%%%%%%%%% +%%% Hot water %%% +%%%%%%%%%%%%%%%%% +this reminds me that I should also thank my sport mates +Syrielle and 26--00/534 for bouldering and Arij for swimming +a deep thanks to the municipal employees who decided on the nearly 30 degrees Celsius temperature for the swimming pool +I wonder whether you might have been doing more for my physical health than a bottle lost in a municipal health center +I take the opportunity to thank the love of my life with whom I share every day +hot water I sing your praise daily +a big thank you to Billur Sezgin for lending me her bathtub and thank you to Lush for achieving the feat of making it more enjoyable +thank you also to Manon Dumas Morès for accepting the same bathtub-lending deal +I might become a bathtub tycoon in the future +I partly grew up in a thermal city maybe that explains why I like hot water so much I'm not sure but just in case thank you to my thermal city Bagnères-de-Luchon +to hell with it thank you to all thermal cities they deserve it well +maybe one instance of enjoyable cold water was a night bath in Kyoto for Gozan no Okuribi thank you to the protagonist of this otherworldly night for making it so memorable +the cold water appeared first in the Takasegawa channel near Pontocho then in the duck's river Kamogawa +which despite its name was void of ducks which might have hidden following their awareness of a cooking intent originating in a native of southwestern France +%%%%%%%%%%%%%%% +%%% Cooking %%% +%%%%%%%%%%%%%%% +thank you to Syrielle Kenza and Manon for participating in singular culinary experiments +this is what inspired the OuCuiPo illustration in my introduction +sorry Manon though for getting you sick with some weird black pepper +%%%%%%%%%%%%%%%%%% +%%% Rapid Fire %%% +%%%%%%%%%%%%%%%%%% +ok it's getting late +thank you Arthur Suspene for getting old as fast as I do but slightly earlier +thank you to Sappho's friend 
who shall sadly remain unnamed +welcome to Jill-Jênn's firework I'm sure it's going to turn out better than the movie +and long life to Anne Émone and all her children I want to leave her offspring in 26--00/534 so that we can share something beyond our PhD +but I fear most of them will starve to death by the end of the month +finally thank you to all the excellent teachers who accompanied me until the end of my formal studies I aspire to be half as good as you were. + +\newpage +\null +\vfill +\textcolor{black!30}{\bonum\fontsize{36pt}{36pt}\selectfont\raisebox{-16pt}[8pt][0pt]{``}}% +{\itshape\,\begin{tabular}[t]{@{}l@{: }l} + Michael & Yes---it wasn't logical. \\ + George & You were a tomato! A tomato doesn't have logic. A tomato can't move. +\end{tabular}}\\ +\null\hfill---``Tootsie'' (1982) + +\bigskip + +\textcolor{black!30}{\bonum\fontsize{36pt}{36pt}\selectfont\raisebox{-16pt}[8pt][0pt]{``}}% +{\itshape\,This disaster of the Cherokees, brought to me by a sad friend to blacken my days and nights! I can do nothing; why shriek? why strike ineffectual blows? I stir in it for the sad reason that no other mortal will move, and if I do not, why, it is left undone. 
The amount of it, to be sure, is merely a scream; but sometimes a scream is better than a thesis.}\\ +\null\hfill---Ralph Waldo Emerson ``Letter to President van Buren'' (1838) + +\bigskip + +\textcolor{black!30}{\bonum\fontsize{36pt}{36pt}\selectfont\raisebox{-16pt}[8pt][0pt]{``}}% +{\itshape\,Aaaaaaaaaaaah}\\ +\null\hfill---Alain Chabat in ``Reality'' by Quentin Dupieux (2014) diff --git a/frontmatter/title.tex b/frontmatter/title.tex @@ -23,7 +23,7 @@ en vue de l'obtention du grade de Docteur\par \vspace{1cm} \hrule \vfill -soutenue publiquement le 30 juin 2022\par +soutenue publiquement le 5 juillet 2022\par \vfill Devant le jury composé de\par \bigskip @@ -42,11 +42,11 @@ Devant le jury composé de\par \multicolumn{3}{@{}l}{Professeure des universités, \textsc{irisa}, \textsc{insa} Rennes} \\ \textbf{Pr} & \textbf{Xavier Tannier} & - Examinateur \\ + Président \\ \multicolumn{3}{@{}l}{Professeur des universités, Sorbonne Université} \\ \textbf{Dr} & \textbf{Benjamin Piwowarski} & - Co-encadrant \\ + Directeur \\ \multicolumn{3}{@{}l}{Chargé de recherche, \textsc{cnrs}, Sorbonne Université} \\ \textbf{Dr} & \textbf{Vincent Guigue} & diff --git a/latexmkrc b/latexmkrc @@ -1,5 +1,5 @@ # Main file -@default_files = ("thesis.tex", "french summary.tex"); +@default_files = ("thesis.tex", "french summary.tex", "presentation.tex"); # Use lualatex $pdf_mode = 4; @@ -10,6 +10,9 @@ $out_dir = "build"; # Allow the execution of arbitrary shell command set_tex_cmds("--shell-escape %O %S"); +# The beamer themes are in the presentation folder +ensure_path('TEXINPUTS', 'presentation'); + # Create build directories if needed unless(-d "$out_dir"){ mkdir("$out_dir"); @@ -23,4 +26,11 @@ unless(-d "$out_dir"){ mkdir("$out_dir/backmatter/french"); mkdir("$out_dir/backmatter/assumptions"); mkdir("$out_dir/backmatter/datasets"); + mkdir("$out_dir/presentation"); + mkdir("$out_dir/presentation/front"); + mkdir("$out_dir/presentation/context"); + mkdir("$out_dir/presentation/fitb"); + 
mkdir("$out_dir/presentation/graph"); + mkdir("$out_dir/presentation/conclusion"); + mkdir("$out_dir/presentation/supplementary"); } diff --git a/lib/render.lua b/lib/render.lua @@ -70,25 +70,26 @@ function render.embeddings(path) tex.print([[\def\explainedvary{]] .. ([[%2.1f\%%]]):format(100*xml.embeddings.explained.y) .. [[}%]]) end -function render_confusion(path, xorig, label) +function render_confusion(path, xorig, xdelta, radius, label, id) local xml = load_xml(path) for j=1,10 do - local xpos = xorig+j*0.27 - tex.print([[\node at (]]..xpos..[[, 0) {\scriptsize ]]..(j-1)..[[};]]) + local xpos = xorig+j*xdelta + tex.print([[\node (confusion-column-]]..id..[[-]]..j..[[) at (]]..xpos..[[, 0) {\scriptsize ]]..(j-1)..[[};]]) end for i, gold in ipairs(xml.confusion.gold) do local ypos = i*-CONFUSION_Y_SPREAD for j, cell in ipairs(gold.clusters.recall) do - local xpos = xorig+j*0.27 - local radius = math.sqrt(cell) * 0.15 + local xpos = xorig+j*xdelta + local radius = math.sqrt(cell) * radius local content = string.format("%.0f", 100*cell) tex.print([[\fill (]]..xpos..[[, ]]..ypos..[[) circle (]]..radius..[[);]]) + tex.print([[\coordinate (confusion-cell-]]..id..[[-]]..i..[[-]]..j..[[) at (]]..xpos..[[, ]]..ypos..[[);]]) end end local ypos = -CONFUSION_Y_SPREAD*#xml.confusion.gold - 0.2 - local bwest = xorig + 0.5*0.27 - local beast = xorig + 10.5*0.27 - tex.print([[\draw[decorate, decoration={brace, amplitude=5}] (]]..beast..[[, ]]..ypos..[[) -- (]]..bwest..[[, ]]..ypos..[[) node[below, midway, yshift=-1mm] {]]..label..[[};]]) + local bwest = xorig + 0.5*xdelta + local beast = xorig + 10.5*xdelta + tex.print([[\draw[decorate, decoration={brace, amplitude=5}] (]]..beast..[[, ]]..ypos..[[) -- (]]..bwest..[[, ]]..ypos..[[) node[below, midway, yshift=-1mm] (confusion-model-]]..id..[[) {]]..label..[[};]]) end function render_confusion_legend(path) @@ -105,18 +106,22 @@ function render_confusion_legend(path) label = [[\(e_1\) ]] .. gold.relation.surfaceform .. 
[[ \(e_2\)]] end local ypos = i*-CONFUSION_Y_SPREAD - tex.print([[\node[anchor=west] at (0, ]]..ypos..[[) {\scriptsize{}]]..frequency..[[\% ]]..label..[[ (\wdrel{]]..gold.relation.identifier..[[})};]]) + tex.print([[\node[anchor=west] (confusion-row-]]..i..[[) at (0, ]]..ypos..[[) {\makeatletter\scriptsize{}\ifthesis@presentation\else]]..frequency..[[\% \fi]]..label..[[\ifthesis@presentation\else (\wdrel{]]..gold.relation.identifier..[[})\fi\makeatother};]]) end end -function render.confusions(path1, label1, path2, label2, path3, label3, path4, label4) - tex.print([[\begin{tikzpicture}]]) - render_confusion(path1, -12, label1) - render_confusion(path2, -9, label2) - render_confusion(path3, -6, label3) - render_confusion(path4, -3, label4) +function render.confusions(mdelta, cdelta, radius, path1, label1, path2, label2, path3, label3, path4, label4, intikzpicture) + if not intikzpicture then + tex.print([[\begin{tikzpicture}]]) + end + render_confusion(path1, -4*mdelta, cdelta, radius, label1, 1) + render_confusion(path2, -3*mdelta, cdelta, radius, label2, 2) + render_confusion(path3, -2*mdelta, cdelta, radius, label3, 3) + render_confusion(path4, -1*mdelta, cdelta, radius, label4, 4) render_confusion_legend(path1) - tex.print([[\end{tikzpicture}]]) + if not intikzpicture then + tex.print([[\end{tikzpicture}]]) + end end local function degrees_table(dict) diff --git a/mainmatter/fitb/problem 1.tex b/mainmatter/fitb/problem 1.tex @@ -6,10 +6,20 @@ \drawDistribution{activation}{\(P(\rndm{r}\mid s_4) = \)}{3}{0/0.33, 1/0.31, 2/0.34, 3/0.36, 4/0.34, 5/0.33, 6/0.37, 7/0.35, 8/0.36, 9/0.32} \node at (0.75, 2.665) {\(\vdots\)}; - \node[anchor=south west] at (0, 2) {\IfLanguageName{french}{Distribution désirée}{Desired distributions}:}; - \drawDistribution{activation}{\(P(\rndm{r}\mid s_1) = \)}{1.5}{0/0.02, 1/0.05, 2/0.01, 3/0.07, 4/0.88, 5/0.06, 6/0.04, 7/0.06, 8/0.06, 9/0.01} - \drawDistribution{activation}{\(P(\rndm{r}\mid s_2) = \)}{1}{0/0.01, 1/0.07, 2/0.88, 3/0.05, 
4/0.02, 5/0.03, 6/0.07, 7/0.06, 8/0.02, 9/0.05} - \drawDistribution{activation}{\(P(\rndm{r}\mid s_3) = \)}{0.5}{0/0.02, 1/0.05, 2/0.01, 3/0.88, 4/0.02, 5/0.03, 6/0.07, 7/0.03, 8/0.02, 9/0.03} - \drawDistribution{activation}{\(P(\rndm{r}\mid s_4) = \)}{0}{0/0.03, 1/0.01, 2/0.04, 3/0.06, 4/0.04, 5/0.03, 6/0.87, 7/0.05, 8/0.06, 9/0.02} - \node at (0.75, -0.335) {\(\vdots\)}; + \makeatletter + \ifthesis@presentation + \def\desiredShift{-5mm} + \else + \def\desiredShift{0mm} + \fi + \makeatother + + \begin{scope}[yshift=\desiredShift] + \node[anchor=south west] at (0, 2) {\IfLanguageName{french}{Distribution désirée}{Desired distributions}:}; + \drawDistribution{activation}{\(P(\rndm{r}\mid s_1) = \)}{1.5}{0/0.02, 1/0.05, 2/0.01, 3/0.07, 4/0.88, 5/0.06, 6/0.04, 7/0.06, 8/0.06, 9/0.01} + \drawDistribution{activation}{\(P(\rndm{r}\mid s_2) = \)}{1}{0/0.01, 1/0.07, 2/0.88, 3/0.05, 4/0.02, 5/0.03, 6/0.07, 7/0.06, 8/0.02, 9/0.05} + \drawDistribution{activation}{\(P(\rndm{r}\mid s_3) = \)}{0.5}{0/0.02, 1/0.05, 2/0.01, 3/0.88, 4/0.02, 5/0.03, 6/0.07, 7/0.03, 8/0.02, 9/0.03} + \drawDistribution{activation}{\(P(\rndm{r}\mid s_4) = \)}{0}{0/0.03, 1/0.01, 2/0.04, 3/0.06, 4/0.04, 5/0.03, 6/0.87, 7/0.05, 8/0.06, 9/0.02} + \node at (0.75, -0.335) {\(\vdots\)}; + \end{scope} \end{tikzpicture}% diff --git a/mainmatter/fitb/problem 2.tex b/mainmatter/fitb/problem 2.tex @@ -1,17 +1,29 @@ \begin{tikzpicture} + \makeatletter + \ifthesis@presentation + \def\desiredShift{-5mm} + \def\averageNoun{expectation} + \else + \def\desiredShift{0mm} + \def\averageNoun{average} + \fi + \makeatother + \node[anchor=south west] at (0, 6) {Degenerate distributions:}; \drawDistribution{activation}{\(P(\rndm{r}\mid s_1) = \)}{5.5}{0/0.02, 1/0.05, 2/0.01, 3/0.87, 4/0.08, 5/0.06, 6/0.04, 7/0.06, 8/0.06, 9/0.01} \drawDistribution{activation}{\(P(\rndm{r}\mid s_2) = \)}{5}{0/0.01, 1/0.07, 2/0.08, 3/0.85, 4/0.02, 5/0.03, 6/0.07, 7/0.06, 8/0.02, 9/0.05} \drawDistribution{activation}{\(P(\rndm{r}\mid s_3) = 
\)}{4.5}{0/0.02, 1/0.05, 2/0.01, 3/0.88, 4/0.02, 5/0.03, 6/0.07, 7/0.03, 8/0.02, 9/0.03} \drawDistribution{activation}{\(P(\rndm{r}\mid s_4) = \)}{4}{0/0.03, 1/0.01, 2/0.04, 3/0.86, 4/0.04, 5/0.03, 6/0.07, 7/0.05, 8/0.06, 9/0.02} \node at (0.75, 3.665) {\(\vdots\)}; - \drawDistribution{mean activation}{\(\text{average} = \)}{3}{0/0.02, 1/0.045, 2/0.035, 3/0.865, 4/0.04, 5/0.0375, 6/0.0625, 7/0.05, 8/0.04, 9/0.0275} + \drawDistribution{mean activation}{\(\text{\averageNoun} = \)}{3}{0/0.02, 1/0.045, 2/0.035, 3/0.865, 4/0.04, 5/0.0375, 6/0.0625, 7/0.05, 8/0.04, 9/0.0275} - \node[anchor=south west] at (0, 2.5) {Desired distributions:}; - \drawDistribution{activation}{\(P(\rndm{r}\mid s_1) = \)}{2}{0/0.02, 1/0.05, 2/0.01, 3/0.07, 4/0.08, 5/0.06, 6/0.04, 7/0.86, 8/0.06, 9/0.01} - \drawDistribution{activation}{\(P(\rndm{r}\mid s_2) = \)}{1.5}{0/0.01, 1/0.07, 2/0.08, 3/0.05, 4/0.82, 5/0.03, 6/0.07, 7/0.06, 8/0.02, 9/0.05} - \drawDistribution{activation}{\(P(\rndm{r}\mid s_3) = \)}{1}{0/0.02, 1/0.05, 2/0.01, 3/0.08, 4/0.02, 5/0.83, 6/0.07, 7/0.03, 8/0.02, 9/0.03} - \drawDistribution{activation}{\(P(\rndm{r}\mid s_4) = \)}{0.5}{0/0.83, 1/0.01, 2/0.04, 3/0.06, 4/0.04, 5/0.03, 6/0.07, 7/0.05, 8/0.06, 9/0.02} - \node at (0.75, 0.165) {\(\vdots\)}; - \drawDistribution{mean activation}{\(\text{average} = \)}{-0.5}{0/0.35, 1/0.32, 2/0.31, 3/0.37, 4/0.38, 5/0.36, 6/0.34, 7/0.36, 8/0.36, 9/0.31} + \begin{scope}[yshift=\desiredShift] + \node[anchor=south west] at (0, 2.5) {Desired distributions:}; + \drawDistribution{activation}{\(P(\rndm{r}\mid s_1) = \)}{2}{0/0.02, 1/0.05, 2/0.01, 3/0.07, 4/0.08, 5/0.06, 6/0.04, 7/0.86, 8/0.06, 9/0.01} + \drawDistribution{activation}{\(P(\rndm{r}\mid s_2) = \)}{1.5}{0/0.01, 1/0.07, 2/0.08, 3/0.05, 4/0.82, 5/0.03, 6/0.07, 7/0.06, 8/0.02, 9/0.05} + \drawDistribution{activation}{\(P(\rndm{r}\mid s_3) = \)}{1}{0/0.02, 1/0.05, 2/0.01, 3/0.08, 4/0.02, 5/0.83, 6/0.07, 7/0.03, 8/0.02, 9/0.03} + \drawDistribution{activation}{\(P(\rndm{r}\mid s_4) = 
\)}{0.5}{0/0.83, 1/0.01, 2/0.04, 3/0.06, 4/0.04, 5/0.03, 6/0.07, 7/0.05, 8/0.06, 9/0.02} + \node at (0.75, 0.165) {\(\vdots\)}; + \drawDistribution{mean activation}{\(\text{\averageNoun} = \)}{-0.5}{0/0.35, 1/0.32, 2/0.31, 3/0.37, 4/0.38, 5/0.36, 6/0.34, 7/0.36, 8/0.36, 9/0.31} + \end{scope} \end{tikzpicture}% diff --git a/mainmatter/graph/Wasserstein.tex b/mainmatter/graph/Wasserstein.tex @@ -0,0 +1,45 @@ +\begingroup% +\makeatletter% +\ifthesis@presentation% + \def\leftLabel{\(\gfeneighbors(x)\)}% + \def\rightLabel{\(\gfeneighbors(x')\)}% +\else% + \def\leftLabel{\(S_\gfnright(a, k)\)}% + \def\rightLabel{\(S_\gfnright(a', k)\)}% +\fi% +\makeatother% +\begin{tikzpicture} + \node at (-1.5, 1.75) {\textcolor{Dark2-A}{\leftLabel}}; + \node at (1, 0.25) {\textcolor{Dark2-B}{\rightLabel}}; + + \coordinate (xa1) at (-1, 1.3); + \coordinate (xa2) at (-1.5, 0.95); + \coordinate (xa3) at (-0.75, 0.13); + \coordinate (xa4) at (-0.5, 1.16); + \coordinate (xa5) at (-0.25, 1.9); + + \coordinate (xb1) at (1, 1.9); + \coordinate (xb2) at (1.25, 1.1); + \coordinate (xb3) at (0.5, 0.7); + \coordinate (xb4) at (0.25, 1.8); + + \draw[thick, Dark2-C, opacity=1] (xa1) -- (xb4); + \draw[thick, Dark2-C, opacity=0.5] (xa2) -- (xb1); + \draw[thick, Dark2-C, opacity=0.25] (xa2) -- (xb2); + \draw[thick, Dark2-C, opacity=0.25] (xa2) -- (xb3); + \draw[thick, Dark2-C, opacity=1] (xa3) -- (xb3); + \draw[thick, Dark2-C, opacity=1] (xa4) -- (xb2); + \draw[thick, Dark2-C, opacity=0.75] (xa5) -- (xb1); + \draw[thick, Dark2-C, opacity=0.25] (xa5) -- (xb4); + + \fill[fill=Dark2-A] (xa1) circle (1mm); + \fill[fill=Dark2-A] (xa2) circle (1mm); + \fill[fill=Dark2-A] (xa3) circle (1mm); + \fill[fill=Dark2-A] (xa4) circle (1mm); + \fill[fill=Dark2-A] (xa5) circle (1mm); + \fill[fill=Dark2-B] (xb1) circle (1mm); + \fill[fill=Dark2-B] (xb2) circle (1mm); + \fill[fill=Dark2-B] (xb3) circle (1mm); + \fill[fill=Dark2-B] (xb4) circle (1mm); +\end{tikzpicture}% +\endgroup% diff --git 
a/mainmatter/graph/approach.tex b/mainmatter/graph/approach.tex @@ -130,11 +130,16 @@ This corresponds to comparing two empirical distributions of sentence representa We are comparing sentence representations and not directly sentences since the initial coloring \(\chi_0\) has been defined using \bertcoder. } that have an entity in common with \(a\) and \(a'\). -This can be done using the 1-Wasserstein distance between the two neighborhoods since they can be seen as two distributions of Dirac deltas in \bertcoder{} representation space.% +This can be done using the 1-Wasserstein distance between the two neighborhoods since they can be seen as two distributions in \bertcoder{} representation space.% \sidenote{ - Wasserstein distance has the advantage of working on distributions with disjoint supports. + The 1-Wasserstein distance has the advantage of working on distributions with disjoint supports. + We use a discrete version; it can be interpreted as the cost of transforming one set of points into another, that is the length of the blue lines times their opacity in the following schema: + \vspace{-2mm}% + \begin{center} + \input{mainmatter/graph/Wasserstein.tex} + \end{center} } -This needs to be done for the two entities, which correspond to the in-arc-neighbors \(\gfeneighbors_\gfnleft\) and out-arc-neighbors \(\gfeneighbors_\gfnright\). +This needs to be done for the two entities, which corresponds to the in-arc-neighbors \(\gfeneighbors_\gfnleft\) and out-arc-neighbors \(\gfeneighbors_\gfnright\). While this is 1-localized, we can generalize this encoding to be \(K\)-localized by defining the \(k\)-sphere centered on an arc \(a\), where the 1-sphere corresponds to \(\gfeneighbors\): \begin{align*} S_\gfnright(a, 0) & = \{\,a\,\} \\ @@ -149,7 +154,7 @@ The in-neighborhood is defined similarly. 
Finally, the distance between two samples \(a, a'\in\arcSet\) can be defined as: \begin{marginparagraph} To be precise Equation~\ref{eq:graph:topological distance} defines a distance between samples from the Euclidean distances between neighboring samples---that is samples with an entity in common. - The distance \(W_1\) is the cost of the optimal transport plan between two sets of Dirac deltas corresponding to the neighborhoods of the samples. + The distance \(W_1\) is the cost of the optimal transport plan between two discrete sets of points corresponding to the neighborhoods of the samples \parencite{optimal_transport}. \end{marginparagraph} \begin{equation} d(a, a'; \vctr{\lambda}) = @@ -161,7 +166,7 @@ Finally, the distance between two samples \(a, a'\in\arcSet\) can be defined as: where \(W_1\) designates the 1-Wasserstein distance, and \(\vctr{\lambda}\in\symbb{R}^{K+1}\) weights the contribution of each sphere to the final distance value. In particular \(\lambda_0\) parametrizes how much the linguistic features should weight compared to topological features.% \sidenote{ - The 1-Wasserstein distance is defined on top of a metric space; therefore, the difference between two neighbors must be defined using the Euclidean distance. + The 1-Wasserstein distance is defined on top of a metric space; therefore, the difference between two neighbors must be defined using the Euclidean distance (or any other distance). We can't use dot product as usually done with \textsc{bert} representations (see for example Equation~\ref{eq:relation extraction:mtb similarity}). However, we can slightly change Equation~\ref{eq:graph:topological distance} to use the dot product for the computation of the linguistic similarity (the term \(k=0\)). In this case, however, \(d\) would no longer satisfy the properties of a metric. 
diff --git a/mainmatter/graph/encoding.tex b/mainmatter/graph/encoding.tex @@ -116,14 +116,7 @@ For example, the hypothesis \hypothesis{biclique} draw its name from the fact th This is especially of interest to study matching the blanks (\textsc{mtb}, Section~\ref{sec:relation extraction:mtb}). It can be analyzed using the following graph:% \begin{center} - \begin{tikzpicture} - \node (e1) {\(e_1\)}; - \node[right=of e1] (e2) {\(e_2\)}; - \node[left=of e1] (e3) {\(e_3\)}; - \draw[arrow] (e1) to node[midway,above] {\(r_3\)} (e3); - \draw[arrow] (e1) to[bend left=30] node[midway,above] {\(r_1\)} (e2); - \draw[arrow] (e1) to[bend right=30] node[midway,below] {\(r_2\)} (e2); - \end{tikzpicture} + \input{mainmatter/graph/mtb graph.tex} \end{center} \textsc{mtb} makes two main assumptions: \hypothesis{1-adjacency} and \hypothesis{\(1\to1\)}. In the above graph, \hypothesis{1-adjacency} implies that \(r_1\) and \(r_2\) should be the same, while \hypothesis{\(1\to1\)} implies that \(r_3\) should be different from \(r_1\) and \(r_2\). 
diff --git a/mainmatter/graph/mtb graph.tex b/mainmatter/graph/mtb graph.tex @@ -0,0 +1,8 @@ +\begin{tikzpicture} + \node (e1) {\(e_1\)}; + \node[right=of e1] (e2) {\(e_2\)}; + \node[left=of e1] (e3) {\(e_3\)}; + \draw[arrow] (e1) to node[midway,above] {\(r_3\)} (e3); + \draw[arrow] (e1) to[bend left=30] node[midway,above] {\(r_1\)} (e2); + \draw[arrow] (e1) to[bend right=30] node[midway,below] {\(r_2\)} (e2); +\end{tikzpicture} diff --git a/mainmatter/relation extraction/emes.tex b/mainmatter/relation extraction/emes.tex @@ -57,4 +57,10 @@ \draw[arrow,rounded corners=1mm] (vbe1) -- (vbe1|-fmid) -- (re1|-fmid) -- (re1); \draw[arrow,rounded corners=1mm] (vbe2) -- (vbe2|-fmid) -- (re2|-fmid) -- (re2); + + \makeatletter + \ifthesis@presentation + \node[right=1mm of re2] {\(\bertcoder(x)\)}; + \fi + \makeatother \end{tikzpicture} diff --git a/mainmatter/relation extraction/marcheggiani plate.tex b/mainmatter/relation extraction/marcheggiani plate.tex @@ -1,7 +1,15 @@ +\begingroup% +\makeatletter +\ifthesis@presentation + \def\sentencevariable{\(\rndmvctr{f}\)} +\else + \def\sentencevariable{\(\rndm{s}\)} +\fi +\makeatother \begin{tikzpicture}[node distance=7mm] \node[pdiag observed] (e) {\(\rndmvctr{e}\)}; - \node[pdiag latent, above=8mm of x] (r) {\(\rndm{r}\)}; - \node[pdiag observed, left=of r] (s) {\(\rndm{s}\)}; + \node[pdiag latent, above=8mm of e] (r) {\(\rndm{r}\)}; + \node[pdiag observed, left=of r] (s) {\sentencevariable}; \node[inner sep=1mm, above=5mm of s] (phi) {\(\vctr{\phi}\)}; \node[inner sep=1mm, right=of e] (theta) {\(\vctr{\theta}\)}; \draw[arrow] (r) -- (e); @@ -13,4 +21,5 @@ \coordinate (plspace) at ($(e.east) + (3mm, 0)$); \node[pdiag plate, inner sep=1mm, fit=(e) (s) (r) (plspace)] (p) {}; \node[anchor=south west] at (p.south west) {\(|\dataSet|\)}; -\end{tikzpicture} +\end{tikzpicture}% +\endgroup% diff --git a/mainmatter/relation extraction/pcnn.tex b/mainmatter/relation extraction/pcnn.tex @@ -5,7 +5,13 @@ 
sidebrace/.style={decorate,decoration={brace,amplitude=0.3em,mirror}}, sidenote/.style={left,midway,xshift=-0.4em,text depth=0}, transform/.style={rotate=-90,draw,align=center,scale=0.7,rounded corners=0.1cm}] - + \makeatletter + \ifthesis@presentation + \def\positionalSize{\tiny} + \else + \def\positionalSize{\scriptsize} + \fi + \makeatother \foreach \i/\da/\db/\name/\text in { 0/-2/-8/x00/Founded, 1/-1/-7/x01/in, 2/ / /e1/\strong{Rome}, @@ -23,8 +29,8 @@ 14/12/ 6/x25/Philip, 15/13/ 7/x26/\ldots}{ \node[word] (t\name) at (-0.3, -0.3*\i+0.4*0.3) {\footnotesize \text}; - \node at (2.4, -0.3*\i+0.15) {\scriptsize \da}; - \node at (2.8, -0.3*\i+0.15) {\scriptsize \db}; + \node at (2.4, -0.3*\i+0.15) {\positionalSize \(\da\)}; + \node at (2.8, -0.3*\i+0.15) {\positionalSize \(\db\)}; \node[draw,minimum width=20mm,minimum height=0.3cm,anchor=north west] (\name) at (1, -0.3*\i+0.3) {}; } diff --git a/mainmatter/relation extraction/rellda plate.tex b/mainmatter/relation extraction/rellda plate.tex @@ -1,4 +1,3 @@ -\kern1mm% \begin{tikzpicture}[node distance=5mm] \node[pdiag observed, opacity=0] (ff) {\(\rndm{f}\)}; \node[pdiag latent, above=of ff] (r) {\(\rndm{r}_i\)}; @@ -16,15 +15,23 @@ \node[pdiag plate, fit=(p1) (theta) (p2lspace)] (p2) {}; \draw[arrow] (theta) -- (r); - \node[pdiag latent, right=12mm of theta] (alpha) {\(\alpha\)}; - \draw[arrow] (alpha) -- (theta); - \node[pdiag latent, left=15mm of ff] (phi) {\(\rndm{\phi}_{rj}\)}; - \coordinate (p3lspace) at ($(phi.south) + (0,-2mm)$); + \makeatletter + \ifthesis@presentation + \node[right=15mm of theta] (alpha) {\(\alpha\)}; + \node[pdiag latent, left=19mm of ff, inner sep=0.1mm] (phi) {\(\rndm{\phi}_{rj}\)}; + \coordinate (p3lspace) at ($(phi.south) + (0,-3mm)$); + \else + \node[right=12mm of theta] (alpha) {\(\alpha\)}; + \node[pdiag latent, left=15mm of ff] (phi) {\(\rndm{\phi}_{rj}\)}; + \coordinate (p3lspace) at ($(phi.south) + (0,-2mm)$); + \fi + \makeatother + \draw[arrow] (alpha) -- (theta); \node[pdiag 
plate, inner sep=2.5mm, fit=(phi) (p3lspace)] (p3) {}; \draw[arrow] (phi) -- (f1); - \node[pdiag latent, above=of phi] (beta) {\(\beta\)}; + \node[above=of phi] (beta) {\(\beta\)}; \draw[arrow] (beta) -- (phi); \node[anchor=south east] at (p1.south east) {\(n_d\)}; diff --git a/mainmatter/relation extraction/unsupervised.tex b/mainmatter/relation extraction/unsupervised.tex @@ -353,6 +353,7 @@ In the variational E-step, the relation for each sample \(r_i\) is sampled from P(r_i\mid \vctr{f}_i, d) \propto P(r_i\mid d) \prod_{j=1}^m P(f_{ij}\mid r_i) \end{equation*} \begin{marginfigure} + \kern1mm% The top plate rises a bit too high over the top of the margin column otherwise. \centering \input{mainmatter/relation extraction/rellda plate.tex} \scaption[Rel-\textsc{lda} plate diagram.]{ diff --git a/presentation.tex b/presentation.tex @@ -0,0 +1,23 @@ +\documentclass[presentation]{thesis} + +\title{Deep Learning for Unsupervised Relation Extraction} +\author{Étienne Simon} +\institute{ISIR, Sorbonne Université} +\date{5 July 2022} +\keywords{Machine Learning\sep Deep Learning\sep Natural Language Processing\sep Information Extraction\sep Relation Extraction} + +\usepackage{thesis} +\addbibresource{thesis.bib} + +\begin{document} + \begin{frame}[plain] + \titlepage + \end{frame} + + \include{presentation/context/section.tex} + \include{presentation/fitb/section.tex} + \include{presentation/graph/section.tex} + \include{presentation/conclusion/section.tex} + \appendix + \include{presentation/supplementary/section.tex} +\end{document} diff --git a/presentation/beamercolorthemethesis.sty b/presentation/beamercolorthemethesis.sty @@ -0,0 +1,32 @@ +\mode<presentation> + +\definecolor{palette-A}{HTML}{8C1C13} +\definecolor{palette-B}{HTML}{BF4342} +\definecolor{palette-C}{HTML}{E7D7C1} +\definecolor{palette-D}{HTML}{A78A7F} +\definecolor{palette-E}{HTML}{735751} + +\colorlet{palette-1}{palette-A} +\colorlet{palette-2}{palette-E!30!white} 
+\colorlet{palette-3}{palette-D!20!white} + +\setbeamercolor{structure}{fg=palette-1} + +\setbeamercolor{palette primary}{fg=palette-C, bg=palette-1} +\setbeamercolor{palette secondary}{fg=black, bg=palette-2} +\setbeamercolor{palette tertiary}{fg=black, bg=palette-3} +\setbeamercolor{palette quaternary}{fg=palette-E, bg=palette-D} +\setbeamercolor{alerted text}{fg=palette-B} + +\setbeamercolor{local structure}{fg=palette primary.bg, use=palette primary} +\setbeamercolor{title}{fg=palette primary.bg, use=palette primary} +\setbeamercolor{section in toc}{fg=palette primary.bg, use=palette primary} + +\setbeamercolor*{section page}{parent=palette primary} +\setbeamercolor*{block title}{parent=palette secondary} +\setbeamercolor*{block body}{parent=palette tertiary} +\setbeamercolor*{progress bar}{parent=palette quaternary} + +\setbeamercolor{page number in head/foot}{parent=palette quaternary} + +\mode<all> diff --git a/presentation/beamerfontthemethesis.sty b/presentation/beamerfontthemethesis.sty @@ -0,0 +1,76 @@ +\usefonttheme{professionalfonts} + +\RequirePackage{fontspec} + +\defaultfontfeatures{Ligatures=TeX} + +\setsansfont{Futura Now Text}[ + UprightFont = * Lt, + ItalicFont = * Lt It, + BoldFont = * Md, + BoldItalicFont = * Md It +] + +\newfontfamily\futuraHeavier{Futura Now Text}[ + UprightFont = * Rg, + ItalicFont = * It, + BoldFont = * Bd, + BoldItalicFont = * Bd It +] + +\newfontfamily\futuraTable{Futura Now Text}[ + Numbers = Monospaced, + UprightFont = * Lt, + ItalicFont = * Lt It, + BoldFont = * Md, + BoldItalicFont = * Md It +] + +\newfontfamily\futuraHeavierTable{Futura Now Text}[ + Numbers = Monospaced, + UprightFont = * Rg, + ItalicFont = * It, + BoldFont = * Bd, + BoldItalicFont = * Bd It +] + +\newfontfamily\futuraCondensed{Futura Now Text}[ + UprightFont = * Cn, + ItalicFont = * Cn It, + BoldFont = * Cn Bd, + BoldItalicFont = * Cn Bd It +] + +\newfontfamily\futuraHeadline{Futura Now Headline}[ + UprightFont = * Rg, + ItalicFont = * It, + 
BoldFont = * Bd, + BoldItalicFont = * Bd It +] + +\newfontfamily\futuraDisplay{Futura Now Display}[ + UprightFont = * Rg, + ItalicFont = * It, + BoldFont = * Bd, + BoldItalicFont = * Bd It +] + +\AtBeginEnvironment{tabular}{\futuraTable} + +\setbeamerfont{title}{family=\futuraDisplay, size=\Huge} +\setbeamerfont{author}{family=\futuraHeavier, size=\small} +\setbeamerfont{date}{family=\futuraHeavier, size=\small} + +\setbeamerfont{alerted text}{family=\futuraHeavier} + +\setbeamerfont{section page}{family=\futuraDisplay, size=\LARGE} + +\setbeamerfont{block title}{family=\futuraHeavier, size=\normalsize} +\setbeamerfont{block title alerted}{family=\futuraHeavier,size=\normalsize} + +\setbeamerfont{frametitle}{family=\futuraHeadline, size=\large} + +\setbeamerfont{description item}{family=\futuraHeavier} +\setbeamerfont{itemize/enumerate subbody}{size=\normalsize} + +\setbeamerfont{page number in head/foot}{family=\futuraHeavier, size=\scriptsize} diff --git a/presentation/beamerinnerthemethesis.sty b/presentation/beamerinnerthemethesis.sty @@ -0,0 +1,86 @@ +\useinnertheme{default} + +\RequirePackage{tikz} + +\setbeamertemplate{itemize items}[circle] + +\setbeamertemplate{title page}{% + \vfill\vfill% + \begingroup% + \raggedright\linespread{1.0}% + \usebeamerfont{title}\usebeamercolor[fg]{title}% + \inserttitle\par% + \endgroup% + \thesisprogress*{\textwidth-\pgflinewidth}\par% + \vspace*{1em}% + \begingroup% + \usebeamerfont{author}\usebeamercolor[fg]{author}% + \insertauthor\par% + \endgroup% + \vspace*{3mm} + \begingroup% + \usebeamerfont{date}\usebeamercolor[fg]{date}% + \insertdate\par% + \endgroup% + \vspace*{3mm} + \begingroup% + \usebeamerfont{institute}\usebeamercolor[fg]{institute}% + \insertinstitute\par% + \endgroup% + \vfill% + \begin{tikzpicture}[ + ampersand replacement=\&, + jury member/.style={anchor=text, font={\footnotesize\strut}, inner sep=0.1mm}, + affiliation/.style={font={\tiny\strut}, color=black!75, inner sep=0.1mm}, + 
bracerole/.style={draw=palette-D, line width=0.5mm}, + role/.style={font={\footnotesize\strut}, inner sep=0.4mm}, + ]% + \matrix[matrix of nodes, column sep=15mm, row sep=4mm]{ + \node[jury member] (Allauzen) {Pr.~Alexandre Allauzen}; \& + \node[jury member] (Sébillot) {Pr.~Pascale Sébillot}; \& + \node[jury member] (Piwowarski) {Dr.~Benjamin Piwowarski}; \\ + \node[jury member] (Favre) {Dr.~Benoît Favre}; \& + \node[jury member] (Tannier) {Pr.~Xavier Tannier}; \& + \node[jury member] (Guigue) {Dr.~Vincent Guigue}; \\ + }; + + \node[affiliation] (Allauzen-aff) [below=1mm of Allauzen.south west, right] {Université Paris-Dauphine PSL, ESPCI}; + \node[affiliation] (Favre-aff) [below=1mm of Favre.south west, right] {Aix-Marseille Université}; + \node[affiliation] (Sébillot-aff) [below=1mm of Sébillot.south west, right] {IRISA, INSA Rennes}; + \node[affiliation] (Tannier-aff) [below=1mm of Tannier.south west, right] {Sorbonne Université}; + \node[affiliation] (Piwowarski-aff) [below=1mm of Piwowarski.south west, right] {CNRS, Sorbonne Université}; + \node[affiliation] (Guigue-aff) [below=1mm of Guigue.south west, right] {Sorbonne Université}; + + \draw[bracerole] (Favre-aff.south west) -- (Favre-aff.south east -| Allauzen-aff.east) coordinate[midway] (rapporteurs); + \node[below=1mm of rapporteurs, role] (rapporteurs-text) {Rapporteurs}; + \draw[bracerole] (rapporteurs) -- (rapporteurs-text); + + \draw[bracerole] (Tannier-aff.south west) -- (Tannier-aff.south east -| Sébillot.east) coordinate[midway] (examinateurs); + \node[below=1mm of examinateurs, role] (examinateurs-text) {Examinateurs}; + \draw[bracerole] (examinateurs) -- (examinateurs-text); + + \draw[bracerole] (Guigue-aff.south west) -- (Guigue-aff.south east -| Piwowarski.east) coordinate[midway] (directeurs); + \node[below=1mm of directeurs, role] (directeurs-text) {Directeurs}; + \draw[bracerole] (directeurs) -- (directeurs-text); + \end{tikzpicture}% + \vfill% +} + 
+\pretocmd{\titlepage}{\addtocounter{framenumber}{-1}}{}{\thesis@patch@error{Class}{Can't start frame number at 0.}} + +\setbeamertemplate{section page}{% + \vspace{2em}% + \insertsection\par% + \thesisprogress{\textwidth-\pgflinewidth}% +} + +\NewDocumentCommand\problemBoxContent{m}{% + \colorbox{palette-2}{\futuraHeavier\strut Problem:}% + \colorbox{palette-3}{\strut \alert{#1}}% +} + +\NewDocumentCommand\problemBox{m}{% + \begin{center}% + \problemBoxContent{#1}% + \end{center}% +} diff --git a/presentation/beamerouterthemethesis.sty b/presentation/beamerouterthemethesis.sty @@ -0,0 +1,23 @@ +\setbeamertemplate{navigation symbols}{} +\setbeamertemplate{page number in head/foot}[framenumber] + +\setbeamertemplate{frametitle}{% + \nointerlineskip% + \usebeamercolor{frametitle}% + \begin{tikzpicture}[overlay, remember picture] + \begin{scope}[shift={(current page.north west)}] + \begin{scope}[yshift=-3.25ex] + \fill[bg] (0, 3.25ex) rectangle (\paperwidth, 0); + \node[anchor=text] at (3mm, 1ex) {\usebeamerfont{frametitle}\insertframetitle}; + \node[anchor=text] at (\paperwidth-3mm, 1ex) {\llap{\usebeamerfont{page number in head/foot}\usebeamercolor{page number in head/foot}\usebeamertemplate{page number in head/foot}}}; + \thesis@progress@bar{\paperwidth} + \end{scope} + \end{scope} + \ifnum\thesis@debug>0 + \node[anchor=north east, scale=0.75, inner sep=0.1mm] at (current page.north east) {\tiny draft \draftVersion}; + \fi + \end{tikzpicture}% + \vspace*{3.25ex}% +} + +\setbeamersize{text margin left=1cm, text margin right=1cm} diff --git a/presentation/beamerthemethesis.sty b/presentation/beamerthemethesis.sty @@ -0,0 +1,35 @@ +\RequirePackage{xparse} + +\NewDocumentCommand\thesis@progress@bar{s m}{% + \usebeamercolor{progress bar}% + \IfBooleanTF{#1}{% + \draw[bg, line width=0.5mm] (0, 0) -- (#2, 0);% + }{% + \pgfmathsetmacro{\current}{min(\insertframenumber, \insertmainframenumber)/(\insertmainframenumber)}% + \draw[fg, line width=0.5mm] (0, 0) -- (\current#2, 
0);% + \draw[bg, line width=0.5mm] (\current#2, 0) -- (#2, 0);% + }% +} + +\NewDocumentCommand\thesisprogress{s m}{% + \begin{tikzpicture}% + \IfBooleanTF{#1}{\thesis@progress@bar*{#2}}{\thesis@progress@bar{#2}}% + \end{tikzpicture}% +} + +\AtBeginSection{% + \begingroup% + \setbeamercolor{background canvas}{parent=palette primary}% + \begin{frame}[plain]% + \sectionpage% + \end{frame}% + \endgroup% +} + +\usecolortheme{seahorse} +\usecolortheme{rose} + +\useinnertheme{thesis} +\useoutertheme{thesis} +\usecolortheme{thesis} +\usefonttheme{thesis} diff --git a/presentation/conclusion/contributions.tex b/presentation/conclusion/contributions.tex @@ -0,0 +1,24 @@ +\begin{frame}{Contributions} + \begin{block}{Regularizing Discriminative Methods} + \begin{itemize} + \item Trained a deep (PCNN) classifier. + \item Introduced two regularizing losses: + \begin{itemize} + \item A \alert{skewness} loss to ensure confidence. + \item A \alert{distribution distance} loss to ensure diversity. + \end{itemize} + \item Improved experimental setup: + \begin{itemize} + \item 2 metrics (V-measure, ARI).\quad\raise1.25pt\hbox{\textcolor{palette-1}{\(\bullet\)}} 2 datasets (T-RExes). + \end{itemize} + \end{itemize} + \end{block} + + \begin{block}{Graph-based Aggregate Methods} + \begin{itemize} + \item Explicitly modeled the aggregate setup for the unsupervised problem. + \item Provided proof on the quality of topological information. + \item Proposed an approach to exploit the mutual information between topological and linguistic features. + \end{itemize} + \end{block} +\end{frame} diff --git a/presentation/conclusion/perspectives.tex b/presentation/conclusion/perspectives.tex @@ -0,0 +1,21 @@ +\begin{frame}{Perspectives} + \begin{block}{Short-term} + \begin{itemize} + \item Replace uniform assumption with Zipf-like distribution. + \item Masking neighbors to enforce an information bottleneck. + \item Make soft-positives stronger in triplet loss. 
+ \item Data distribution problem of graph-based models. + \end{itemize} + \end{block} + \begin{block}{Long-term} + \begin{itemize} + \item Using language modeling for relation extraction. + \item Dataset-level modeling hypotheses. + \item Complex relations: + \begin{itemize} + \item \(n\)-ary relations, + \item fact qualifiers. + \end{itemize} + \end{itemize} + \end{block} +\end{frame} diff --git a/presentation/conclusion/questions.tex b/presentation/conclusion/questions.tex @@ -0,0 +1,10 @@ +\begingroup +\setbeamercolor{background canvas}{bg=palette-1} +\begin{frame}[plain] + \vspace{2em}% + \centering% + \usebeamercolor[fg]{palette primary}% + \futuraDisplay\Huge Questions?\par% + \thesisprogress{\textwidth-\pgflinewidth}% +\end{frame} +\endgroup diff --git a/presentation/conclusion/section.tex b/presentation/conclusion/section.tex @@ -0,0 +1,6 @@ +\section{Conclusion} +\label{sec:conclusion} + +\input{presentation/conclusion/contributions.tex} +\input{presentation/conclusion/perspectives.tex} +\input{presentation/conclusion/questions.tex} diff --git a/presentation/context/clustering.tex b/presentation/context/clustering.tex @@ -0,0 +1,47 @@ +\begin{frame}{Clustering Approaches} + \begin{tabular}{@{}>{\hbadness=5000}m{73mm} @{\hspace{7mm}} >{\tikz[baseline={(0,-0.1)}]{\fill[black!30] (0,0.1) -- (0.75,0.1) -- (0.75,0.35) -- (1.25, 0) -- (0.75,-0.35) -- (0.75, -0.1) -- (0, -0.1) -- cycle;}\hspace{5mm}}l@{}} + \tikzmarknode{clustering-c11}{% + \parbox{73mm}{\uhead{Megrez}\textsuperscript{\kern-2.5mm\wdent{850779}} is a star in the northern circumpolar constellation of \utail{Ursa Major}\textsuperscript{\kern-2.5mm\wdent{10460}}.}}% + & + \tikzmarknode{clustering-c12}{\sfTripletHolds{e_1}{part of constellation}{e_2}} \\ + \multicolumn{2}{c}{} \\[1mm] + \tikzmarknode{clustering-c21}{% + \parbox{73mm}{\uhead{Posidonius}\textsuperscript{\kern-2.5mm\wdent{185770}} was a Greek philosopher, astronomer, historian, mathematician, and teacher native to \utail{Apamea, 
Syria}\textsuperscript{\kern-2.5mm\wdent{617550}}.}}% + & + \tikzmarknode{clustering-c22}{\sfTripletHolds{e_1}{born in}{e_2}} \\ + \multicolumn{2}{c}{} \\[1mm] + \tikzmarknode{clustering-c31}{% + \parbox{73mm}{\uhead{Hipparchus}\textsuperscript{\kern-2.5mm\wdent{159905}} was born in \utail{Nicaea, Bithynia}\textsuperscript{\kern-2.5mm\wdent{739037}}, and probably died on the island of Rhodes, Greece.}}% + & + \tikzmarknode{clustering-c32}{\sfTripletHolds{e_1}{born in}{e_2}} \\ + \end{tabular}% + \pause + \begin{tikzpicture}[remember picture, overlay] + \fill[fill opacity=0.67, white] (clustering-c11.north east) rectangle (clustering-c31.south-|clustering-c12.east); + \fill[pattern={Lines[distance=1mm, angle=45, line width=0.3mm]}, pattern color=white] (clustering-c11.north east) rectangle (clustering-c31.south-|clustering-c12.east); + \end{tikzpicture} + \pause + \begin{tikzpicture}[remember picture, overlay] + \node[draw, fit=(clustering-c11.north west) (clustering-c11.south east), dashed, ultra thick, Dark2-A] {}; + \node[draw, fit=(clustering-c21.north west) (clustering-c31.south east), dashed, ultra thick, Dark2-B] {}; + \end{tikzpicture} + + \bigskip + + \begin{minipage}{6cm} + \(\text{Same cluster} \iff \text{Same relation}\)\\ + Induced clusters need \alert{not} be labeled with a relation. 
+ \end{minipage}% + \hfill% + \pause% + \raisebox{0mm}[0mm][0mm]{% + \begin{minipage}{7cm} + \begin{block}{Clustering Metrics} + \begin{description} + \item[\bcubed] Similar to standard \fone{} + \item[V-measure] Entropic \fone{} + \item[ARI] Pair of samples consistency + \end{description} + \end{block} + \end{minipage}}% +\end{frame} diff --git a/presentation/context/contributions.tex b/presentation/context/contributions.tex @@ -0,0 +1,41 @@ +\begin{frame}{Contributions}% + \centering% + \begin{tikzpicture}[ + field/.style={text width=135mm, anchor=north west, inner sep=0.5mm, yshift=-5mm}, + details/.style={text width=135mm, anchor=north west, inner sep=0.5mm, yshift=1mm}, + paper/.style={draw, palette-A, fill=palette-3}, + annotation/.style={anchor=south east, font=\futuraHeavier, text=palette-1}, + ] + + \node[field] (fitb) {Étienne Simon, Vincent Guigue, Benjamin Piwowarski. {\futuraHeavier \citefield{fitb}[linkedtitle]{title}} ACL~\cite*{fitb}}; + \only<2->{\node[details] at (fitb.south west) (fitb-details) { + \begin{itemize} + \item Introduce relation distribution losses + \item First to train a deep RE classifier without supervision + \item Improve over then SOTA + \end{itemize}}}; + \begin{scope}[on background layer] + \coordinate (fitbse) at ($(fitb.south east) + (-2mm, 0)$);% FIXME + \only<1>{\node[paper, fit=(fitb.north west) (fitbse)] (fitb-block) {};} + \only<2->{\node[paper, fit=(fitb) (fitb-details)] (fitb-block) {};} + \end{scope} + \node[annotation] at (fitb-block.south east) {Part 1}; + + + \only<1>{\coordinate (next-anchor) at (fitb-block.south west);} + \only<2->{\coordinate (next-anchor) at (fitb-details.south west);} + + \node[field] at (next-anchor) (graph) {Étienne Simon, Vincent Guigue, Benjamin Piwowarski. 
{\futuraHeavier ``Graph-Based Unsupervised Relation Extraction''} Work in progress}; + \only<3>{\node[details] at (graph.south west) (graph-details) { + \begin{itemize} + \item Evaluate the quantity of topological information available + \item Explicitly exploit aggregate setup for unsupervised RE + \item Draw parallels between WL isomorphism test and unsupervised RE + \end{itemize}}}; + \begin{scope}[on background layer] + \only<1-2>{\node[paper, fit=(graph)] (graph-block) {};} + \only<3>{\node[paper, fit=(graph) (graph-details)] (graph-block) {};} + \end{scope} + \node[annotation] at (graph-block.south east) {Part 2}; + \end{tikzpicture} +\end{frame} diff --git a/presentation/context/history.tex b/presentation/context/history.tex @@ -0,0 +1,28 @@ +\begin{frame}{Historical Context} + \includegraphics[align=c, width=35mm]{presentation/context/otter1.jpg} + \includegraphics[align=c, width=35mm]{presentation/context/otter2.jpg} + \(\implies\) An ``otter'' entity exists. + + \pause + \smallskip + + \includegraphics[align=c, width=35mm]{presentation/context/otter inside box.jpg} + \includegraphics[align=c, width=35mm]{presentation/context/person inside room.jpg} + \(\implies\) An ``\textsl{inside of}'' relation exists. + + \pause + \medskip + + \alert{Structuralism}: interrelations are keys to our understanding of the world. + + \pause + \begin{tikzpicture} + \draw[dashed] (\pgflinewidth-\textwidth, 0) -- (0, 0); + \node[anchor=south east] {\alert{Realism}}; + \node[anchor=north east] {\alert{Nominalism}}; + \end{tikzpicture} + + Entities and relations are unproductive concepts. + + They only capture synonymy. +\end{frame} diff --git a/presentation/context/otter inside box.jpg b/presentation/context/otter inside box.jpg Binary files differ. diff --git a/presentation/context/otter1.jpg b/presentation/context/otter1.jpg Binary files differ. diff --git a/presentation/context/otter2.jpg b/presentation/context/otter2.jpg Binary files differ. 
diff --git a/presentation/context/person inside room.jpg b/presentation/context/person inside room.jpg Binary files differ. diff --git a/presentation/context/section.tex b/presentation/context/section.tex @@ -0,0 +1,6 @@ +\input{presentation/context/history.tex} +\input{presentation/context/symbolic.tex} +\input{presentation/context/task.tex} +\input{presentation/context/clustering.tex} +\input{presentation/context/similarity function.tex} +\input{presentation/context/contributions.tex} diff --git a/presentation/context/similarity function.tex b/presentation/context/similarity function.tex @@ -0,0 +1,49 @@ +\begin{frame}{Similarity Function Approaches} + \begin{tabular}{@{}>{\hbadness=5000}m{73mm} @{\hspace{7mm}} >{\tikz[baseline={(0,-0.1)}]{\fill[black!30] (0,0.1) -- (0.75,0.1) -- (0.75,0.35) -- (1.25, 0) -- (0.75,-0.35) -- (0.75, -0.1) -- (0, -0.1) -- cycle;}\hspace{5mm}}l@{}} + \tikzmarknode{fewshot-c11}{% + \parbox{73mm}{\uhead{Megrez}\textsuperscript{\kern-2.5mm\wdent{850779}} is a star in the northern circumpolar constellation of \utail{Ursa Major}\textsuperscript{\kern-2.5mm\wdent{10460}}.}}% + & + \tikzmarknode{fewshot-c12}{\sfTripletHolds{e_1}{part of constellation}{e_2}} \\ + \multicolumn{2}{c}{} \\[1mm] + \tikzmarknode{fewshot-c21}{% + \parbox{73mm}{\uhead{Posidonius}\textsuperscript{\kern-2.5mm\wdent{185770}} was a Greek philosopher, astronomer, historian, mathematician, and teacher native to \utail{Apamea, Syria}\textsuperscript{\kern-2.5mm\wdent{617550}}.}}% + & + \tikzmarknode{fewshot-c22}{\sfTripletHolds{e_1}{born in}{e_2}} \\ + \multicolumn{2}{c}{} \\[1mm] + \tikzmarknode{fewshot-c31}{% + \parbox{73mm}{\uhead{Hipparchus}\textsuperscript{\kern-2.5mm\wdent{159905}} was born in \utail{Nicaea, Bithynia}\textsuperscript{\kern-2.5mm\wdent{739037}}, and probably died on the island of Rhodes, Greece.}}% + & + \tikzmarknode{fewshot-c32}{\sfTripletHolds{e_1}{born in}{e_2}} \\ + \end{tabular}% + \pause + \begin{tikzpicture}[remember picture, overlay] + 
\fill[white] (fewshot-c11.north east) rectangle (fewshot-c31.south-|fewshot-c12.east); + \end{tikzpicture} + \pause + \begin{tikzpicture}[ + remember picture, + overlay, + fsbrace/.style={decorate, decoration={brace,amplitude=5}}, + fsvar/.style={midway, anchor=west, xshift=1mm}, + ] + \draw[fsbrace] (fewshot-c11.north east) -- (fewshot-c11.south east) node[fsvar] {\vphantom{\(x\)}\smash{\textcolor{Dark2-B}{\(x_1\)}}}; + \draw[fsbrace] (fewshot-c21.north east) -- (fewshot-c21.south east) node[fsvar] {\vphantom{\(x\)}\smash{\textcolor{Dark2-A}{\(x_2\)}}}; + \draw[fsbrace] (fewshot-c31.north east) -- (fewshot-c31.south east) node[fsvar] {\vphantom{\(x\)}\smash{\textcolor{Dark2-C}{\(x_3\)}}}; + + \node[right=2cm of fewshot-c21, text width=5cm] {% + Learn a similarity function\\ + \(\operatorname{sim}\colon \dataSet\times\dataSet\to\symbb{R}\) + + \bigskip + + \(\operatorname{sim}(\textcolor{Dark2-B}{x_1}, \textcolor{Dark2-A}{x_2}) < \operatorname{sim}(\textcolor{Dark2-A}{x_2}, \textcolor{Dark2-C}{x_3})\)\\ + \(\operatorname{sim}(\textcolor{Dark2-B}{x_1}, \textcolor{Dark2-C}{x_3}) < \operatorname{sim}(\textcolor{Dark2-A}{x_2}, \textcolor{Dark2-C}{x_3})\)\\ + }; + \end{tikzpicture} + + \bigskip + + 5~way 1~shot: given 1 query and 5 candidates, which of the candidates is most similar to the query? + + Evaluated using accuracy. 
+\end{frame} diff --git a/presentation/context/symbolic.tex b/presentation/context/symbolic.tex @@ -0,0 +1,27 @@ +\begin{frame}{Symbolic and Distributed Representations} + \begin{columns}% + \begin{column}{8cm}% + \begin{block}{Information Extraction} + Maps between two symbolic representations (text and knowledge bases).\\ + Knowledge bases are sets of facts:\\ + \hspace{5mm}(entity, \textsl{relation}, entity) + \end{block}% + \end{column}% + \begin{column}{5cm}% + \small% + \raisebox{-0.5\totalheight}[0.45\totalheight][0.5\totalheight]{% + \input{mainmatter/relation extraction/ie steps.tex}% + }% + \end{column}% + \end{columns}% + \pause% + \begin{block}{Symbolic Representations} + \(\text{symbol}\leftrightarrow\text{concept}\)\\ + e.g.: one-hot vector, text (Paris is the capital of France),\\ + \hphantom{e.g.:} knowledge base (Paris\textsuperscript{\wdent{90}}, \textsl{capital}\textsuperscript{\wdrel{1376}}, France\textsuperscript{\wdent{142}}) + \end{block} + \begin{block}{Distributed Representations} + \(\text{concept}\rightarrow\text{several units}\); \(\text{unit}\rightarrow\text{part of several concepts}\)\\ + e.g.: embeddings, neural network activations + \end{block}% +\end{frame} diff --git a/presentation/context/task.tex b/presentation/context/task.tex @@ -0,0 +1,24 @@ +\begin{frame}{Task: Unsupervised Relation Extraction} + \begin{tabular}{@{}>{\hbadness=5000}m{73mm} @{\hspace{7mm}} >{\tikz[baseline={(0,-0.1)}]{\fill[black!30] (0,0.1) -- (0.75,0.1) -- (0.75,0.35) -- (1.25, 0) -- (0.75,-0.35) -- (0.75, -0.1) -- (0, -0.1) -- cycle;}\hspace{5mm}}l@{}} + \tikzmarknode{task-c11}{% + \parbox{73mm}{\uhead{Megrez}\textsuperscript{\kern-2.5mm\wdent{850779}} is a star in the northern circumpolar constellation of \utail{Ursa Major}\textsuperscript{\kern-2.5mm\wdent{10460}}.}}% + & + \tikzmarknode{task-c12}{\sfTripletHolds{e_1}{part of constellation}{e_2}} \\ + \multicolumn{2}{c}{} \\[1mm] + \tikzmarknode{task-c21}{% + 
\parbox{73mm}{\uhead{Posidonius}\textsuperscript{\kern-2.5mm\wdent{185770}} was a Greek philosopher, astronomer, historian, mathematician, and teacher native to \utail{Apamea, Syria}\textsuperscript{\kern-2.5mm\wdent{617550}}.}}% + & + \tikzmarknode{task-c22}{\sfTripletHolds{e_1}{born in}{e_2}} \\ + \multicolumn{2}{c}{} \\[1mm] + \tikzmarknode{task-c31}{% + \parbox{73mm}{\uhead{Hipparchus}\textsuperscript{\kern-2.5mm\wdent{159905}} was born in \utail{Nicaea, Bithynia}\textsuperscript{\kern-2.5mm\wdent{739037}}, and probably died on the island of Rhodes, Greece.}}% + & + \tikzmarknode{task-c32}{\sfTripletHolds{e_1}{born in}{e_2}} \\ + \end{tabular}% + + \bigskip + + In an \alert{unsupervised} fashion.\\ + Two kinds of approaches: clustering and similarity function.\\ + \strut% FIXME +\end{frame} diff --git a/presentation/fitb/classifier.tex b/presentation/fitb/classifier.tex @@ -0,0 +1,11 @@ +\begin{frame}{Relation Classifier: PCNN}% + \centering% + \input{mainmatter/relation extraction/pcnn.tex} + \begin{tikzpicture}[remember picture, overlay] + \node[anchor=south east, xshift=-1cm] at (current page.south east) {\(\displaystyle + \overbrace{P(e_{-i} \mid s, e_i)}^{\text{fill-in-the-blank}} + = \sum_{r\in\relationSet} \alert{\overbrace{P(r\mid s)}^{\text{classifier}}} + \overbrace{P(e_{-i} \mid r, e_i)}^{\text{entity predictor}} + \)}; + \end{tikzpicture}% +\end{frame} diff --git a/presentation/fitb/clustering.tex b/presentation/fitb/clustering.tex @@ -0,0 +1,30 @@ +\begin{frame}{Clustering Approaches} + \begin{tabular}{@{}>{\hbadness=5000}m{73mm} @{\hspace{7mm}} >{\tikz[baseline={(0,-0.1)}]{\fill[black!30] (0,0.1) -- (0.75,0.1) -- (0.75,0.35) -- (1.25, 0) -- (0.75,-0.35) -- (0.75, -0.1) -- (0, -0.1) -- cycle;}\hspace{5mm}}l@{}} + \tikzmarknode{clustering-c11}{% + \parbox{73mm}{\uhead{Megrez}\textsuperscript{\kern-2.5mm\wdent{850779}} is a star in the northern circumpolar constellation of \utail{Ursa Major}\textsuperscript{\kern-2.5mm\wdent{10460}}.}}% + & + 
\tikzmarknode{clustering-c12}{\sfTripletHolds{e_1}{part of constellation}{e_2}} \\ + \multicolumn{2}{c}{} \\[1mm] + \tikzmarknode{clustering-c21}{% + \parbox{73mm}{\uhead{Posidonius}\textsuperscript{\kern-2.5mm\wdent{185770}} was a Greek philosopher, astronomer, historian, mathematician, and teacher native to \utail{Apamea, Syria}\textsuperscript{\kern-2.5mm\wdent{617550}}.}}% + & + \tikzmarknode{clustering-c22}{\sfTripletHolds{e_1}{born in}{e_2}} \\ + \multicolumn{2}{c}{} \\[1mm] + \tikzmarknode{clustering-c31}{% + \parbox{73mm}{\uhead{Hipparchus}\textsuperscript{\kern-2.5mm\wdent{159905}} was born in \utail{Nicaea, Bithynia}\textsuperscript{\kern-2.5mm\wdent{739037}}, and probably died on the island of Rhodes, Greece.}}% + & + \tikzmarknode{clustering-c32}{\sfTripletHolds{e_1}{born in}{e_2}} \\ + \end{tabular}% + \begin{tikzpicture}[remember picture, overlay] + \fill[fill opacity=0.67, white] (clustering-c11.north east) rectangle (clustering-c31.south-|clustering-c12.east); + \fill[pattern={Lines[distance=1mm, angle=45, line width=0.3mm]}, pattern color=white] (clustering-c11.north east) rectangle (clustering-c31.south-|clustering-c12.east); + \node[draw, fit=(clustering-c11.north west) (clustering-c11.south east), dashed, ultra thick, Dark2-A] {}; + \node[draw, fit=(clustering-c21.north west) (clustering-c31.south east), dashed, ultra thick, Dark2-B] {}; + \end{tikzpicture} + + \bigskip + + \(\text{Same cluster} \iff \text{Same relation}\)\\ + Induced clusters need \alert{not} be labeled with a relation.\\ + Evaluated using clustering metrics similar to standard \fone{}/precision/recall. +\end{frame} diff --git a/presentation/fitb/conclusion.tex b/presentation/fitb/conclusion.tex @@ -0,0 +1,17 @@ +\begin{frame}{Conclusion} + \begin{block}{Take-home Message} + Selecting good regularizations to enforce modeling hypotheses enables us to train a deep classifier. 
+ \end{block} + \begin{block}{Contributions} + \begin{itemize} + \item Train a PCNN without supervision + \item Designed two regularization losses (Skewness, Distribution distance) + \item Introduced new datasets (T-RExes) + \item Evaluated using additional metrics (V-measure, ARI) + \end{itemize} + \end{block} + + \medskip + + Étienne Simon, Vincent Guigue, Benjamin Piwowarski. {\futuraHeavier \citefield{fitb}[linkedtitle]{title}} ACL~\cite*{fitb} +\end{frame} diff --git a/presentation/fitb/deep fail.tex b/presentation/fitb/deep fail.tex @@ -0,0 +1,55 @@ +\begin{frame}{Using a Deep Encoder: NYT+FB Results} + \begin{block}{Experimental Setup} + \begin{minipage}{6cm} + We introduced: + \begin{itemize} + \item 2 metrics (V-measure, ARI) + \item 2 datasets (T-RExes) + \end{itemize} + \end{minipage}% + \hfill% + \begin{minipage}{7cm} + \begin{description} + \item[\bcubed] Similar to standard \fone{} + \item[V-measure] Entropic \fone{} + \item[ARI] Pair of samples consistency + \end{description} + \end{minipage}% + \end{block} + + \bigskip + + \centering% + {\usebeamercolor[fg]{alerted text}% + \begin{tikzpicture}[ + remember picture, + overlay, + highlight/.style={fill=fg!30!white, inner sep=0.5mm}, + ] + \only<2>{\node[highlight, fit=(deepfail-c11) (deepfail-c11-|deepfail-c21.west) (deepfail-c29)] {};} + \only<3>{\node[highlight, fit=(deepfail-c31) (deepfail-c32) (deepfail-c31-|deepfail-c21.west) (deepfail-c39)] {};} + \only<4>{\node[highlight, fit=(deepfail-c41) (deepfail-c42) (deepfail-c41-|deepfail-c21.west) (deepfail-c49)] {};} + \end{tikzpicture}}% + \begin{tabular}[b]{c c r r r r r r r} + \toprule + \multicolumn{2}{c}{Model} & \multicolumn{3}{c}{\bcubed} & \multicolumn{3}{c}{V-measure} & \multirow{2}{*}{\textsc{ari}} \\ + \cmidrule(lr){1-2}\cmidrule(lr){3-5}\cmidrule(lr){6-8} + Classifier & Reg. & \fone & Prec. & Rec. & \fone & Hom. & Comp. 
& \\ + \midrule + \tikzmarknode{deepfail-c11}{rel-LDA} & & 29.1 & 24.8 & 35.2 & 30.0 & 26.1 & 35.1 & \tikzmarknode{deepfail-c19}{13.3} \\ + \tikzmarknode{deepfail-c21}{rel-LDA1} & & 36.9 & 30.4 & 47.0 & 37.4 & 31.9 & 45.1 & \tikzmarknode{deepfail-c29}{24.2} \\ + \tikzmarknode{deepfail-c31}{Linear} & \tikzmarknode{deepfail-c32}{\loss{vae reg}} & 35.2 & 23.8 & 67.1 & 27.0 & 18.6 & 49.6 & \tikzmarknode{deepfail-c39}{18.7} \\ + \tikzmarknode{deepfail-c41}{PCNN} & \tikzmarknode{deepfail-c42}{\loss{vae reg}} & 27.6 & 24.3 & 31.9 & 24.7 & 21.2 & 29.6 & \tikzmarknode{deepfail-c49}{15.7} \\ + \bottomrule + \end{tabular} + + \bigskip + \raggedright + + \strut% + \only<2>{\citeauthor{rellda} \citefield{rellda}[linkedtitle]{title} \citefield{rellda}{shortseries}~\cite*{rellda}}% + \only<3>{\citeauthor{vae_re} \citefield{vae_re}[linkedtitle]{title} \citefield{vae_re}{shortseries}~\cite*{vae_re}}% + \only<1-2, 4->{\newline}% + \strut% + \uncover<4>{\par\vspace{-8mm}\problemBox{Using a deep encoder does not work.}} +\end{frame} diff --git a/presentation/fitb/distribution distance.tex b/presentation/fitb/distribution distance.tex @@ -0,0 +1,19 @@ +\begin{frame}{Distribution Distance Loss}% + \begin{columns}% + \begin{column}{6cm}% + \centering% + \input{mainmatter/fitb/problem 2.tex}% + \end{column} + \begin{column}{7cm}% + \begin{block}{Ensure Diversity} + \begin{equation*} + \loss{D}(\vctr{\phi}) = \kl(P(\rndm{R}\mid\vctr{\phi}) \mathrel{\|} \uniformDistribution(\relationSet)) + \end{equation*} + + \bigskip + + At the level of the dataset (or mini-batch) the distribution of relations must be uniform. 
+ \end{block} + \end{column} + \end{columns} +\end{frame} diff --git a/presentation/fitb/entity predictor.tex b/presentation/fitb/entity predictor.tex @@ -0,0 +1,39 @@ +\begin{frame}{Entity Predictors}% + \begin{block}{Hybrid \parencite{vae_re}}% + \(\psi(e_1, r, e_2) = \psi_\text{SP}(e_1, r, e_2) + \psi_\text{RESCAL}(e_1, r, e_2)\) + + \bigskip + + \(P(e_1 \mid r, e_2) = \displaystyle \frac{\exp \psi(e_1, r, e_2)}{\sum_{e'\in\entitySet} \exp \psi(e', r, e_2)} \) + \end{block}% + \begin{columns}[T]% + \begin{column}{6.5cm}% + \begin{block}{Selectional Preferences}% + \(\psi_\text{SP}(e_1, r, e_2) = \vctr{u}_{e_1}\transpose \vctr{a}_r + \vctr{u}_{e_2}\transpose \vctr{b}_r\) + + \medskip + + \(\mtrx{U}\in\symbb{R}^{\entitySet\times d}\) entity embeddings\\ + \(\mtrx{A}, \mtrx{B}\in\symbb{R}^{\relationSet\times d}\) relation embeddings + \end{block}% + \end{column}% + \begin{column}{6.5cm}% + \begin{block}{RESCAL}% + \(\psi_\text{RESCAL}(e_1, r, e_2) = \vctr{u}_{e_1}\transpose \mtrx{C}_r \vctr{u}_{e_2}\) + + \medskip + + \(\mtrx{U}\in\symbb{R}^{\entitySet\times d}\) entity embeddings\\ + \(\tnsr{C}\in\symbb{R}^{\relationSet\times d\times d}\) relation embeddings + \end{block}% + \end{column}% + \end{columns}% + \vspace*{1cm} + \begin{tikzpicture}[remember picture, overlay] + \node[anchor=south east, xshift=-1cm] at (current page.south east) {\(\displaystyle + \overbrace{P(e_{-i} \mid s, e_i)}^{\text{fill-in-the-blank}} + = \sum_{r\in\relationSet} \overbrace{P(r\mid s)}^{\text{classifier}} + \alert{\overbrace{P(e_{-i} \mid r, e_i)}^{\text{entity predictor}}} + \)}; + \end{tikzpicture} +\end{frame} diff --git a/presentation/fitb/marcheggiani.tex b/presentation/fitb/marcheggiani.tex @@ -0,0 +1,36 @@ +\begin{frame}[t]{Related Work: Marcheggiani (discriminative, 2016)}% + \begin{columns}[T]% + \begin{column}{6.5cm}% + A conditional \(\beta\)-VAE: + \begin{center}% + \input{mainmatter/relation extraction/marcheggiani plate.tex} + + \tikz{\draw[arrow, dashed] (0, 0) -- (1cm, 
0);} Encoder + + \tikz{\draw[arrow] (0, 0) -- (1cm, 0);} Decoder + \end{center} + Autoencode the entities \(\rndmvctr{e}\) given the sentence features \(\rndmvctr{f}\). + \end{column}% + \begin{column}{7.5cm}% + \(\loss{vae}(\vctr{\theta}, \vctr{\phi}) = \symcal{L}_\text{reconstruction}(\vctr{\theta}, \vctr{\phi}) + \loss{vae reg}(\vctr{\phi})\) + + \medskip + + \(\loss{vae reg}(\vctr{\phi}) = \kl(Q(\rndm{r}\mid \rndmvctr{e}; \vctr{\phi}) \mathrel{\|} \uniformDistribution(\relationSet))\) + + \vspace{1cm} + + Assume \hypothesis{uniform}: All relations occur with equal frequency.\\ + \(\displaystyle \forall r\in\relationSet\colon P(r) = \frac{1}{|\relationSet|}\) + + \medskip + + Assume \hypothesis{\(1\to1\)}: All relations are bijective.\\ + \( \forall r\in\relationSet\colon r\relationComposition \breve{r} \relationOr \relationIdentity = \breve{r}\relationComposition r \relationOr \relationIdentity = \relationIdentity \) + \end{column}% + \end{columns} + + \bigskip + + \problemBox{Still uses hand designed features.} +\end{frame} diff --git a/presentation/fitb/negative sampling.tex b/presentation/fitb/negative sampling.tex @@ -0,0 +1,56 @@ +\tikzset{ + nsnode/.style={inner sep=0.5mm}, + nshighlight/.style={draw=#1, fill=#1!30!white, inner sep=0}, +} +\begin{frame}{Negative Sampling Approximation} + \begin{tikzpicture}[remember picture, overlay, on background layer] + \only<2->{ + \coordinate (negsampl-dataset-left) at ($(pic cs:negsampl-1) + (-15mm, 0)$); + \coordinate (negsampl-dataset-i) at ($(pic cs:negsampl-1) + (-6mm, 0.3em)$); + \node[nshighlight=Dark2-A, fit=(negsampl-dataset)] (negsampl-dataset-n) {}; + \draw[arrow, Dark2-A, rounded corners=1mm] (negsampl-dataset-n.west) -- (negsampl-dataset-n.west-|negsampl-dataset-left) -- (negsampl-dataset-i-|negsampl-dataset-left) -- (negsampl-dataset-i); + } + \only<3->{ + \coordinate (negsampl-classifier-left) at ($(pic cs:negsampl-2) + (-14mm, 0)$); + \coordinate (negsampl-classifier-i) at ($(pic cs:negsampl-2) + 
(-6mm, 0.3em)$); + \node[nshighlight=Dark2-B, fit=(negsampl-classifier)] (negsampl-classifier-n) {}; + \draw[arrow, Dark2-B, rounded corners=1mm] (negsampl-classifier-n.west) -- (negsampl-classifier-n.west-|negsampl-classifier-left) -- (negsampl-classifier-i-|negsampl-classifier-left) -- (negsampl-classifier-i); + } + \only<4->{ + \coordinate (negsampl-positive-right) at ($(negsampl-negative2.east) + (11mm, 0)$); + \coordinate (negsampl-positive-i) at ($(pic cs:negsampl-3) + (1mm, 0.3em)$); + \node[nshighlight=Dark2-C, fit=(negsampl-positive)] (negsampl-positive-n) {}; + \draw[arrow, Dark2-C, rounded corners=1mm] (negsampl-positive-n.east) -- (negsampl-positive-n.east-|negsampl-positive-right) -- (negsampl-positive-i-|negsampl-positive-right) -- (negsampl-positive-i); + } + \only<5->{ + \coordinate (negsampl-negative-right) at ($(negsampl-negative2.east) + (10mm, 0)$); + \coordinate (negsampl-negative-i) at ($(pic cs:negsampl-4) + (1mm, 0.3em)$); + \node[nshighlight=Dark2-D, fit=(negsampl-negative1) (negsampl-negative2)] (negsampl-negative-n) {}; + \draw[arrow, Dark2-D, rounded corners=1mm] (negsampl-negative-n.east) -- (negsampl-negative-n.east-|negsampl-negative-right) -- (negsampl-negative-i-|negsampl-negative-right) -- (negsampl-negative-i); + } + \end{tikzpicture} + \begin{equation*} + \begin{split} + \loss{ep}(\vctr{\theta}, \vctr{\phi}) = + \expectation_{\raisebox{-3mm}[0mm][0mm]{\(\substack{\tikzmarknode[nsnode]{negsampl-dataset}{(\rndm{s}, \rndm{e}_1, \rndm{e}_2)\sim \uniformDistribution(\dataSet)}\\\tikzmarknode[nsnode]{negsampl-classifier}{\rndm{r}\sim \operatorname{\text{PCNN}}(\rndm{s}; \vctr{\phi})}}\)}} + \bigg[ & \tikzmarknode{negsampl-positive}{- \log \sigmoid \left( \psi(\rndm{e}_1, \rndm{r}, \rndm{e}_2; \vctr{\theta})\right)} \\ + & \tikzmarknode{negsampl-negative1}{- \sum_{j=1}^k \expectation_{\rndm{e}'\sim\uniformDistribution_\dataSet(\entitySet)} \left[ \log \sigmoid \left( - \psi(\rndm{e}_1, \rndm{r}, \rndm{e}'; \vctr{\theta})\right) \right]} 
\\ + & \tikzmarknode{negsampl-negative2}{- \sum_{j=1}^k \expectation_{\rndm{e}'\sim\uniformDistribution_\dataSet(\entitySet)} \left[ \log \sigmoid \left( - \psi(\rndm{e}', \rndm{r}, \rndm{e}_2; \vctr{\theta})\right) \right]} \bigg] + \end{split} + \end{equation*} + + \bigskip + + \pause + + \begin{center} + \begin{minipage}{11cm} + \begin{enumerate}[<+->] + \item \tikzmark{negsampl-1}Take a sample uniformly from the dataset. + \item \tikzmark{negsampl-2}Sample a relation \(\rndm{r}\) from the output of the PCNN classifier. + \item Increase the energy of this fact.\tikzmark{negsampl-3} + \item Decrease the energy of negative facts. (\hypothesis{\(1\to1\)})\tikzmark{negsampl-4} + \end{enumerate} + \end{minipage} + \end{center} +\end{frame} diff --git a/presentation/fitb/pcnn.tex b/presentation/fitb/pcnn.tex @@ -0,0 +1,8 @@ +\begin{frame}{Supervised (old) SOTA: PCNN}% + \centering% + \input{mainmatter/relation extraction/pcnn.tex}% + \vspace*{5mm}% + \par% + \raggedright% + \citeauthor{pcnn} \citefield{pcnn}[linkedtitle]{title} \citefield{pcnn}{shortseries}~\cite*{pcnn}% +\end{frame} diff --git a/presentation/fitb/plan.tex b/presentation/fitb/plan.tex @@ -0,0 +1,10 @@ +\begin{frame}{Plan} + \begin{enumerate} + \item Related work + \item Limitation: can't train deep classifier + \item Model details + \item Analysis of limitation + \item Proposed solution + \item Results + \end{enumerate} +\end{frame} diff --git a/presentation/fitb/problems.tex b/presentation/fitb/problems.tex @@ -0,0 +1,54 @@ +\begin{frame}{Source of Low Scores}% + \begin{block}{Degenerate distributions} + \begin{columns}% + \begin{column}{6.5cm}% + \centering% + \begin{tikzpicture} + \drawDistribution{activation}{\(P(\rndm{r}\mid s_1) = \)}{4.5}{0/0.32, 1/0.35, 2/0.31, 3/0.37, 4/0.38, 5/0.36, 6/0.34, 7/0.36, 8/0.36, 9/0.31} + \drawDistribution{activation}{\(P(\rndm{r}\mid s_2) = \)}{4}{0/0.31, 1/0.37, 2/0.38, 3/0.35, 4/0.32, 5/0.33, 6/0.37, 7/0.36, 8/0.32, 9/0.35} + 
\drawDistribution{activation}{\(P(\rndm{r}\mid s_3) = \)}{3.5}{0/0.32, 1/0.35, 2/0.31, 3/0.38, 4/0.32, 5/0.33, 6/0.37, 7/0.33, 8/0.32, 9/0.33} + \drawDistribution{activation}{\(P(\rndm{r}\mid s_4) = \)}{3}{0/0.33, 1/0.31, 2/0.34, 3/0.36, 4/0.34, 5/0.33, 6/0.37, 7/0.35, 8/0.36, 9/0.32} + \node at (0.75, 2.665) {\(\vdots\)}; + \end{tikzpicture}% + \end{column} + \begin{column}{6.5cm}% + \centering% + \begin{tikzpicture} + \drawDistribution{activation}{\(P(\rndm{r}\mid s_1) = \)}{5.5}{0/0.02, 1/0.05, 2/0.01, 3/0.87, 4/0.08, 5/0.06, 6/0.04, 7/0.06, 8/0.06, 9/0.01} + \drawDistribution{activation}{\(P(\rndm{r}\mid s_2) = \)}{5}{0/0.01, 1/0.07, 2/0.08, 3/0.85, 4/0.02, 5/0.03, 6/0.07, 7/0.06, 8/0.02, 9/0.05} + \drawDistribution{activation}{\(P(\rndm{r}\mid s_3) = \)}{4.5}{0/0.02, 1/0.05, 2/0.01, 3/0.88, 4/0.02, 5/0.03, 6/0.07, 7/0.03, 8/0.02, 9/0.03} + \drawDistribution{activation}{\(P(\rndm{r}\mid s_4) = \)}{4}{0/0.03, 1/0.01, 2/0.04, 3/0.86, 4/0.04, 5/0.03, 6/0.07, 7/0.05, 8/0.06, 9/0.02} + \node at (0.75, 3.665) {\(\vdots\)}; + \end{tikzpicture}% + \end{column} + \end{columns} + \end{block} + \begin{columns} + \begin{column}{6cm}% + \begin{block}{Desired distribution}% + \centering% + \begin{tikzpicture} + \drawDistribution{activation}{\(P(\rndm{r}\mid s_1) = \)}{1.5}{0/0.02, 1/0.05, 2/0.01, 3/0.07, 4/0.88, 5/0.06, 6/0.04, 7/0.06, 8/0.06, 9/0.01} + \drawDistribution{activation}{\(P(\rndm{r}\mid s_2) = \)}{1}{0/0.01, 1/0.07, 2/0.88, 3/0.05, 4/0.02, 5/0.03, 6/0.07, 7/0.06, 8/0.02, 9/0.05} + \drawDistribution{activation}{\(P(\rndm{r}\mid s_3) = \)}{0.5}{0/0.02, 1/0.05, 2/0.01, 3/0.88, 4/0.02, 5/0.03, 6/0.07, 7/0.03, 8/0.02, 9/0.03} + \drawDistribution{activation}{\(P(\rndm{r}\mid s_4) = \)}{0}{0/0.03, 1/0.01, 2/0.04, 3/0.06, 4/0.04, 5/0.03, 6/0.87, 7/0.05, 8/0.06, 9/0.02} + \node at (0.75, -0.335) {\(\vdots\)}; + \end{tikzpicture}% + \end{block} + \end{column}% + \begin{column}{7cm}% + \begin{block}{VAE Model Reminder (Marcheggiani)} + \vspace*{3mm}% + 
\(\displaystyle\overbrace{P(e_{-i} \mid s, e_i)}^{\text{fill-in-the-blank}} = \sum_{r\in\relationSet} \overbrace{P(r\mid s)}^{\text{classifier}} \overbrace{P(e_{-i} \mid r, e_i)}^{\text{entity predictor}}\) + + \bigskip + + \(\loss{vae reg}(\vctr{\phi}) = \kl(Q(\rndm{r}\mid \rndmvctr{e}; \vctr{\phi}) \mathrel{\|} \uniformDistribution(\relationSet))\) + \end{block} + \end{column}% + \end{columns}% + \pause + \begin{tikzpicture}[overlay, remember picture] + \node[inner sep=0, draw=black] at (current page.center) {\problemBoxContent{Marcheggiani's model cannot handle a deep encoder.}}; + \end{tikzpicture} +\end{frame} diff --git a/presentation/fitb/qualitative.tex b/presentation/fitb/qualitative.tex @@ -0,0 +1,52 @@ +\begin{frame}{Qualitative Results: Confusion Matrices}% + \centering% + \scriptsize\futuraTable% + \renderConfusionsDimensions{2.75}{0.23}{0.12} + \begin{tikzpicture} + \renderConfusions* + {mainmatter/fitb/confusion lda.xml}{Rel-LDA1} + {mainmatter/fitb/confusion vae.xml}{\(\text{Linear}+\loss{VAE REG}\)} + {mainmatter/fitb/confusion regularized vae.xml}{\(\text{Linear}+\loss{S}+\loss{D}\)} + {mainmatter/fitb/confusion pcnn.xml}{\(\text{PCNN}+\loss{S}+\loss{D}\)} + \begin{pgfonlayer}{background} + \node[visible on=<2>, fill=Dark2-A!50, inner sep=2mm, fit=(confusion-cell-1-1-1) (confusion-cell-1-15-10)] {}; + \node[visible on=<2>, fill=Dark2-B!50, inner sep=2mm, fit=(confusion-cell-2-1-1) (confusion-cell-2-15-10)] {}; + \node[visible on=<2>, fill=Dark2-C!50, inner sep=2mm, fit=(confusion-cell-3-1-1) (confusion-cell-3-15-10)] {}; + \node[visible on=<2>, fill=Dark2-D!50, inner sep=2mm, fit=(confusion-cell-4-1-1) (confusion-cell-4-15-10)] {}; + + \coordinate (confusion-left) at ($(confusion-cell-1-1-1) + (-1mm, 0)$); + \coordinate (confusion-right) at (confusion-row-9.east); + \node[visible on=<3>, fill=Dark2-A!50, inner ysep=-0.5mm, fit=(confusion-cell-1-1-1 -| confusion-left) (confusion-row-1) (confusion-row-1 -| confusion-right)] {}; + \node[visible on=<4>, 
fill=Dark2-A!50, inner ysep=-0.5mm, fit=(confusion-cell-1-2-1 -| confusion-left) (confusion-row-2) (confusion-row-2 -| confusion-right)] {}; + \node[visible on=<5>, fill=Dark2-A!50, inner ysep=-0.5mm, fit=(confusion-cell-1-3-1 -| confusion-left) (confusion-row-3) (confusion-row-3 -| confusion-right)] {}; + \node[visible on=<6>, fill=Dark2-A!50, inner ysep=-0.5mm, fit=(confusion-cell-1-4-1 -| confusion-left) (confusion-row-4) (confusion-row-4 -| confusion-right)] {}; + + \node[visible on=<7>, inner xsep=-0.5mm, fill=Dark2-A!50, fit=(confusion-column-1-1) (confusion-cell-1-1-1) (confusion-cell-1-15-1)] {}; + \node[visible on=<8>, inner xsep=-0.5mm, fill=Dark2-A!50, fit=(confusion-column-1-2) (confusion-cell-1-1-2) (confusion-cell-1-15-2)] {}; + \node[visible on=<9>, inner xsep=-0.5mm, fill=Dark2-A!50, fit=(confusion-column-1-3) (confusion-cell-1-1-3) (confusion-cell-1-15-3)] {}; + \node[visible on=<10>, inner xsep=-0.5mm, fill=Dark2-A!50, fit=(confusion-column-1-4) (confusion-cell-1-1-4) (confusion-cell-1-15-4)] {}; + \node[visible on=<7>, inner xsep=-0.5mm, fill=Dark2-B!50, fit=(confusion-column-2-1) (confusion-cell-2-1-1) (confusion-cell-2-15-1)] {}; + \node[visible on=<8>, inner xsep=-0.5mm, fill=Dark2-B!50, fit=(confusion-column-2-2) (confusion-cell-2-1-2) (confusion-cell-2-15-2)] {}; + \node[visible on=<9>, inner xsep=-0.5mm, fill=Dark2-B!50, fit=(confusion-column-2-3) (confusion-cell-2-1-3) (confusion-cell-2-15-3)] {}; + \node[visible on=<10>, inner xsep=-0.5mm, fill=Dark2-B!50, fit=(confusion-column-2-4) (confusion-cell-2-1-4) (confusion-cell-2-15-4)] {}; + \node[visible on=<7>, inner xsep=-0.5mm, fill=Dark2-C!50, fit=(confusion-column-3-1) (confusion-cell-3-1-1) (confusion-cell-3-15-1)] {}; + \node[visible on=<8>, inner xsep=-0.5mm, fill=Dark2-C!50, fit=(confusion-column-3-2) (confusion-cell-3-1-2) (confusion-cell-3-15-2)] {}; + \node[visible on=<9>, inner xsep=-0.5mm, fill=Dark2-C!50, fit=(confusion-column-3-3) (confusion-cell-3-1-3) (confusion-cell-3-15-3)] 
{}; + \node[visible on=<10>, inner xsep=-0.5mm, fill=Dark2-C!50, fit=(confusion-column-3-4) (confusion-cell-3-1-4) (confusion-cell-3-15-4)] {}; + \node[visible on=<7>, inner xsep=-0.5mm, fill=Dark2-D!50, fit=(confusion-column-4-1) (confusion-cell-4-1-1) (confusion-cell-4-15-1)] {}; + \node[visible on=<8>, inner xsep=-0.5mm, fill=Dark2-D!50, fit=(confusion-column-4-2) (confusion-cell-4-1-2) (confusion-cell-4-15-2)] {}; + \node[visible on=<9>, inner xsep=-0.5mm, fill=Dark2-D!50, fit=(confusion-column-4-3) (confusion-cell-4-1-3) (confusion-cell-4-15-3)] {}; + \node[visible on=<10>, inner xsep=-0.5mm, fill=Dark2-D!50, fit=(confusion-column-4-4) (confusion-cell-4-1-4) (confusion-cell-4-15-4)] {}; + + \draw[visible on=<11>, draw=Dark2-A!50, line width=3mm, line cap=round] (confusion-cell-1-1-1) -- (confusion-cell-1-10-10); + \draw[visible on=<11>, draw=Dark2-B!50, line width=3mm, line cap=round] (confusion-cell-2-1-1) -- (confusion-cell-2-10-10); + \draw[visible on=<11>, draw=Dark2-C!50, line width=3mm, line cap=round] (confusion-cell-3-1-1) -- (confusion-cell-3-10-10); + \draw[visible on=<11>, draw=Dark2-D!50, line width=3mm, line cap=round] (confusion-cell-4-1-1) -- (confusion-cell-4-10-10); + + \node[visible on=<13>, fill=Dark2-D!50, inner ysep=-0.5mm, fit=(confusion-cell-4-1-1) (confusion-row-1) (confusion-row-1 -| confusion-right)] {}; + \node[visible on=<13>, fill=Dark2-D!50, inner ysep=-0.5mm, fit=(confusion-cell-4-3-1) (confusion-row-3) (confusion-row-3 -| confusion-right)] {}; + + \node[visible on=<14>, fill=Dark2-D!50, inner ysep=2mm, fit=(confusion-column-4-1.center) (confusion-column-4-2.center) (confusion-cell-4-1-2)] {}; + \end{pgfonlayer} + \end{tikzpicture}% +\end{frame} diff --git a/presentation/fitb/quantitative.tex b/presentation/fitb/quantitative.tex @@ -0,0 +1,39 @@ +\begin{frame}{Quantitative Results: NYT+FB} + \centering% + {\usebeamercolor[fg]{alerted text}% + \begin{tikzpicture}[ + remember picture, + overlay, + 
highlight/.style={fill=fg!30!white, inner sep=0.5mm}, + ] + \only<2>{\node[highlight, fit=(fitbquant-c11) (fitbquant-c11-|fitbquant-c81.west) (fitbquant-c42) (fitbquant-c49)] {};} + \only<3>{\node[highlight, fit=(fitbquant-c51) (fitbquant-c51-|fitbquant-c81.west) (fitbquant-c62) (fitbquant-c69)] {};} + \only<4>{\node[highlight, fit=(fitbquant-c71) (fitbquant-c71-|fitbquant-c81.west) (fitbquant-c82) (fitbquant-c89)] {};} + \end{tikzpicture}}% + \begin{tabular}[b]{c c r r r r r r r} + \toprule + \multicolumn{2}{c}{Model} & \multicolumn{3}{c}{\bcubed} & \multicolumn{3}{c}{V-measure} & \multirow{2}{*}{\textsc{ari}} \\ + \cmidrule(lr){1-2}\cmidrule(lr){3-5}\cmidrule(lr){6-8} + Classifier & Reg. & \fone & Prec. & Rec. & \fone & Hom. & Comp. & \\ + \midrule + \tikzmarknode{fitbquant-c11}{rel-LDA} & & 29.1 & 24.8 & 35.2 & 30.0 & 26.1 & 35.1 & \tikzmarknode{fitbquant-c19}{13.3} \\ + \tikzmarknode{fitbquant-c21}{rel-LDA1} & & 36.9 & 30.4 & 47.0 & 37.4 & 31.9 & 45.1 & \tikzmarknode{fitbquant-c29}{24.2} \\ + \tikzmarknode{fitbquant-c31}{Linear} & \tikzmarknode{fitbquant-c32}{\loss{vae reg}} & 35.2 & 23.8 & 67.1 & 27.0 & 18.6 & 49.6 & \tikzmarknode{fitbquant-c39}{18.7} \\ + \midrule + \tikzmarknode{fitbquant-c41}{PCNN} & \tikzmarknode{fitbquant-c42}{\loss{vae reg}} & 27.6 & 24.3 & 31.9 & 24.7 & 21.2 & 29.6 & \tikzmarknode{fitbquant-c49}{15.7} \\ + \tikzmarknode{fitbquant-c51}{Linear} & \tikzmarknode{fitbquant-c52}{\(\loss{s}+\loss{d}\)} & 37.5 & 31.1 & 47.4 & \strong{38.7} & 32.6 & 47.8 & \tikzmarknode{fitbquant-c59}{27.6} \\ + \tikzmarknode{fitbquant-c61}{PCNN} & \tikzmarknode{fitbquant-c62}{\(\loss{s}+\loss{d}\)} & \strong{39.4} & 32.2 & 50.7 & 38.3 & 32.2 & 47.2 & \tikzmarknode{fitbquant-c69}{\strong{33.8}} \\ + \midrule + \tikzmarknode{fitbquant-c71}{\bertcoder} & \tikzmarknode{fitbquant-c72}{\(\loss{s}+\loss{d}\)} & 41.5 & 34.6 & 51.8 & 39.9 & 33.9 & 48.5 & \tikzmarknode{fitbquant-c79}{35.1} \\ + \tikzmarknode{fitbquant-c81}{\bertcoder} & 
\tikzmarknode{fitbquant-c82}{SelfORE} & \emph{\futuraHeavierTable 49.1} & 47.3 & 51.1 & \emph{\futuraHeavierTable 46.6} & 45.7 & 47.6 & \tikzmarknode{fitbquant-c89}{\emph{\futuraHeavierTable 40.3}} \\ + \bottomrule + \end{tabular} + + \bigskip + \raggedright + + \strut% + \only<1-3>{\newline}% + \only<4>{\citeauthor{selfore} \citefield{selfore}[linkedtitle]{title} \citefield{selfore}{shortseries}~\cite*{selfore}}% + \strut% +\end{frame} diff --git a/presentation/fitb/rellda.tex b/presentation/fitb/rellda.tex @@ -0,0 +1,40 @@ +\begin{frame}[t]{Related Work: RelLDA (generative, 2011)}% + \begin{columns}[T]% + \begin{column}{6.5cm}% + An LDA-like model: + \begin{center}% + \input{mainmatter/relation extraction/rellda plate.tex} + \end{center} + \end{column}% + \begin{column}{7.5cm}% + \(\rndm{\theta}_d\) distribution of relations in document \(d\) + + \medskip + + \(\rndm{r}_i\) conveyed relation + + \medskip + + \(\rndm{\phi}_{rj}\) associate features to relations + + \medskip + + \(\rndm{f}_i\) features: + \begin{enumerate} + \item bag of words of the infix; + \item surface form of the entities; + \item lemma words on the dependency path; + \item \textsc{pos} of the infix words; + \item[…] + \end{enumerate} + \end{column}% + \end{columns} + + \bigskip + + Assume \hypothesis{biclique}: \(\forall r\in\relationSet:\exists A,B\subseteq\entitySet: r\relationComposition\breve{r}=A^2\land\breve{r}\relationComposition r=B^2\) + + \bigskip + + \problemBox{Makes large independence assumptions.} +\end{frame} diff --git a/presentation/fitb/section.tex b/presentation/fitb/section.tex @@ -0,0 +1,19 @@ +\section{Regularizing Discriminative Models} +\label{sec:fitb} +\input{presentation/fitb/clustering.tex} +\input{presentation/fitb/plan.tex} +\input{presentation/fitb/rellda.tex} +\input{presentation/fitb/marcheggiani.tex} +\input{presentation/fitb/pcnn.tex} +\input{presentation/fitb/deep fail.tex} +\input{presentation/fitb/teaser.tex} +\input{presentation/fitb/surrogate.tex} 
+\input{presentation/fitb/classifier.tex} +\input{presentation/fitb/entity predictor.tex} +\input{presentation/fitb/negative sampling.tex} +\input{presentation/fitb/problems.tex} +\input{presentation/fitb/skewness.tex} +\input{presentation/fitb/distribution distance.tex} +\input{presentation/fitb/quantitative.tex} +\input{presentation/fitb/qualitative.tex} +\input{presentation/fitb/conclusion.tex} diff --git a/presentation/fitb/skewness.tex b/presentation/fitb/skewness.tex @@ -0,0 +1,19 @@ +\begin{frame}{Skewness Loss}% + \begin{columns}% + \begin{column}{6cm}% + \centering% + \input{mainmatter/fitb/problem 1.tex}% + \end{column} + \begin{column}{7cm}% + \begin{block}{Ensure Confidence} + \begin{equation*} + \loss{S}(\vctr{\phi}) = \expectation_{(\rndm{s}, \rndmvctr{e})\sim \uniformDistribution(\dataSet)} \left[ \entropy(\rndm{R} \mid \rndm{s}, \rndmvctr{e}; \vctr{\phi}) \right] + \end{equation*} + + \bigskip + + The entropy of the relation distribution must be low for each sample. + \end{block} + \end{column} + \end{columns} +\end{frame} diff --git a/presentation/fitb/surrogate.tex b/presentation/fitb/surrogate.tex @@ -0,0 +1,32 @@ +\begin{frame}{Fill-in-the-blank Surrogate Task} + \begin{centering} + ``The \uhead{sol} was the currency of \utail{~?~} between 1863 and 1985.'' + \end{centering} + + \pause + \bigskip + + \(e_{-i}\) missing entity, \(e_i\) remaining entity, \(s\) conveying sentence\uncover<3->{, \(r\) conveyed relation} + \begin{equation*} + \text{for } i=1, 2: \qquad + \overbrace{P(e_{-i} \mid s, e_i)}^{\text{fill-in-the-blank}} + \uncover<4->{= \sum_{r\in\relationSet} \overbrace{P(r\mid s)}^{\text{classifier}}} + \uncover<3->{\overbrace{P(e_{-i} \mid r, e_i)}^{\text{entity predictor}}} + \end{equation*} + + \smallskip + + \uncover<4->{ + Assume \hypothesis{blankable}: The relation can be predicted from the text surrounding the two entities alone. 
+ } + + \bigskip + + \uncover<5->{ + \begin{enumerate} + \item Train a fill-in-the-blank model on an unsupervised dataset. + \item Throw away the entity predictor. + \item Use the classifier on new samples. + \end{enumerate} + } +\end{frame} diff --git a/presentation/fitb/teaser.tex b/presentation/fitb/teaser.tex @@ -0,0 +1,7 @@ +\begin{frame}{Understanding the Problem} + \begin{itemize} + \item We introduce a new formalism. + \item The encoder and decoder are sub-models performing different tasks. + \item The interaction between these two sub-models is problematic. + \end{itemize} +\end{frame} diff --git a/presentation/graph/basic approaches.tex b/presentation/graph/basic approaches.tex @@ -0,0 +1,20 @@ +\begin{frame}{Initial Approaches} + Use both pieces of information jointly, linguistic and topological.\\ + ``The more features, the better.'' + + \bigskip + + \begin{block}{Nonparametric WL} + By using the Wasserstein definition of \(\operatorname{sim}_\text{topo}\). + + We can use \(\operatorname{sim}_\text{topoling}\) as-is, without fine-tuning. + \end{block} + + \pause + + \begin{block}{MTB GCN} + Train \bertcoder{} and a GCN to learn that parallel arcs should have high \(\operatorname{sim}_\text{topoling}\). + + This enables MTB to use the neighborhood for prediction. + \end{block} +\end{frame} diff --git a/presentation/graph/conclusion.tex b/presentation/graph/conclusion.tex @@ -0,0 +1,16 @@ +\begin{frame}{Conclusion} + \begin{block}{Take-home Message} + Topological information can be leveraged for unsupervised relation extraction. + \end{block} + \begin{block}{Contributions} + \begin{itemize} + \item Explicitly modeled the aggregate setup for the unsupervised problem. + \item Provided proof on the quality of topological information. + \item Proposed an approach to exploit the mutual information between topological and linguistic features. + \end{itemize} + \end{block} + + \medskip + + Several directions still need to be explored. 
+\end{frame} diff --git a/presentation/graph/counting.tex b/presentation/graph/counting.tex @@ -0,0 +1,18 @@ +\begin{frame}{Proof of Principle: Counting Paths} + \begin{block}{Proposition} + Given the \alert{path} \raisebox{-0.6em}{\input{mainmatter/graph/3-path.tex}} we expect \(\rndm{r}_1 \notindependent \rndm{r}_2 \notindependent \rndm{r}_3\). + \end{block} + \begin{block}{Goal} + Compute the mutual information \(\operatorname{I}(\rndm{r}_2; \rndm{r}_1, \rndm{r}_3)\) + \end{block} + \pause + \begin{block}{Path Counting Algorithm} + We can (slowly) sample \alert{walks} using powers of the adjacency matrix. + + \begin{enumerate} + \item Sample a walk by chaining neighbors + \item Reject non-path + \item Count the accepted paths weighted by importance + \end{enumerate} + \end{block} +\end{frame} diff --git a/presentation/graph/encoding.tex b/presentation/graph/encoding.tex @@ -0,0 +1,70 @@ +\begingroup +\def\shiftA#1{\raisebox{-2.5ex}} +\def\shiftB#1{\raisebox{1ex}} +\tikzset{ + gnode/.style={ + draw, + ellipse + }, + patext/.style 2 args={ + decoration={ + text align=center, + text along path, + text={|\scriptsize||+#1|#2} + }, + decorate + }, +} + +\begin{frame}{Encoding Relation Extraction as a Multigraph Problem} + \only<1>{ + The exterior and interior of Freemasons' Hall continued to be a stand-in for \textcolor{Dark2-A}{\utail{Thames House}}, the headquarters of \textcolor{Dark2-B}{\uhead{\textsc{mi5}}}. + + \bigskip + + Golitsyn's claims about Wilson were believed in particular by the senior \textcolor{Dark2-B}{\uhead{\textsc{mi5}}} \textcolor{Dark2-D}{\utail{counterintelligence}} officer Peter Wright. + + \bigskip + + In its \textcolor{Dark2-D}{\utail{counter-espionage}} and counter-intelligence roles, \textcolor{Dark2-C}{\uhead{\textsc{smersh}}} appears to have been extremely successful throughout World War II. 
+ + \bigskip + + The Freemasons' Hall in London served as the filming location for \textcolor{Dark2-A}{\uhead{Thames House}}, the headquarters for \textcolor{Dark2-B}{\utail{\textsc{mi5}}}. + } + \only<2->{% + \centering% + \begin{tikzpicture}% + \node[gnode, Dark2-A] (thames) at ( 0 , 3) {\footnotesize \textcolor{Dark2-A}{Thames House}}; + \node[gnode, Dark2-B] (mi5) at ( 6.5, 0) {\footnotesize \textcolor{Dark2-B}{\textsc{mi5}}}; + \node[gnode, Dark2-C] (smersh) at (-5.5, 0) {\footnotesize \textcolor{Dark2-C}{\textsc{smersh}}}; + \node[gnode, Dark2-D] (counter) at ( 0 , -3) {\footnotesize \textcolor{Dark2-D}{counterintelligence}}; + + \draw[thick,latex-] (thames.east) to [out=0,in=90] (mi5.north); + \draw[patext={\shiftA}{The exterior and interior of Freemasons' Hall…}] (thames.east) to [out=0,in=90] (mi5.north); + + \draw[thick,-latex] (thames.south) to [out=-90,in=180] (mi5.west); + \draw[patext={\shiftA}{The Freemasons' Hall in London served as the filming…}] (thames.south) to [out=-90,in=180] (mi5.west); + + \draw[thick,-latex] (smersh.south) to [out=-90,in=180] (counter.west); + \draw[patext={\shiftA}{In its counter-espionage and counter-…}] (smersh.south) to [out=-90,in=180] (counter.west); + + \draw[thick,latex-] (counter.east) to [out=0,in=-90] (mi5.south); + \draw[patext={\shiftA}{Golitsyn's claims about Wilson were believed…}] (counter.east) to [out=0,in=-90] (mi5.south); + + \draw[patext={\shiftB}{field of work}] (smersh.south) to [out=-90,in=180] (counter.west); + \draw[patext={\shiftB}{field of work}] (counter.east) to [out=0,in=-90] (mi5.south); + \draw[patext={\shiftB}{occupant}] (thames.south) to [out=-90,in=180] (mi5.west); + \draw[patext={\shiftB}{headquarters location}] (thames.east) to [out=0,in=90] (mi5.north); + \end{tikzpicture}% + }% + \only<3>{% + \begin{tikzpicture}[overlay, remember picture] + \node[inner sep=0, text width=11cm] at (current page.center) {% + \begin{block}{Weak Distributional Hypothesis on Relation Extraction Graph} + 
\emph{Two arcs conveying similar relations have similar neighborhoods.} + \end{block}}; + \end{tikzpicture}% + }% +\end{frame} +\endgroup diff --git a/presentation/graph/gcn.tex b/presentation/graph/gcn.tex @@ -0,0 +1,31 @@ +\begin{frame}{Capturing Neighborhoods}% + \begin{block}{Modeling Hypothesis} + \hypothesis{1-neighborhood}: Two samples with the same neighborhood in the relation extraction graph convey the same relation.\\ + \( \forall a, a'\in\arcSet\colon \gfeneighbors(a) = \gfeneighbors(a') \implies \gfrelation(a)=\gfrelation(a') \) + \end{block}% + \begin{columns}% + \begin{column}{6.5cm}% + \begin{block}{Graph Convolutional Network} + \centering% + \input{mainmatter/graph/graph convolution parallel.tex}% + + \begin{tikzpicture} + \node[inner sep=1mm, draw] (v) at (0, 0) {\(v\)}; + \node[inner sep=1mm, draw] (n1) at (1, 1) {\(n_1\)}; + \node[inner sep=1mm, draw] (n2) at (-1.5, 0.75) {\(n_2\)}; + \node[inner sep=1mm, draw] (n3) at (1.75, -0.25) {\(n_3\)}; + + \draw[arrow] (n1) -- (v) node[midway, above left, yshift=-1mm, xshift=1mm] {\(\mtrx{W}\)}; + \draw[arrow] (n2) -- (v) node[midway, above] {\(\mtrx{W}\)}; + \draw[arrow] (n3) -- (v) node[midway, below] {\(\mtrx{W}\)}; + \end{tikzpicture}% + \end{block}% + \end{column}% + \begin{column}{6.5cm}% + \begin{block}{Graph Isomorphism}% + \centering% + \input{mainmatter/graph/isomorphism.tex}% + \end{block}% + \end{column}% + \end{columns}% +\end{frame} diff --git a/presentation/graph/introduction.tex b/presentation/graph/introduction.tex @@ -0,0 +1,22 @@ +\begin{frame}{Introduction} + \alert{Sentential approaches}: extract sentences' relation independently (\(\sentenceSet\times\entitySet^2\to\relationSet\)) + + \alert{Aggregate approaches}: maps a set of sentences to a set of facts (\(2^{\sentenceSet\times\entitySet^2}\to2^{\entitySet^2\times\relationSet}\)) + + \medskip + + \begin{block}{Goal} + Exploit dataset-level regularities to leverage additional information + \end{block}% + \begin{block}{Plan} + 
\begin{enumerate} + \item Model datasets as graphs + \item Related relation extraction work only uses \alert{linguistic} similarities + \item Proof that \alert{topological} information can be used + \item How topological features are usually extracted (GCN) + \item How to extract them differently (WL isomorphism test) + \item Experimental results + \item Perspective + \end{enumerate} + \end{block} +\end{frame} diff --git a/presentation/graph/mtb.tex b/presentation/graph/mtb.tex @@ -0,0 +1,32 @@ +\begin{frame}{Related Work: Matching the Blanks (2019)} + \begin{block}{\bertcoder\ (linguistic)} + \centering% + \input{mainmatter/relation extraction/emes.tex} + \end{block} + \pause + \begin{minipage}{52mm} + \begin{block}{Prediction\vphantom{Hypotheses}}% + Compare samples using:\\ + \(\operatorname{sim}(x, x') = \operatorname{sigmoid}(\)\\ + \(\bertcoder(x)\transpose \bertcoder(x'))\) + \vspace*{2mm} + \end{block} + \end{minipage}% + \hfill% + \pause% + \begin{minipage}{83mm} + \begin{block}{Hypotheses} + \begin{minipage}{39mm}% + \centering% + \input{mainmatter/graph/mtb graph.tex}% + \vspace*{-5mm}% + \end{minipage}% + \hfill% + \begin{minipage}{41mm} + MTB assumes:\\ + \(r_1=r_2\) (\hypothesis{1-adjacency})\\ + \(r_3\neq r_1 \land r_3\neq r_2\) (\hypothesis{\(1\to1\)}) + \end{minipage}% + \end{block}% + \end{minipage}% +\end{frame} diff --git a/presentation/graph/quantitative.tex b/presentation/graph/quantitative.tex @@ -0,0 +1,51 @@ +\begin{frame}{Results: FewRel 5 way 1 shot Accuracies} + \begin{columns}% + \begin{column}{75mm} + \centering% + {\usebeamercolor[fg]{alerted text}% + \begin{tikzpicture}[ + remember picture, + overlay, + highlight/.style={fill=fg!30!white, inner sep=0.5mm}, + ] + \only<2>{\node[highlight, fit=(graphquant-c11) (graphquant-c12)] {};} + \only<3>{\node[highlight, fit=(graphquant-c21) (graphquant-c22)] {};} + \only<4>{\node[highlight, fit=(graphquant-c31) (graphquant-c32)] {};} + \only<5>{\node[highlight, fit=(graphquant-c41) 
(graphquant-c42)] {};} + \only<6>{\node[highlight, fit=(graphquant-c51) (graphquant-c52)] {};} + \end{tikzpicture}}% + \begin{tabular}{l r} + \toprule + Model & Accuracy \\ + \midrule + \multicolumn{2}{c}{{\futuraHeavier Pre-trained}} \\ + \midrule + \tikzmarknode{graphquant-c11}{Linguistic (\textsc{bert})} & \tikzmarknode{graphquant-c12}{69.46} \\ + \tikzmarknode{graphquant-c21}{Topological (\(W_1\))} & \tikzmarknode{graphquant-c22}{65.75} \\ + \tikzmarknode{graphquant-c31}{Topolinguistic} & \tikzmarknode{graphquant-c32}{72.18} \\ + \midrule + \multicolumn{2}{c}{{\futuraHeavier Fine-tuned}} \\ + \midrule + \tikzmarknode{graphquant-c41}{\textsc{mtb}} & \tikzmarknode{graphquant-c42}{78.83} \\ + \tikzmarknode{graphquant-c51}{\textsc{mtb gcn}--Chebyshev} & \tikzmarknode{graphquant-c52}{76.10} \\ + \bottomrule + \end{tabular} + \end{column}% + \begin{column}{65mm} + \begin{block}{Few-Shot Evaluation} + \alert{1} query + + \alert{5} candidates + + Which candidate conveys the same relation as the query? + + A random model scores 20\% accuracy. 
+ \end{block} + \end{column}% + \end{columns} + + \bigskip + + \raggedright + \uncover<2,5>{\citeauthor{mtb} \citefield{mtb}[linkedtitle]{title} \citefield{mtb}{shortseries}~\cite*{mtb}}% +\end{frame} diff --git a/presentation/graph/section.tex b/presentation/graph/section.tex @@ -0,0 +1,15 @@ +\section{Graph-based Aggregate Extraction} +\label{sec:graph} + +\input{presentation/graph/similarity function.tex} +\input{presentation/graph/introduction.tex} +\input{presentation/graph/mtb.tex} +\input{presentation/graph/encoding.tex} +\input{presentation/graph/counting.tex} +\input{presentation/graph/statistics.tex} +\input{presentation/graph/gcn.tex} +\input{presentation/graph/wasserstein.tex} +\input{presentation/graph/topological similarity.tex} +\input{presentation/graph/quantitative.tex} +\input{presentation/graph/conclusion.tex} +\input{presentation/graph/triplet loss.tex} diff --git a/presentation/graph/similarity function.tex b/presentation/graph/similarity function.tex @@ -0,0 +1,45 @@ +\begin{frame}{Similarity Function Approaches} + \begin{tabular}{@{}>{\hbadness=5000}m{73mm} @{\hspace{7mm}} >{\tikz[baseline={(0,-0.1)}]{\fill[black!30] (0,0.1) -- (0.75,0.1) -- (0.75,0.35) -- (1.25, 0) -- (0.75,-0.35) -- (0.75, -0.1) -- (0, -0.1) -- cycle;}\hspace{5mm}}l@{}} + \tikzmarknode{fewshot-c11}{% + \parbox{73mm}{\uhead{Megrez}\textsuperscript{\kern-2.5mm\wdent{850779}} is a star in the northern circumpolar constellation of \utail{Ursa Major}\textsuperscript{\kern-2.5mm\wdent{10460}}.}}% + & + \tikzmarknode{fewshot-c12}{\sfTripletHolds{e_1}{part of constellation}{e_2}} \\ + \multicolumn{2}{c}{} \\[1mm] + \tikzmarknode{fewshot-c21}{% + \parbox{73mm}{\uhead{Posidonius}\textsuperscript{\kern-2.5mm\wdent{185770}} was a Greek philosopher, astronomer, historian, mathematician, and teacher native to \utail{Apamea, Syria}\textsuperscript{\kern-2.5mm\wdent{617550}}.}}% + & + \tikzmarknode{fewshot-c22}{\sfTripletHolds{e_1}{born in}{e_2}} \\ + \multicolumn{2}{c}{} \\[1mm] + 
\tikzmarknode{fewshot-c31}{% + \parbox{73mm}{\uhead{Hipparchus}\textsuperscript{\kern-2.5mm\wdent{159905}} was born in \utail{Nicaea, Bithynia}\textsuperscript{\kern-2.5mm\wdent{739037}}, and probably died on the island of Rhodes, Greece.}}% + & + \tikzmarknode{fewshot-c32}{\sfTripletHolds{e_1}{born in}{e_2}} \\ + \end{tabular}% + \begin{tikzpicture}[ + remember picture, + overlay, + fsbrace/.style={decorate, decoration={brace,amplitude=5}}, + fsvar/.style={midway, anchor=west, xshift=1mm}, + ] + \fill[white] (fewshot-c11.north east) rectangle (fewshot-c31.south-|fewshot-c12.east); + \draw[fsbrace] (fewshot-c11.north east) -- (fewshot-c11.south east) node[fsvar] {\vphantom{\(x\)}\smash{\textcolor{Dark2-B}{\(x_1\)}}}; + \draw[fsbrace] (fewshot-c21.north east) -- (fewshot-c21.south east) node[fsvar] {\vphantom{\(x\)}\smash{\textcolor{Dark2-A}{\(x_2\)}}}; + \draw[fsbrace] (fewshot-c31.north east) -- (fewshot-c31.south east) node[fsvar] {\vphantom{\(x\)}\smash{\textcolor{Dark2-C}{\(x_3\)}}}; + + \node[right=2cm of fewshot-c21, text width=5cm] {% + Learn a similarity function\\ + \(\operatorname{sim}\colon \dataSet\times\dataSet\to\symbb{R}\) + + \bigskip + + \(\operatorname{sim}(\textcolor{Dark2-B}{x_1}, \textcolor{Dark2-A}{x_2}) < \operatorname{sim}(\textcolor{Dark2-A}{x_2}, \textcolor{Dark2-C}{x_3})\)\\ + \(\operatorname{sim}(\textcolor{Dark2-B}{x_1}, \textcolor{Dark2-C}{x_3}) < \operatorname{sim}(\textcolor{Dark2-A}{x_2}, \textcolor{Dark2-C}{x_3})\)\\ + }; + \end{tikzpicture} + + \bigskip + + 5~way 1~shot: given 1 query and 5 candidates, which of the candidates is most similar to the query? + + Evaluated using accuracy. 
+\end{frame} diff --git a/presentation/graph/statistics.tex b/presentation/graph/statistics.tex @@ -0,0 +1,52 @@ +\begin{frame}{Proof of Principle: Path Statistics} + \begin{block}{Path Frequency} + \footnotesize% + \vspace*{2mm}% + \begin{center}% + \begin{tabular}{@{}r c c@{}} + \toprule + Frequency & Relation Surface forms & Relation Identifiers \\ + \midrule + 31.696‰ & \(\textsl{country} \relationComposition \textsl{diplomatic relation} \relationComposition \widebreve{\textsl{citizen of}}\) & \(\wdrel{17} \relationComposition \wdrel{530} \relationComposition \Pwidebreve{\wdrel{27}}\) \\ + \bottomrule + \end{tabular}% + \end{center}% + Example of path:\\ + \begin{tikzpicture} + \matrix[matrix of nodes, column sep=2.6cm, ampersand replacement=\&]{ + \node (vat phou) {Vat Phou}; \& + \node (laos) {Laos}; \& + \node (japan) {Japan}; \& + \node (souseki) {Natsume Sōseki}; \\ + }; + + \draw[arrow] (vat phou) -- (laos) node[midway, above] {\tiny\strut\uhead{Vat Phou} is a ruined Khmer…}; + \draw[arrow] (laos) -- (japan) node[midway, above] {\tiny\strut …\vphantom{\uhead{x}}the historical relationship between…}; + \draw[arrow] (souseki) -- (japan) node[midway, above] {\tiny\strut\uhead{Sōseki} was a \utail{Japanese} novelist}; + % country + % diplomatic relation + % REV(citizen of) + \end{tikzpicture}% + \end{block} + \pause + \begin{block}{Summary Statistics} + \unskipdisplay + \begin{equation*} + \tikzmarknode{statistics-m1}{\operatorname{I}(\rndm{r}_2; \rndm{r}_1, \rndm{r}_3)} + = + \tikzmarknode{statistics-m2}{\expectation_{r_1, r_3}[\entropy_{P(\rndm{r}_2)}(\rndm{r}_2\mid r_1, r_3)]} + - + \tikzmarknode{statistics-m3}{\entropy(\rndm{r}_2\mid\rndm{r}_1,\rndm{r}_3)} + \end{equation*} + \begin{tikzpicture}[remember picture, overlay] + \node[below=1mm of statistics-m1] (statistics-e1) {\rotatebox{90}{\(\approx\)}}; + \node (statistics-e2) at (statistics-e1-|statistics-m2) {\rotatebox{90}{\(\approx\)}}; + \node (statistics-e3) at (statistics-e1-|statistics-m3) 
{\rotatebox{90}{\(\approx\)}}; + + \node[below=-1mm of statistics-e1] {6.95 bits}; + \node[below=-1mm of statistics-e2] {8.01 bits}; + \node[below=-1mm of statistics-e3] {1.06 bits}; + \end{tikzpicture} + \vspace*{7mm} + \end{block} +\end{frame} diff --git a/presentation/graph/topological similarity.tex b/presentation/graph/topological similarity.tex @@ -0,0 +1,21 @@ +\begin{frame}{How to Exploit the Graph for Relation Extraction} + \begin{block}{Redefining similarity} + We keep the \alert{linguistic} similarity from MTB:\\ + \hspace{2cm}\(\operatorname{sim}_\text{ling}(x, x') = \operatorname{sigmoid}\left(\bertcoder(x)\transpose \bertcoder(x')\right)\) + + \bigskip + + But also define a \alert{topological} similarity: + + Either using GCN:\\ + \hspace{2cm}\(\operatorname{sim}^\text{GCN}_\text{topo}(x, x') = \operatorname{sigmoid}\left(\operatorname{GCN}(G)_x\transpose \operatorname{GCN}(G)_{x'}\right)\) + + Or 1-Wasserstein:\\ + \hspace{2cm}\(\operatorname{sim}^{W_1}_\text{topo}(x, x') = -W_1(\symfrak{S}(x, 1), \symfrak{S}(x', 1))\) + + \bigskip + + Define the \alert{topolinguistic} similarity as:\\ + \hspace{2cm}\(\operatorname{sim}_\text{topoling}(x, x') = \operatorname{sim}_\text{ling}(x, x') + \lambda \operatorname{sim}_\text{topo}(x, x')\) + \end{block} +\end{frame} diff --git a/presentation/graph/triplet loss.tex b/presentation/graph/triplet loss.tex @@ -0,0 +1,52 @@ +\tikzset{ + nsnode/.style={inner sep=0.5mm}, + nshighlight/.style={draw=#1, fill=#1!30!white, inner sep=0}, +} +\begin{frame}{Aligning Linguistic and Topological Similarities} + Use the topological features to identify the relational information in the linguistic features. 
+ + \bigskip + + \begin{tikzpicture}[remember picture, overlay, on background layer] + \only<2->{ + \coordinate (triplet-right) at ($(triplet-negative1.east) + (7mm, 0)$); + \coordinate (triplet-positive-i) at ($(pic cs:triplet-1) + (1mm, 0.3em)$); + \node[nshighlight=Dark2-A, fit=(triplet-positive)] (triplet-positive-n) {}; + \draw[arrow, Dark2-A, rounded corners=1mm] (triplet-positive-n.east) -- (triplet-positive-n.east -| triplet-right) -- (triplet-positive-i -| triplet-right) -- (triplet-positive-i); + } + + \only<3->{ + \coordinate (triplet-right-2) at ($(triplet-right) + (1mm, 0)$); + \coordinate (triplet-negative-i) at ($(pic cs:triplet-2) + (1mm, 0.3em)$); + \node[nshighlight=Dark2-B, fit=(triplet-negative1) (triplet-negative2)] (triplet-negative-n) {}; + \draw[arrow, Dark2-B, rounded corners=1mm] (triplet-negative-n.east) -- (triplet-negative-n.east -| triplet-right-2) -- (triplet-negative-i -| triplet-right-2) -- (triplet-negative-i); + } + + \only<4>{ + \coordinate (triplet-right-3) at ($(triplet-right) + (2mm, 0)$); + \coordinate (triplet-margin-i) at ($(pic cs:triplet-3) + (1mm, 0.3em)$); + \node[nshighlight=Dark2-C, fit=(triplet-margin)] (triplet-margin-n) {}; + \coordinate (triplet-margin-top) at ($(triplet-margin-n.north) + (0, 3mm)$); + \draw[arrow, Dark2-C, rounded corners=1mm] (triplet-margin-n.north) -- (triplet-margin-top) -- (triplet-margin-top -| triplet-right-3) -- (triplet-margin-i -| triplet-right-3) -- (triplet-margin-i); + } + \end{tikzpicture} + \begin{equation*} + \loss{lt}(x_1, x_2, x_3) = \max\left( + \begin{aligned} + 0, \tikzmarknode[nsnode]{triplet-margin}{\zeta} & + + 2 \tikzmarknode[nsnode]{triplet-positive}{\big(\operatorname{sim}_\text{ling}(x_1, x_2) - \operatorname{sim}_\text{topo}(x_1, x_2)\big)^2} \\ + & \hspace{1cm} - \tikzmarknode[nsnode]{triplet-negative1}{\big(\operatorname{sim}_\text{ling}(x_1, x_2) - \operatorname{sim}_\text{topo}(x_1, x_3)\big)^2} \\ + & \hspace{1cm} - 
\tikzmarknode[nsnode]{triplet-negative2}{\big(\operatorname{sim}_\text{ling}(x_1, x_3) - \operatorname{sim}_\text{topo}(x_1, x_2)\big)^2} + \end{aligned} + \right) + \end{equation*} + + \pause + \bigskip + + \begin{itemize}[<+->] + \item Ideally, we want to align the two similarities.\tikzmark{triplet-1} + \item However, to stabilize the loss we need to use negative samples.\tikzmark{triplet-2} + \item Up to a margin \(\zeta\).\tikzmark{triplet-3} + \end{itemize} +\end{frame} diff --git a/presentation/graph/wasserstein.tex b/presentation/graph/wasserstein.tex @@ -0,0 +1,49 @@ +\begin{frame}{1-Wasserstein-based Use of Topological Features} + \begin{columns}% + \begin{column}{6.5cm}% + \begin{block}{Earth Mover Distance} + \centering% + \input{mainmatter/graph/Wasserstein.tex} + \end{block} + \begin{block}{Compare Topological Features} + Skip recoloring, directly compare neighborhoods in \(\symbb{R}^d\): + + \smallskip + + \(S(x, k) = \text{samples at distance \(k\) of \(x\)}\) + + \smallskip + + \(\symfrak{S}(x, k) =\)\\ + \hfill \(\{\,\bertcoder(y)\in\symbb{R}^d \mid y\in S(x, k)\,\}\)\\ + + \smallskip + + \centering% + \fbox{\(W_1(\symfrak{S}(x, 1), \symfrak{S}(x', 1))\)} + \end{block} + \end{column}% + \begin{column}{6.5cm}% + \begin{algorithmic} + \Function{Weisfeiler--Leman}{} + \FunctionInputs{} \(G=(V, E)\) graph + \FunctionInputs*{} \(k\) dimensionality + \FunctionOutput{} \(\chi_\infty\) coloring of \(k\)-tuples + \State + \State \(\chi_0(\vctr{x}) \gets \operatorname{iso}(\vctr{x}) \quad \forall \vctr{x}\in V^k\) + \For{\(\ell=1,2,\dotsc\)} + \State \(\symfrak{I}_\ell\gets \text{new color index}\) + \ForAll{\(\vctr{x}\in V^k\)} + \State \(c_\ell(\vctr{x}) \mathop{\raisebox{-1mm}{\(\Lsh\)}}\) + \State \hspace{2mm}\(\lMultiBrace\,\chi_{\ell-1}(\vctr{y}) \middlerel{|} \vctr{y}\in\gfneighbors^k(\vctr{x})\,\rMultiBrace\) + \State \(\chi_\ell(\vctr{x}) \mathop{\raisebox{-1mm}{\(\Lsh\)}}\) + \State \hspace{10mm}\((\chi_{\ell-1}(\vctr{x}), c_\ell(\vctr{x})) \text{ in 
} \symfrak{I}_\ell\) + \EndFor + \EndFor + \State \textbf{until} \(\chi_\ell = \chi_{\ell-1}\) + \State \Output \(\chi_\ell\) + \EndFunction + \end{algorithmic} + \end{column}% + \end{columns}% +\end{frame} diff --git a/presentation/graph/wl.tex b/presentation/graph/wl.tex @@ -0,0 +1,28 @@ +\begin{frame}{Weisfeiler--Leman Isomorphism Test}% + \begin{columns}% + \begin{column}{55mm}% + \centering% + \input{mainmatter/graph/isomorphism.tex} + \end{column}% + \begin{column}{75mm}% + \begin{algorithmic} + \Function{Weisfeiler--Leman}{} + \FunctionInputs{} \(G=(V, E)\) graph + \FunctionInputs*{} \(k\) dimensionality + \FunctionOutput{} \(\chi_\infty\) coloring of \(k\)-tuples + \State + \State \(\chi_0(\vctr{x}) \gets \operatorname{iso}(\vctr{x}) \quad \forall \vctr{x}\in V^k\) + \For{\(\ell=1,2,\dotsc\)} + \State \(\symfrak{I}_\ell\gets \text{new color index}\) + \ForAll{\(\vctr{x}\in V^k\)} + \State \(c_\ell(\vctr{x}) \gets \lMultiBrace\,\chi_{\ell-1}(\vctr{y}) \middlerel{|} \vctr{y}\in\gfneighbors^k(\vctr{x})\,\rMultiBrace\) + \State \(\chi_\ell(\vctr{x}) \gets (\chi_{\ell-1}(\vctr{x}), c_\ell(\vctr{x})) \text{ in } \symfrak{I}_\ell\) + \EndFor + \EndFor + \State \textbf{until} \(\chi_\ell = \chi_{\ell-1}\) + \State \Output \(\chi_\ell\) + \EndFunction + \end{algorithmic} + \end{column}% + \end{columns}% +\end{frame} diff --git a/presentation/supplementary/alignsep.tex b/presentation/supplementary/alignsep.tex @@ -0,0 +1,16 @@ +\begin{frame}{Aligning Sentences and Entity Pairs} + \begin{equation*} + P(\rndm{r}=r\mid s, \vctr{e}; \vctr{\theta}, \vctr{\phi}) = P(\rndm{r}_s=r\mid s; \vctr{\phi}) P(\rndm{r}_e=r\mid \vctr{e}; \vctr{\theta}) + \end{equation*} + + \bigskip + + \begin{equation*} + \loss{align}(\vctr{\theta}, \vctr{\phi}) = - \log \sum_{r\in\relationSet}P(r\mid s, \vctr{e}; \vctr{\theta}, \vctr{\phi}) + \loss{d}(\vctr{\theta}) + \loss{d}(\vctr{\phi}). 
+ \end{equation*} + + \bigskip + + \centering% + \input{mainmatter/fitb/align.tex} +\end{frame} diff --git a/presentation/supplementary/distant.tex b/presentation/supplementary/distant.tex @@ -0,0 +1,23 @@ +\begin{frame}{Distant Supervision} + \begin{block}{\hypothesis{distant}} + A sentence conveys all the possible relations between all the entities it contains. + + \smallskip + + \(\dataSet_\relationSet = \dataSet \bowtie \kbSet\) + + \smallskip + + where \(\bowtie\) denotes the natural join operator: + \begin{equation*} + \dataSet \bowtie \kbSet = + \left\{\, + (s, e_1, e_2, r) + \mid + (s, e_1, e_2)\in\dataSet + \land + (e_1, e_2, r)\in\kbSet + \,\right\}. + \end{equation*} + \end{block} +\end{frame} diff --git a/presentation/supplementary/features.tex b/presentation/supplementary/features.tex @@ -0,0 +1,12 @@ +\begin{frame}{Features used by Marcheggiani and rel-LDA} + \begin{enumerate} + \item the bag of words of the infix; + \item the surface form of the entities; + \item the lemma words on the dependency path; + \item the \textsc{pos} of the infix words; + \item the type of the entity pair (e.g.\ person--location); + \item the type of the head entity (e.g.\ person); + \item the type of the tail entity (e.g.\ location); + \item the words on the dependency path between the two entities. + \end{enumerate} +\end{frame} diff --git a/presentation/supplementary/gumbel.tex b/presentation/supplementary/gumbel.tex @@ -0,0 +1,19 @@ +\begin{frame}{Gumbel--Softmax Model}% + \begin{equation*} + \pi_r = \frac{(\exp(y_r)+\rndm{G}_r)\divslash\tau}{\sum_{r'\in\relationSet}(\exp(y_{r'})+\rndm{G}_{r'})\divslash\tau} + \end{equation*} + + \bigskip + + \centering% + \begin{tabular}{c r r r r r r r} + \toprule + \multirow{2}{*}{Confidence} & \multicolumn{3}{c}{\bcubed} & \multicolumn{3}{c}{V-measure} & \multirow{2}{*}{\textsc{ari}} \\ + \cmidrule(lr){2-4}\cmidrule(lr){5-7} + & \fone & Prec. & Rec. & \fone & Hom. & Comp. 
& \\ + \midrule + \loss{s} regularization & 39.4 & 32.2 & 50.7 & 38.3 & 32.2 & 47.2 & 33.8 \\ + Gumbel--Softmax & 35.0 & 29.9 & 42.2 & 33.2 & 28.3 & 40.2 & 25.1 \\ + \bottomrule + \end{tabular}% +\end{frame} diff --git a/presentation/supplementary/line graph.tex b/presentation/supplementary/line graph.tex @@ -0,0 +1,4 @@ +\begin{frame}{Line Graph} + \centering% + \input{mainmatter/graph/line graph.tex} +\end{frame} diff --git a/presentation/supplementary/metrics.tex b/presentation/supplementary/metrics.tex @@ -0,0 +1,31 @@ +\begin{frame}{B cube}% +\begin{align*} + \bcubed \operatorname{precision}(g, c) & = \expectation_{\rndm{X},\rndm{Y}\sim\uniformDistribution(\dataSet_\relationSet)} P\left(g(\rndm{X})=g(\rndm{Y}) \mid c(\rndm{X})=c(\rndm{Y})\right) \\ + \bcubed \operatorname{recall}(g, c) & = \expectation_{\rndm{X},\rndm{Y}\sim\uniformDistribution(\dataSet_\relationSet)} P\left(c(\rndm{X})=c(\rndm{Y}) \mid g(\rndm{X})=g(\rndm{Y})\right) \\ + \bcubed \fone{}(g, c) & = \frac{2}{\bcubed{} \operatorname{precision}(g, c)^{-1} + \bcubed{} \operatorname{recall}(g, c)^{-1}} \\ +\end{align*} +\end{frame} + +\begin{frame}{V-measure}% +\begin{align*} + \operatorname{homogeneity}(g, c) & = 1 - \frac{\entropy\left(c(\rndm{X})\mid g(\rndm{X})\right)}{\entropy\left(c(\rndm{X})\right)} \\ + \operatorname{completeness}(g, c) & = 1 - \frac{\entropy\left(g(\rndm{X})\mid c(\rndm{X})\right)}{\entropy\left(g(\rndm{X})\right)} \\ + \operatorname{V-measure}(g, c) & = \frac{2}{\operatorname{homogeneity}(g, c)^{-1} + \operatorname{completeness}(g, c)^{-1}} \\ +\end{align*} +\end{frame} + +\begin{frame}{ARI}% +\begin{equation*} + \operatorname{\textsc{ri}}(g, c) = \expectation\limits_{\rndm{X},\rndm{Y}} \left[ P\left( + c(\rndm{X})=c(\rndm{Y}) \Leftrightarrow g(\rndm{X})=g(\rndm{Y}) + \right) \right] +\end{equation*} + +\smallskip + +\begin{equation*} + \operatorname{\textsc{ari}}(g, c) = + \frac{\displaystyle\operatorname{\textsc{ri}}(g, c) - 
\expectation_{c\sim\uniformDistribution(\relationSet^\dataSet)}[\operatorname{\textsc{ri}}(g, c)]} + {\displaystyle\max_{c\in\relationSet^\dataSet} \operatorname{\textsc{ri}}(g, c) - \expectation_{c\sim\uniformDistribution(\relationSet^\dataSet)}[\operatorname{\textsc{ri}}(g, c)]} +\end{equation*} +\end{frame} diff --git a/presentation/supplementary/otter.jpg b/presentation/supplementary/otter.jpg Binary files differ. diff --git a/presentation/supplementary/otter.tex b/presentation/supplementary/otter.tex @@ -0,0 +1,4 @@ +\begin{frame}{Emergency Otter}% + \centering% + \includegraphics[height=8cm]{presentation/supplementary/otter.jpg}% +\end{frame} diff --git a/presentation/supplementary/section.tex b/presentation/supplementary/section.tex @@ -0,0 +1,10 @@ +\section{Supplementary Material} +\label{sec:supplementary} +\input{presentation/supplementary/distant.tex} +\input{presentation/supplementary/features.tex} +\input{presentation/supplementary/metrics.tex} +\input{presentation/supplementary/gumbel.tex} +\input{presentation/supplementary/alignsep.tex} +\input{presentation/supplementary/spectral.tex} +\input{presentation/supplementary/line graph.tex} +\input{presentation/supplementary/otter.tex} diff --git a/presentation/supplementary/spectral.tex b/presentation/supplementary/spectral.tex @@ -0,0 +1,17 @@ +\begin{frame}{GCN Spatial \& Spectral}% + \begin{block}{Spectral (convolution is multiplication in Fourier space)} + \centering% + \begin{tabular}{l l l} + & \strong{Graph} & \strong{Euclidean} \\ + \midrule + Laplacian & \(\mtrx{L}=\mtrx{D}-\mtrx{M}\) & \(\nabla^2\) \\ + \(\hookrightarrow\) Eigenfunctions & \(\mtrx{U}\) s.t.~\(\mtrx{L}=\mtrx{U}\mtrx{\Lambda}\mtrx{U}^{-1}\) & \(\xi\mapsto e^{2\pi i\xi x}\) \\ + Fourier transform & \(\mtrx{U}\transpose\vctr{f}\) & \(\gffourier(f) = \int_{-\infty}^{\infty} f(x) e^{2\pi i\xi x} \diff x\) \\ + Convolution & \(\mtrx{U}(\mtrx{U}\transpose\vctr{w}\mtrx{U}\transpose\vctr{f})\) & 
\(\gfinvfourier(\gffourier(w)\gffourier(f))\) + \end{tabular} + \end{block} + \begin{block}{Spatial} + \centering% + \(\displaystyle\operatorname{\text{GCN}}(\mtrx{X}; \mtrx{W})_v = \ReLU\left(\frac{1}{|\gfneighbors(v)|} \sum_{n_i\in\gfneighbors(v)} \mtrx{W} \mtrx{X}_{n_i} \right)\) + \end{block} +\end{frame} diff --git a/thesis.bib b/thesis.bib @@ -1439,6 +1439,14 @@ location = {Hyderabad, India}, } +@book{optimal_transport, + title={Optimal transport: old and new}, + author={Cédric Villani}, + volume={338}, + year={2009}, + publisher={Springer} +} + @misc{oucuipo, author = {Gil Chevalier}, title = {Frontispice de la Bibliothèque Oucuipienne}, @@ -1765,7 +1773,7 @@ } @misc{ship_of_theseus, - author = {prefix=the, family={British Museum}}, + author = {prefix=the, family={British Museum}, given={}}, title = {Ariadne waking on the shore of Naxos}, date = {-0099/0100}, origlocation = {Herculaneum}, diff --git a/thesis.cls b/thesis.cls @@ -20,8 +20,8 @@ \NewDocumentCommand\thesis@options@error{m}{ \ClassError{thesis}{#1}{% - The options provided to the thesis class must explicitly contain one of `print'\MessageBreak - or `digital'. + The options provided to the thesis class must explicitly contain one of `print',\MessageBreak + `digital' or `presentation'. 
}% } @@ -40,11 +40,13 @@ %%% Class Options %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% print & digital +% print, digital or presentation \newif\ifthesis@digital \newif\ifthesis@print +\newif\ifthesis@presentation \DeclareOptionX{digital}{\thesis@digitaltrue} \DeclareOptionX{print}{\thesis@printtrue} +\DeclareOptionX{presentation}{\thesis@presentationtrue} % debug \newcount\thesis@debug @@ -63,25 +65,42 @@ \ProcessOptionsX\relax -% check print & digital consistency -\ifthesis@digital\ifthesis@print - \thesis@options@error{Can't enable both `digital' and `print' options at the same time} -\fi\fi -\ifthesis@digital\else\ifthesis@print\else - \thesis@options@error{Either `digital' or `print' must be chosen} -\fi\fi +% check print, digital and presentation consistency +\newcount\thesis@format@count +\thesis@format@count0\relax +\ifthesis@digital + \advance\thesis@format@count1\relax +\fi +\ifthesis@print + \advance\thesis@format@count1\relax +\fi +\ifthesis@presentation + \advance\thesis@format@count1\relax +\fi +\ifnum\thesis@format@count=1\else + \thesis@options@error{Exactly one of `digital', `print' or `presentation' must be given} +\fi %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%% Class Setup %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\PassOptionsToClass{a4paper,10pt}{book} -\LoadClass{book} +\ifthesis@presentation + \PassOptionsToClass{aspectratio=169}{beamer} + \LoadClass{beamer} + \usetheme{thesis} +\else + \PassOptionsToClass{a4paper,10pt}{book} + \LoadClass{book} +\fi % Basic dependencies \RequirePackage{fontspec} % For modern font interface -\RequirePackage[table]{xcolor} % For color handling +\ifthesis@presentation\else + \PassOptionsToPackage{table}{xcolor} +\fi +\RequirePackage{xcolor} % For color handling \RequirePackage{csquotes} % For context sensitive quotes \RequirePackage{caption} % For custom float and caption style \RequirePackage{booktabs} % For fancy 
\toprule &cie @@ -91,7 +110,13 @@ \DeclareRobustCommand\keywords[1]{\gdef\@keywords{#1}} % Continue page numbering when switching to main matter -\patchcmd{\mainmatter}{\pagenumbering{arabic}}{\gdef\thepage{\@arabic\c@page}}{}{\ClassError{thesis}{Can't keep continuous folio, patch failed.}} +\ifthesis@presentation\else + \patchcmd{\mainmatter} + {\pagenumbering{arabic}} + {\gdef\thepage{\@arabic\c@page}} + {} + {\thesis@patch@error{Class}{Can't keep continuous folio, patch failed.}} +\fi % Simple space after period \frenchspacing @@ -137,44 +162,50 @@ % Page Layout % %%%%%%%%%%%%%%% % This instantiate two geometries: withmarginpar and withoutmarginpar -\directlua{require("lib/layout").set{ - twoside=\ifthesis@print true\else false\fi, - top="2cm", - mpwidth="5cm", - mpsep="5mm", - debug=\ifnum\thesis@debug>1 true\else false\fi}} +\ifthesis@presentation\else + \directlua{require("lib/layout").set{ + twoside=\ifthesis@print true\else false\fi, + top="2cm", + mpwidth="5cm", + mpsep="5mm", + debug=\ifnum\thesis@debug>1 true\else false\fi}} +\fi %%%%%%%%%%% % Headers % %%%%%%%%%%% -\RequirePackage{fancyhdr} -\fancyhf{} -\RenewDocumentCommand\headrulewidth{}{0mm} -\RenewDocumentCommand\footrulewidth{}{0mm} - -\ifthesis@digital - \fancypagestyle{plain}{% - \fancyhf[HR]{\thepage}% - \fancyhf[HEL]{\ifthesissummary\rightmark\else\leftmark\fi}% - \fancyhf[HOL]{\rightmark}% - } -\else %print - \fancypagestyle{plain}{% - \fancyhf[HEL,HOR]{\thepage}% - \fancyhf[HER]{\ifthesissummary\rightmark\else\leftmark\fi}% - \fancyhf[HOL]{\rightmark}% - } +\ifthesis@presentation\else + \RequirePackage{fancyhdr} + \fancyhf{} + \RenewDocumentCommand\headrulewidth{}{0mm} + \RenewDocumentCommand\footrulewidth{}{0mm} + + \ifthesis@digital + \fancypagestyle{plain}{% + \fancyhf[HR]{\thepage}% + \fancyhf[HEL]{\ifthesissummary\rightmark\else\leftmark\fi}% + \fancyhf[HOL]{\rightmark}% + } + \else %print + \fancypagestyle{plain}{% + \fancyhf[HEL,HOR]{\thepage}% + 
\fancyhf[HER]{\ifthesissummary\rightmark\else\leftmark\fi}% + \fancyhf[HOL]{\rightmark}% + } + \fi + \pagestyle{plain} + + % Setup header content \fi -\pagestyle{plain} -% Setup header content -\RenewDocumentCommand\chaptermark{m}{\markboth{\if@mainmatter\thechapter\ \fi#1}{}} -\RenewDocumentCommand\sectionmark{m}{\markright{\thesection\ #1}} +\DeclareDocumentCommand\chaptermark{m}{\markboth{\if@mainmatter\thechapter\ \fi#1}{}} +\DeclareDocumentCommand\sectionmark{m}{\markright{\thesection\ #1}} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Commands for switching geometry % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % marginparwidth + marginparsep +\ifthesis@presentation\else \NewDocumentCommand\margintotal{}{55mm} \NewDocumentCommand\withmarginpar{}{% @@ -193,6 +224,7 @@ \NewDocumentEnvironment{fullwidth}{} {\begin{adjustwidth}{}{-\margintotal}} {\end{adjustwidth}} +\fi %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Add version information to every page % @@ -201,14 +233,17 @@ \directlua{draft_version = require("lib/draft version")} \NewDocumentCommand\draftVersion{}{\directlua{draft_version.draft_version()}} \ifnum\thesis@debug>0 - \AddToShipoutPictureFG{% - \AtPageLowerLeft{% - \hspace{2mm}% - \makebox[0pt][l]{% - \rotatebox{90}{% - \hspace{2mm}% - \color{black}\ttfamily\footnotesize % - draft \draftVersion}}}} + % Version information is inserted by the beamer style for the presentation + \ifthesis@presentation\else + \AddToShipoutPictureFG{% + \AtPageLowerLeft{% + \hspace{2mm}% + \makebox[0pt][l]{% + \rotatebox{90}{% + \hspace{2mm}% + \color{black}\ttfamily\footnotesize % + draft \draftVersion}}}} + \fi \fi %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -262,6 +297,7 @@ %%%%%%%%%% % Titles % %%%%%%%%%% +\ifthesis@presentation\else \RequirePackage{titlesec} \newfontfamily\garamond{EB Garamond}[Ligatures=TeX] \titleformat{\chapter}% command @@ -282,6 +318,7 @@ \ifthesissummary \RenewDocumentCommand\thesection{}{\@arabic\c@section} \fi +\fi 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -305,6 +342,9 @@ % Symbols with no glyph in Latin Modern \setmathfont[range={\setminus}]{XITS Math} +% Remove incorrect skip when a block start with a display equation +\NewDocumentCommand\unskipdisplay{}{\vspace*{-\baselineskip}\setlength\belowdisplayshortskip{0pt}} + %%%%%%%%%%%%%%%%%%%%%%%%% % Uppercase PDF strings % %%%%%%%%%%%%%%%%%%%%%%%%% @@ -323,6 +363,11 @@ } \ExplSyntaxOn +\ifthesis@presentation +\def\textsc#1{% + {\text{\text_uppercase:n{#1}}}% +} +\else \def\textsc#1{% \texorpdfstring% {{% @@ -330,12 +375,14 @@ \oldtextsc{#1}}}% {\text_uppercase:n{#1}}% Use uppercase for PDF strings (e.g. in PDF bookmarks) where small caps should appear. } +\fi \ExplSyntaxOff %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%% Floats Handling %%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\ifthesis@presentation\else \RequirePackage{newfloat} % For defining new floats \RequirePackage{tocloft} % For defining List of Illustrations \RequirePackage[oneside]{sidenotes} % For margin floats @@ -351,11 +398,12 @@ \sidenotetext[#1][#2]{\ignorespaces #3}% \@sidenotes@multimarker% } +\fi %%%%%%%%%%%%%%%%%%% % Algorithm float % %%%%%%%%%%%%%%%%%%% - +\ifthesis@presentation\else % List of Algorithms \NewDocumentCommand\listalgorithmname{}{List of Algorithms} \newlistof[chapter]{algorithm}{loa}{\listalgorithmname} @@ -380,10 +428,12 @@ \end{lrbox}% \@sidenotes@placemarginal{#1}{\usebox{\@sidenotes@marginalgorithmbox}}% } +\fi %%%%%%%%%%%%%%%%%%%%% % Margin Appearance % %%%%%%%%%%%%%%%%%%%%% +\ifthesis@presentation\else % Use the smaller font inside margins \captionsetup{font=marginsize} \AtBeginEnvironment{marginfigure}{\marginsize} @@ -392,6 +442,7 @@ % Smaller spacing between figure and caption \setlength{\abovecaptionskip}{2mm} +\fi % Modify marginnote package to always print on the right \patchcmd{\@mn@@@marginnote} @@ -405,6 +456,7 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Chapter handling 
for TOC & LOI % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\ifthesis@presentation\else \NewDocumentCommand\fixloititlefont{m}{ \expandafter\gdef\csname cft#1titlefont\endcsname{\garamond\Huge} } @@ -419,10 +471,12 @@ {} {\thesis@patch@error{Class}{Prepend failed, can't \string\cleardoublepage\space before \string\tableofcontents.}} } +\fi %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Lower case for special headers % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\ifthesis@presentation\else \NewDocumentCommand\MakeMarkcase{}{} % For biblatex \NewDocumentCommand\thesis@lower@head{m m}{% Patch command for frontmatter lists \RenewDocumentCommand{#1}{}{\@mkboth{#2}{#2}}% @@ -431,6 +485,7 @@ \thesis@lower@head{\cftmarklof}{\listfigurename} \thesis@lower@head{\cftmarklot}{\listtablename} \thesis@lower@head{\cftmarkloa}{\listalgorithmname} +\fi %%%%%%%%%%%%%%%%%%%%%%%% % Algorithm formatting % @@ -460,6 +515,7 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Universal Caption Command % %%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\ifthesis@presentation\else % Store the type of float (m=main area, s=side margin, w=wide) \def\thesis@float@type{} @@ -503,6 +559,7 @@ \fi% \fi% } +\fi %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -546,6 +603,14 @@ \setcounter{biburlucpenalty}{200} \setcounter{biburlnumpenalty}{100} +% Sort by family name without prefix first +\DeclareSortingNamekeyTemplate{ + \keypart{\namepart{family}} + \keypart{\namepart{prefix}} + \keypart{\namepart{given}} + \keypart{\namepart{suffix}} +} + %%%%%%%%%%%%%%%%%%%%%%%% % References in margin % %%%%%%%%%%%%%%%%%%%%%%%% @@ -613,7 +678,10 @@ \usetikzlibrary{decorations.pathreplacing} % for decorate \usetikzlibrary{decorations.text} % for text along path \usetikzlibrary{matrix} % for matrix of nodes -\usetikzlibrary{patterns} % for pattern +\usetikzlibrary{tikzmark} % to easily reference coordinates on the page +\usetikzlibrary{overlay-beamer-styles} % handle background layers when using beamer overlays +\usetikzlibrary{patterns} % for patterns 
+\usetikzlibrary{patterns.meta} % for parametrized patterns \usetikzlibrary{positioning} % for above=of \usetikzlibrary{shapes.geometric} % for regular polygon \usetikzlibrary{svg.path} diff --git a/thesis.sty b/thesis.sty @@ -6,12 +6,15 @@ \RequirePackage{lua-ul} % For \underLine \RequirePackage{ccicons} % For Creative Commons licence logos \RequirePackage{setspace} % For \onehalfspacing -\RequirePackage[inline]{enumitem} % For fancy itemize etc +\ifthesis@presentation\else + \RequirePackage[inline]{enumitem} % For fancy itemize etc +\fi \RequirePackage{array} % For \newcolumntype \RequirePackage{tabularx} % For X-like column types \RequirePackage{longtable} % For pagebreak inside a table \RequirePackage{multirow} % For \multirow and \multicolumn \RequirePackage{graphicx} % For \includegraphics on steroids +\RequirePackage{graphbox} % For additional \includegraphics options \RequirePackage{hyperref} % For hypertext functionalities \RequirePackage{bookmark} % For \pdfbookmark @@ -39,16 +42,37 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%% % Hyperlink configuration % %%%%%%%%%%%%%%%%%%%%%%%%%%% -\hypersetup{ - linktoc=all, - colorlinks=true, - linkcolor=red!60!black, - citecolor=green!60!black, - filecolor=cyan!60!black, - menucolor=red!60!black, - urlcolor=magenta!60!black, - pdfdisplaydoctitle=true, -} +\ifthesis@presentation + \hypersetup{ + linktoc=all, + colorlinks=false, + pdfdisplaydoctitle=true, + } +\else + \ifthesis@digital + \hypersetup{ + linktoc=all, + colorlinks=true, + linkcolor=red!60!black, + citecolor=green!60!black, + filecolor=cyan!60!black, + menucolor=red!60!black, + urlcolor=magenta!60!black, + pdfdisplaydoctitle=true, + } + \else + \hypersetup{ + linktoc=all, + colorlinks=true, + linkcolor=black, + citecolor=black, + filecolor=black, + menucolor=black, + urlcolor=black, + pdfdisplaydoctitle=true, + } + \fi +\fi %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -93,7 +117,7 @@ % Math commands % %%%%%%%%%%%%%%%%% 
\DeclareMathOperator*\expectation{\symbb{E}} -\DeclareMathOperator*\entropy{H} +\DeclareMathOperator\entropy{H} \DeclareMathOperator*\argmax{argmax} \DeclareMathOperator\pmi{pmi} \NewDocumentCommand\laplace{}{\symup{\Delta}} @@ -156,7 +180,7 @@ \NewDocumentCommand\sentenceSet{}{{\symcal{S}}} \NewDocumentCommand\dataSet{}{{\symcal{D}}} \NewDocumentCommand\arcSet{}{{\symcal{A}}} -\NewDocumentCommand\kbSet{}{{\dataSet_\textup{\textsc{kb}}}} +\NewDocumentCommand\kbSet{}{{\dataSet_{\textup{\textsc{kb}}}}} \NewDocumentCommand\itemSet{}{{\symcal{I}}} %%%%%%%%%%%%%%%%%%%%%%% @@ -285,9 +309,13 @@ %%%%%%%%%%%%%%%%% \NewDocumentCommand\empP{}{\ensuremath{\hat{P}}} \NewDocumentCommand\loss{O{\textsc} m}{\ensuremath{\symcal{L}_{#1{#2}}}} -\NewDocumentCommand\problem{m}{\ensuremath{\symscr{P}\;#1}} +\ProvideDocumentCommand\problem{m}{\ensuremath{\symscr{P}\;#1}} \NewDocumentCommand\bertArch{m}{\textsc{bert-}\discretionary{}{}{}\texttt{#1}} -\NewDocumentCommand\bertcoder{}{\ensuremath{\operatorname{\textsc{bert}coder}}} +\ifthesis@presentation + \NewDocumentCommand\bertcoder{}{\ensuremath{\operatorname{\text{BERTcoder}}}} +\else + \NewDocumentCommand\bertcoder{}{\ensuremath{\operatorname{\textsc{bert}coder}}} +\fi \NewDocumentCommand\fone{}{\ensuremath{F_1}} \NewDocumentCommand\bcubed{}{\ensuremath{\symup{B}^3}} \NewDocumentCommand\blanktag{}{\texttt{<}\textsc{blank}\texttt{/>}} @@ -365,7 +393,13 @@ \directlua{render = require("lib/render")} \NewDocumentCommand\renderEmbeddings{m}{\directlua{render.embeddings([[#1]])}} -\NewDocumentCommand\renderConfusions{m m m m m m m m}{\directlua{render.confusions("\luatexluaescapestring{\unexpanded{#1}}", "\luatexluaescapestring{\unexpanded{#2}}", "\luatexluaescapestring{\unexpanded{#3}}", "\luatexluaescapestring{\unexpanded{#4}}", "\luatexluaescapestring{\unexpanded{#5}}", "\luatexluaescapestring{\unexpanded{#6}}", "\luatexluaescapestring{\unexpanded{#7}}", "\luatexluaescapestring{\unexpanded{#8}}")}} 
+\NewDocumentCommand\renderConfusionsDimensions{m m m}{ + \def\thesis@confusion@mdelta{#1} + \def\thesis@confusion@cdelta{#2} + \def\thesis@confusion@radius{#3} +} +\renderConfusionsDimensions{3}{0.27}{0.15} +\NewDocumentCommand\renderConfusions{s m m m m m m m m}{\directlua{render.confusions(\thesis@confusion@mdelta, \thesis@confusion@cdelta, \thesis@confusion@radius, "\luatexluaescapestring{\unexpanded{#2}}", "\luatexluaescapestring{\unexpanded{#3}}", "\luatexluaescapestring{\unexpanded{#4}}", "\luatexluaescapestring{\unexpanded{#5}}", "\luatexluaescapestring{\unexpanded{#6}}", "\luatexluaescapestring{\unexpanded{#7}}", "\luatexluaescapestring{\unexpanded{#8}}", "\luatexluaescapestring{\unexpanded{#9}}", \IfBooleanTF{#1}{true}{false})}} \NewDocumentCommand\renderDegrees{m}{\directlua{render.degrees([[#1]])}} @@ -388,3 +422,5 @@ \newfontfamily\traditionalChineseFont{I.Ming}[Ligatures=TeX,LetterSpace=15,Vertical=Alternates] \NewDocumentCommand\traditionalChinese{m}{\traditionalChineseFont\fontsize{9pt}{11pt}\selectfont #1} + +\newfontfamily\japaneseFont{IPAMincho}[Ligatures=TeX] diff --git a/thesis.tex b/thesis.tex @@ -15,7 +15,7 @@ \withoutmarginpar \include{frontmatter/title} \include{frontmatter/abstract} - %\include{frontmatter/acknowledgements} + \include{frontmatter/acknowledgements} \tableofcontents \listoffigures \listoftables