// TIGERSearch templates for searching topological fields
// File: topological_fields_tlt13.tig
//////////////////////////////////////////
// TEMPLATES FOR TIGER-SCHEME CORPORA
//////////////////////////////////////////
// This file contains templates for querying topological fields
// in the TIGER corpus, as used and described in Dipper (2014),
// Proceedings of TLT13.
//
// Most templates apply to the "context-free" version of TIGER,
// i.e. without crossing branches (= templates with suffix _cf).
// Only the VF template comes in two versions, one for the context-free
// and one for the version with crossing branches (= suffix _enr).
// The templates have been tested on an "enriched" version of TIGER,
// i.e. one with unary nodes (e.g. all pronouns are dominated by an NP)
//
// The file contains
// 1. basic templates
// - precedence relations
// - left-corner relation (for non-terminals)
// 2. topological templates
// - VF (including V2)
// - LK
// - RK
// - MF
// - NF
//////////////////////////////////////////
// Basic templates
//////////////////////////////////////////
// Precedence relation
// - right corner of #x precedes left corner of #y
// - covers non-terminal nodes
// - allows for intervening quotes
prec(#x,#y) <-
    // Generalised precedence: the right edge of #x is followed by #y.
    // Works for terminal and non-terminal #x; a quote token may intervene.
    (   // Case A: #x is a terminal, so the precedence operator applies directly
        #x: [word=/.*/]
      & #x . #y
    |   // Case B: #x is a non-terminal, so go through its right corner
        #x: [cat=/.*/]
      & #x >@r #xRight
      & #xRight . #y
    |   // Case C: as A or B, but with a quote token in between
        prec_quote(#x,#y)
    )
;
prec_quote(#x,#y) <-
    // Precedence across one quote token:
    // right edge of #x . quote . #y
    #q: [word=("\""|"``"|"''")]
    &
    (   // Case A: #x is a terminal and is itself followed by the quote
        #x: [word=/.*/]
      & #x . #q
    |   // Case B: #x is a non-terminal; its right corner is followed by the quote
        #x: [cat=/.*/]
      & #x >@r #xRight
      & #xRight . #q
    )
    // In both cases the quote is in turn followed by #y
    & #q . #y
;
prec_comma(#x,#y) <-
    // Precedence across one comma token:
    // right edge of #x . comma . #y
    #c: [word="\,"]
    &
    (   // Case A: #x is a terminal and is itself followed by the comma
        #x: [word=/.*/]
      & #x . #c
    |   // Case B: #x is a non-terminal; its right corner is followed by the comma
        #x: [cat=/.*/]
      & #x >@r #xRight
      & #xRight . #c
    )
    // In both cases the comma is in turn followed by #y
    & #c . #y
;
// Left-corner dominance
// like >@l but can be used with nonterminals
hasLeftChild(#x,#y) <-
    // Left-corner test that also accepts a non-terminal #y.
    (   // Terminal case: #y itself is the left corner of #x
        #x >@l #y
    |   // Non-terminal case: #y dominates the node that is the left corner of #x
        #y >* #corner
      & #x >@l #corner
    )
;
//////////////////////////////////////////
// Vorfeld constituent
//////////////////////////////////////////
// For efficiency reasons broken up into two parts:
// 1. VFmain covers VF (and V2) in main clauses
// 2. VFsub covers VF in subord. clauses
// 1. VF (and V2) in main clauses
// Note: not all VF constituents are non-terminals
// e.g. 'hinzu/PTKVZ kommen/VVFIN einige...'
VFmain_cf(#vf,#v2) <-
    // Vorfeld (#vf) and verb-second (#v2) of a main clause, context-free version.
      #clause: [cat="S"]
    & #v2: [pos=/V.FIN/]        // finite verb ...
    & #clause > #vf             // Vorfeld candidate: a daughter of the clause
    & #clause >HD #v2           // ... which is the head of the clause
    // (a) #vf comes first in the clause
    & (   // either it sits at the left corner of the clause
          hasLeftChild(#clause,#vf)
      |   // or the left corner is a conjunction (edge JU) that precedes #vf
          #clause >@l #coord
        & [] >JU #coord
        & prec(#coord,#vf)
      )
    // (b) #vf is followed by #v2
    & (   // either directly
          prec(#vf,#v2)
      |   // or across a comma, when #vf itself is of category S or VP
          #vf: [cat=("S"|"VP")]
        & prec_comma(#vf,#v2)
      |   // or across a comma, when a constituent inside #vf is of category
          // S or VP, or is attached by an APP edge
          #vf >* #inner
        & (   #inner: [cat=("S"|"VP")]
          |   [] >APP #inner
          )
        & prec_comma(#inner,#v2)
      )
;
// 2. VF in subordinate clauses
VFsub_cf(#vf) <-
    // Vorfeld of a subordinate clause: the clause-initial daughter,
    // provided it contains a relative or interrogative element.
      #clause: [cat="S"]
    & #clause > #vf                   // #vf is a daughter of the clause
    & hasLeftChild(#clause,#vf)       // and sits at its left corner
    & #vf >* #relOrWh                 // and dominates a token whose
    & #relOrWh: [pos=/.*(REL|W).*/]   // POS tag contains REL or W
;
// VF, version for crossing branches
VFmain_enr(#vf,#v2) <-
    // Vorfeld (#vf) and verb-second (#v2) of a main clause, version for
    // crossing branches.
    // The VF material belongs to a discontinuous constituent, so the daughter
    // of that discontinuous node is returned as #vf.
    //   #vf: Vorfeld constituent
    //   #v2: finite verb in second position
    #s: [cat="S"]
    & #v2: [pos=/V.FIN/]
    // #v2 is the head of #s, exactly as in VFmain_cf.
    // (An earlier version constrained an otherwise unused variable here,
    // which left #v2 unrelated to #s.)
    & #s >HD #v2
    & #s >* #vf_disc // #vf_disc: discontinuous mother of VF constituent
    & discontinuous(#vf_disc)
    & #vf_disc > #vf
    // VF is first constituent
    & ( // 1. VF is very first element in the sentence
        hasLeftChild(#s,#vf) // #vf is left-most child
      | // 2. Some coordinating conjunction precedes VF
        #s >@l #conj
        & [] >JU #conj
        & prec(#conj,#vf)
      )
    // VF precedes VFIN
    & ( // 1. VF directly precedes V2
        prec(#vf,#v2)
      | // 2. A comma may intervene after clausal or appositive VF
        ( #vf: [cat=("S"|"VP")] // either VF itself precedes comma
          & prec_comma(#vf,#v2)
        | #vf >* #clause_app // or some embedded constituent
          & ( #clause_app: [cat=("S"|"VP")]
            | [] >APP #clause_app
            )
          & prec_comma(#clause_app,#v2)
        )
      )
;
//////////////////////////////////////////
// Left Bracket + Verb second
//////////////////////////////////////////
// Note: LK only covers subordinating conjunctions
// LK filled by V2 has to be searched via the template VFmain!
LK_cf(#lk) <-
    // Left bracket filled by a subordinating conjunction:
    // a token tagged KOUS or KOUI that has a mother node.
      #lk: [pos=/KOUS|KOUI/]
    & [] > #lk
;
//////////////////////////////////////////
// Right Bracket / Verb cluster
//////////////////////////////////////////
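// Template broken up into three [text missing in this copy] s not terminate in TIGERSearch
// NOTE(review): the comment above is truncated. Presumably the remainder said
// that an unsplit template does not terminate in TIGERSearch -- TODO confirm
// against the original file. The RK template itself is not present here.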
MF_cf_orig(#mf1,#mf2) <-
// Relates two non-terminal nodes to the sentence brackets:
//   #mf1: constituent that follows a left bracket #lk (via prec)
//   #mf2: constituent that is followed by a right bracket #rk (via prec)
// No relation between #mf1 and #mf2, or between #lk and #rk, is stated here.
// NOTE(review): the suffix _orig and the nearby remark about non-termination
// suggest this is the original, unsplit Mittelfeld template -- TODO confirm.
#mf1: [cat=/.*/] // some constituent (any non-terminal)
& #mf2: [cat=/.*/] // some constituent (any non-terminal)
& #lk: [] & LK_cf(#lk) // left bracket, as defined by LK_cf
& #rk: [] & RK_cf(#rk) // right bracket; RK_cf is not defined in the visible part of this file
& prec(#lk,#mf1) // left bracket is directly followed by #mf1 (a quote may intervene)
& prec(#mf2,#rk) // #mf2 is directly followed by right bracket (a quote may intervene)
;
//////////////////////////////////////////
// Nachfeld constituents
//////////////////////////////////////////
// The template only marks the beginning of NF (NFB)
// Note: the template uses a very simple heuristic!
// Template comes in two versions
// 1. NFB marks the first word of NF
// 2. NFBconst marks the first constituent of NF
// (may result in spurious ambiguities)
NFB_cf(#nfB) <-
    // Beginning of the Nachfeld, token version: #nfB is the first word after
    // the right bracket #rk.
    // Step 1: identify the right bracket #rk
    (   // a token with one of the tags V.INF, V.IZU, V.PP or PTKVZ
        #rk: [pos=/(V.INF|V.IZU|V.PP|PTKVZ)/]
    |   // or a finite verb, but only directly after a V.INF/V.IZU/V.PP token
        #vNonFin: [pos=/(V.INF|V.IZU|V.PP)/]
      & #rk: [pos=/V.FIN/]
      & #vNonFin . #rk
    )
    // Step 2: #nfB must not belong to the right bracket itself (no verb or
    // particle tag) and must not carry a tag starting with $
    & #nfB: [pos!=/(V.FIN|V.INF|V.IZU|V.PP|PTKZU|PTKVZ)/ & pos!=/\$.*/]
    // Step 3: #rk is followed by #nfB
    & (   // directly
          prec(#rk,#nfB)
      |   // or with a comma in between
          prec_comma(#rk,#nfB)
      )
;
NFBconst_cf(#nfB) <-
    // Beginning of the Nachfeld, constituent version: #nfB is a non-terminal
    // that follows the right bracket #rk.
    // Step 1: identify the right bracket #rk
    (   // a token with one of the tags V.INF, V.IZU, V.PP or PTKVZ
        #rk: [pos=/(V.INF|V.IZU|V.PP|PTKVZ)/]
    |   // or a finite verb, but only directly after a V.INF/V.IZU/V.PP token
        #vNonFin: [pos=/(V.INF|V.IZU|V.PP)/]
      & #rk: [pos=/V.FIN/]
      & #vNonFin . #rk
    )
    // Step 2: #nfB is any non-terminal node
    & #nfB: [cat=/.*/]
    // Step 3: #rk is followed by #nfB
    & (   // directly
          prec(#rk,#nfB)
      |   // or with a comma in between
          prec_comma(#rk,#nfB)
      )
;