# FPUH YAML source

# Feature entry for FPUH ("Filled pauses and interjections").
code: FPUH
mfte_code: FPUH
name: Filled pauses and interjections
definition: >-
  Hesitation markers and interjections:
  oh, er, hmm, uh, ah, etc.
normalization: words

# Two detection rules follow, both attributed to the mfte source.
detection:
  # Rule 1: explicit word list, matched on the `word` attribute.
  # NOTE(review): the {words} placeholder in the CQL presumably expands to
  # the `words` list of this rule -- confirm against the loader.
  - source: mfte
    requires:
      - word
    cql: '[word={words}]'
    words:
      - ah
      - aw
      - bye
      - eh
      - er
      - erm
      - ha
      - heh
      - hey
      - hi
      - hm
      - hmm
      - huh
      - mmm
      - oh
      - oi
      - ouch
      - ow
      - uh
      - uhm
      - um
      - woops
      - yeh
  # Rule 2: catch-all on the UH part-of-speech tag. Per the description
  # below, tokens already claimed by DMA are excluded via `combine`.
  - source: mfte
    requires:
      - pos
    cql: '[pos="UH"]'
    combine: '_ & !DMA'
    description: >-
      MFTE catch-all (line 1225): all remaining UH-tagged tokens
      become FPUH. This catches interjections not in the explicit
      word list (e.g., "Sure" tagged UH by Stanza). DMA tokens
      excluded because DMA runs earlier and consumes some UH tokens.
sources:
  - mfte