Module clu.phontools.struct
Expand source code
from enum import Enum
from abc import ABC, abstractmethod
from typing import List, Dict, Tuple, Any, Text, Sequence
import re
import itertools
from autocorrect import Speller
from pydantic import BaseModel
from pydantic.dataclasses import dataclass
import numpy as np
Pronunciation = Tuple[Text, ...]
Phone = Text
# type alias for a string of characters representing a word
SimpleWord = Text
spell = Speller(lang="en")
class Hashable(ABC):
"""Ensures subclasses are hashable"""
@abstractmethod
def __hash__(self) -> int:
pass
class Stress(Enum):
"""Enumeration of all possible stress values"""
NON_VOWEL = "-"
"""The phone is not a vowel (i.e., it cannot have a stress assignment)"""
NO_STRESS = "0"
"""The phone (vowel) is unstressed."""
PRIMARY = "1"
"""The phone (vowel) receives primary stress"""
SECONDARY = "2"
"""The phone (vowel) receives secondary stress."""
def __repr__(self) -> Text:
return f"Stress.{self.name}"
class CoarseStress(Enum):
"""A coarse representation of stress is categorized as being either strong (S) of weak (W)."""
STRONG = "S"
"""Strong stress. Corresponds to `clu.phontools.struct.Stress.PRIMARY` and `clu.phontools.struct.Stress.SECONDARY`"""
WEAK = "W"
"""Weak stress. Corresponds to `clu.phontools.struct.Stress.NO_STRESS`"""
def __repr__(self) -> Text:
return f"CoarseStress.{self.name}"
class SyllableProperties(ABC):
"""Propertie and manipulations of syllables."""
"""A sequence of `clu.phontools.struct.Stress` assignments"""
def __init__(self):
self.stress_pattern: Sequence[Stress] = []
@property
def coarse_stress_pattern(self) -> Sequence[CoarseStress]:
"""Maps a detailed stress sequence to a sequence of strong and weak stressed syllabled"""
summary = []
for stress in self.stress_pattern:
if stress == Stress.NO_STRESS:
summary.append(CoarseStress.WEAK)
elif stress in {Stress.PRIMARY, Stress.SECONDARY}:
summary.append(CoarseStress.STRONG)
return summary
@property
def coarse_stress(self) -> Text:
"""Converts a phonological word to a sequence of S (strong) or W (weak) symbols"""
return "".join(cs.value for cs in self.coarse_stress_pattern)
def mask_syllables(self, mask: Text = "X") -> Text:
"""Converts a phonological word where each syllable is represented using the mask
conceptual examples:
"poo" -> "X" where mask is "X"
"July" -> "XX" where mask is "X"
"""
return "".join(mask for cs in self.coarse_stress_pattern)
@property
def num_syllables(self):
"""A syllable contains at most one stressed element (weak or strong)"""
return len(self.coarse_stress_pattern)
# FIXME: consider adding PhonologicalSystem(Enum) -> ARPABET, IPA, XAMPA, etc.
class PhonologicalWord(BaseModel, SyllableProperties, Hashable):
"""A [phonological word](https://en.wikipedia.org/wiki/Phonological_word) composed of one or more syllables
:param phones: a sequences of phonological symbols (character, kana, etc.)
:param stress_pattern: a sequence of `clu.phontools.struct.Stress` assignments (one for each of the `phones`)
"""
phones: Sequence[Phone]
"""NOTE: For an EnglishSyllable, use `clu.phontools.lang.en.EnglishUtils.pronouncing_dict` as part of @staticmethod factory constructor"""
stress_pattern: Sequence[Stress]
def __iter__(self) -> Phone:
for phone in self.phones:
yield phone
def __getitem__(self, i: int) -> Phone:
return self.phones[i]
def __reversed__(self) -> "Phrase":
return PhonologicalWord(phones=self.phones[::-1])
def __contains__(self, item) -> bool:
return True if item in self.phones else False
def __len__(self) -> int:
return len(self.phones)
def __hash__(self) -> int:
return hash(tuple(list(self.phones) + list(self.stress_pattern)))
class Word(BaseModel, Hashable):
r"""The smallest sequence of phonemes that can be uttered in isolation with objective or practical meaning.
:param word: orthographic representation of this Word
:param phonological_form: the phonological form of this Word
"""
word: Text
phonological_form: PhonologicalWord
@property
def pf(self) -> PhonologicalWord:
"""Alias for phonological_form"""
return self.phonological_form
def graphemes(self) -> Sequence[Text]:
"""Individual characters/symbols that comprise the orthographic representation of the `clu.phontools.struct.Word`"""
return "".split(self.word)
def __hash__(self) -> int:
return hash((("word", self.word), ("pf", hash(self.phonological_form))))
class Phrase(BaseModel, Hashable):
"""A sequence of `clu.phontools.struct.Word` constitutes a Phrase.
:param words: The sequence of `clu.phontools.struct.Word` that constitutes this Phrase.
"""
words: Sequence[Word]
@property
def coarse_stress(self) -> Sequence[Text]:
"""Returns coarse stress form for each word in the Phrase"""
return [word.pf.coarse_stress for word in self.words]
def mask_syllables(self, mask: Text = "X") -> Sequence[Text]:
"""Returns coarse stress form for each word in the Phrase"""
return [word.pf.mask_syllables(mask) for word in self.words]
def words_as_phones(self) -> Sequence[Text]:
"""Represent `clu.phontools.struct.Phrase` using a sequence of symbols denoting the phones of each word
Example:
```python
from clu.phontools.struct import *
# define a phrase
phrase = Phrase(
words=[
Word(
word='hello',
phonological_form=PhonologicalWord(
phones=('HH', 'EH0', 'L', 'OW1'),
stress_pattern=[
Stress.NON_VOWEL,
Stress.NO_STRESS,
Stress.NON_VOWEL,
Stress.PRIMARY
]
)
),
Word(
word='world',
phonological_form=PhonologicalWord(
phones=('W', 'ER1', 'L', 'D'),
stress_pattern=[
Stress.NON_VOWEL,
Stress.PRIMARY,
Stress.NON_VOWEL,
Stress.NON_VOWEL
]
)
)
]
)
phrase.to_phones()
# should return ['HH EH0 L OW1', 'W ER1 L D']
```
"""
return [" ".join(word.pf.phones) for word in self.words]
@property
def phones(self) -> Sequence[Text]:
"""Represent `clu.phontools.struct.Phrase` using a flat sequence of symbols denoting the phones of each word
Example:
```python
from clu.phontools.struct import *
# define a phrase
phrase = Phrase(
words=[
Word(
word='hello',
phonological_form=PhonologicalWord(
phones=('HH', 'EH0', 'L', 'OW1'),
stress_pattern=[
Stress.NON_VOWEL,
Stress.NO_STRESS,
Stress.NON_VOWEL,
Stress.PRIMARY
]
)
),
Word(
word='world',
phonological_form=PhonologicalWord(
phones=('W', 'ER1', 'L', 'D'),
stress_pattern=[
Stress.NON_VOWEL,
Stress.PRIMARY,
Stress.NON_VOWEL,
Stress.NON_VOWEL
]
)
)
]
)
phrase.to_phones()
# should return ["HH", "EH0", "L", "OW1", "W", "ER1", "L", "D"]
```
"""
return [phone for word in self.words for phone in word.pf.phones]
@property
def coarse_stress_pattern(self) -> Sequence[CoarseStress]:
"""Returns coarse stress pattern for each word in the Phrase"""
return [word.pf.coarse_stress_pattern for word in self.words]
@property
def stress_pattern(self) -> Sequence[Stress]:
"""Returns stress pattern for each word in the Phrase"""
return [word.pf.stress_pattern for word in self.words]
def match_coarse_stress_pattern(self, pattern: Text) -> bool:
"""Checks if `clu.phontools.struct.Phrase matches` the specified coarse stress pattern (a regular expression).
Example:
```python
from clu.phontools.struct import *
# define a phrase
phrase = Phrase(
words=[
Word(
word='hello',
phonological_form=PhonologicalWord(
phones=('HH', 'EH0', 'L', 'OW1'),
stress_pattern=[
Stress.NON_VOWEL,
Stress.NO_STRESS,
Stress.NON_VOWEL,
Stress.PRIMARY
]
)
)
]
)
phrase.match_coarse_stress_pattern("WS")
# should return True
```
"""
return True if re.match(pattern, " ".join(self.coarse_stress)) else False
def match_masked_syllables(self, pattern: Text, mask: Text = "X") -> bool:
"""Checks if `clu.phontools.struct.Phrase matches` the specified masked stress pattern (a regular expression).
Example:
```python
from clu.phontools.struct import *
# define a phrase
phrase = Phrase(
words=[
Word(
word='hello',
phonological_form=PhonologicalWord(
phones=('HH', 'EH0', 'L', 'OW1'),
stress_pattern=[
Stress.NON_VOWEL,
Stress.NO_STRESS,
Stress.NON_VOWEL,
Stress.PRIMARY
]
)
)
]
)
phrase.match_masked_syllables("^XX", mask="X")
# should return True
```
"""
return (
True
if re.match(pattern, " ".join(self.mask_syllables(mask=mask)))
else False
)
def __iter__(self) -> Word:
for word in self.words:
yield word
def __getitem__(self, i: int) -> Word:
return self.words[i]
def __reversed__(self) -> "Phrase":
return Phrase(words=self.words[::-1])
def __contains__(self, item) -> bool:
return True if item in self.words else False
def __len__(self) -> int:
return len(self.words)
def __hash__(self) -> int:
return hash(tuple(hash(w) for w in self.words))
class LangUtils(ABC):
"""Utilities to be implemented for each language."""
@staticmethod
@abstractmethod
def phonological_word_for(phones: Pronunciation) -> PhonologicalWord:
"""Produces a `clu.phontools.struct.PhonologicalWord` for a sequence of phones"""
pass
@staticmethod
@abstractmethod
def all_possible_forms_for(word: Text) -> Sequence[Word]:
"""Generates a list of `clu.phontools.struct.Word` from an orthographic form."""
pass
@staticmethod
@abstractmethod
def all_possible_phrases_for(words: Sequence[Text]) -> Sequence[Phrase]:
"""Generates a possible pronunciations from a sequence of words (as text)."""
pass
@staticmethod
@abstractmethod
def syllabify(pronunciation: Pronunciation) -> Sequence[Pronunciation]:
"""Abstract static method to syllabify a sequence of phones that constitute the pronunciation of a single lexical item.
Example:
A subclass that implements this method would ...
```python
MyEnglishSyllabifier.syllabify(('P', 'ER0', 'M', 'IH1', 'T'))
# should return [('P', 'ER0'), ('M', 'IH1', 'T')]
```
"""
pass
# class StressSequence:
# """A sequence of stress assignments.
# """
# sequence: List[Stress]
#
# class SimpleStressSequence:
# sequence:
# stress = [str(p.value) for p in pron]
# ['-', '0', '-', '1', '-']
# syllable_structure =
# pron -> stress -> syllable counts
# ['-', '0', '-', '1', '-'] -> "WS"
# First token has two syllables: Strong Weak
# ["SW", "S", "W", "SW"]
# Each X represents a syllable
# ["X" ,"X" ,"X", "X", "XX"]
# mask_syllable_stress(["SW", "S", "W", "SW"]) -> ["XX", "X", "X", "XX"]
# Output:
# second token of transcript (1) inserts before weak syllable
# [(1, "IW")]
Classes
class CoarseStress (value, names=None, *, module=None, qualname=None, type=None, start=1)
-
A coarse representation of stress is categorized as being either strong (S) of weak (W).
Expand source code
class CoarseStress(Enum): """A coarse representation of stress is categorized as being either strong (S) of weak (W).""" STRONG = "S" """Strong stress. Corresponds to `clu.phontools.struct.Stress.PRIMARY` and `clu.phontools.struct.Stress.SECONDARY`""" WEAK = "W" """Weak stress. Corresponds to `clu.phontools.struct.Stress.NO_STRESS`""" def __repr__(self) -> Text: return f"CoarseStress.{self.name}"
Ancestors
- enum.Enum
Class variables
var STRONG
-
Strong stress. Corresponds to
Stress.PRIMARY
andStress.SECONDARY
var WEAK
-
Weak stress. Corresponds to
Stress.NO_STRESS
class Hashable
-
Ensures subclasses are hashable
Expand source code
class Hashable(ABC): """Ensures subclasses are hashable""" @abstractmethod def __hash__(self) -> int: pass
Ancestors
- abc.ABC
Subclasses
class LangUtils
-
Utilities to be implemented for each language.
Expand source code
class LangUtils(ABC): """Utilities to be implemented for each language.""" @staticmethod @abstractmethod def phonological_word_for(phones: Pronunciation) -> PhonologicalWord: """Produces a `clu.phontools.struct.PhonologicalWord` for a sequence of phones""" pass @staticmethod @abstractmethod def all_possible_forms_for(word: Text) -> Sequence[Word]: """Generates a list of `clu.phontools.struct.Word` from an orthographic form.""" pass @staticmethod @abstractmethod def all_possible_phrases_for(words: Sequence[Text]) -> Sequence[Phrase]: """Generates a possible pronunciations from a sequence of words (as text).""" pass @staticmethod @abstractmethod def syllabify(pronunciation: Pronunciation) -> Sequence[Pronunciation]: """Abstract static method to syllabify a sequence of phones that constitute the pronunciation of a single lexical item. Example: A subclass that implements this method would ... ```python MyEnglishSyllabifier.syllabify(('P', 'ER0', 'M', 'IH1', 'T')) # should return [('P', 'ER0'), ('M', 'IH1', 'T')] ``` """ pass
Ancestors
- abc.ABC
Subclasses
Static methods
def all_possible_forms_for(word: str) ‑> Sequence[Word]
-
Generates a list of
Word
from an orthographic form.Expand source code
@staticmethod @abstractmethod def all_possible_forms_for(word: Text) -> Sequence[Word]: """Generates a list of `clu.phontools.struct.Word` from an orthographic form.""" pass
def all_possible_phrases_for(words: Sequence[str]) ‑> Sequence[Phrase]
-
Generates a possible pronunciations from a sequence of words (as text).
Expand source code
@staticmethod @abstractmethod def all_possible_phrases_for(words: Sequence[Text]) -> Sequence[Phrase]: """Generates a possible pronunciations from a sequence of words (as text).""" pass
def phonological_word_for(phones: Tuple[str, ...]) ‑> PhonologicalWord
-
Produces a
PhonologicalWord
for a sequence of phonesExpand source code
@staticmethod @abstractmethod def phonological_word_for(phones: Pronunciation) -> PhonologicalWord: """Produces a `clu.phontools.struct.PhonologicalWord` for a sequence of phones""" pass
def syllabify(pronunciation: Tuple[str, ...]) ‑> Sequence[Tuple[str, ...]]
-
Abstract static method to syllabify a sequence of phones that constitute the pronunciation of a single lexical item.
Example: A subclass that implements this method would …
MyEnglishSyllabifier.syllabify(('P', 'ER0', 'M', 'IH1', 'T')) # should return [('P', 'ER0'), ('M', 'IH1', 'T')]
Expand source code
@staticmethod @abstractmethod def syllabify(pronunciation: Pronunciation) -> Sequence[Pronunciation]: """Abstract static method to syllabify a sequence of phones that constitute the pronunciation of a single lexical item. Example: A subclass that implements this method would ... ```python MyEnglishSyllabifier.syllabify(('P', 'ER0', 'M', 'IH1', 'T')) # should return [('P', 'ER0'), ('M', 'IH1', 'T')] ``` """ pass
class PhonologicalWord (**data: Any)
-
A phonological word composed of one or more syllables
:param phones: a sequences of phonological symbols (character, kana, etc.) :param stress_pattern: a sequence of
Stress
assignments (one for each of thephones
)Create a new model by parsing and validating input data from keyword arguments.
Raises ValidationError if the input data cannot be parsed to form a valid model.
Expand source code
class PhonologicalWord(BaseModel, SyllableProperties, Hashable): """A [phonological word](https://en.wikipedia.org/wiki/Phonological_word) composed of one or more syllables :param phones: a sequences of phonological symbols (character, kana, etc.) :param stress_pattern: a sequence of `clu.phontools.struct.Stress` assignments (one for each of the `phones`) """ phones: Sequence[Phone] """NOTE: For an EnglishSyllable, use `clu.phontools.lang.en.EnglishUtils.pronouncing_dict` as part of @staticmethod factory constructor""" stress_pattern: Sequence[Stress] def __iter__(self) -> Phone: for phone in self.phones: yield phone def __getitem__(self, i: int) -> Phone: return self.phones[i] def __reversed__(self) -> "Phrase": return PhonologicalWord(phones=self.phones[::-1]) def __contains__(self, item) -> bool: return True if item in self.phones else False def __len__(self) -> int: return len(self.phones) def __hash__(self) -> int: return hash(tuple(list(self.phones) + list(self.stress_pattern)))
Ancestors
- pydantic.main.BaseModel
- pydantic.utils.Representation
- SyllableProperties
- Hashable
- abc.ABC
Class variables
var phones : Sequence[str]
-
NOTE: For an EnglishSyllable, use
EnglishUtils.pronouncing_dict
as part of @staticmethod factory constructor var stress_pattern : Sequence[Stress]
Inherited members
class Phrase (**data: Any)
-
A sequence of
Word
constitutes a Phrase.:param words: The sequence of
Word
that constitutes this Phrase.Create a new model by parsing and validating input data from keyword arguments.
Raises ValidationError if the input data cannot be parsed to form a valid model.
Expand source code
class Phrase(BaseModel, Hashable): """A sequence of `clu.phontools.struct.Word` constitutes a Phrase. :param words: The sequence of `clu.phontools.struct.Word` that constitutes this Phrase. """ words: Sequence[Word] @property def coarse_stress(self) -> Sequence[Text]: """Returns coarse stress form for each word in the Phrase""" return [word.pf.coarse_stress for word in self.words] def mask_syllables(self, mask: Text = "X") -> Sequence[Text]: """Returns coarse stress form for each word in the Phrase""" return [word.pf.mask_syllables(mask) for word in self.words] def words_as_phones(self) -> Sequence[Text]: """Represent `clu.phontools.struct.Phrase` using a sequence of symbols denoting the phones of each word Example: ```python from clu.phontools.struct import * # define a phrase phrase = Phrase( words=[ Word( word='hello', phonological_form=PhonologicalWord( phones=('HH', 'EH0', 'L', 'OW1'), stress_pattern=[ Stress.NON_VOWEL, Stress.NO_STRESS, Stress.NON_VOWEL, Stress.PRIMARY ] ) ), Word( word='world', phonological_form=PhonologicalWord( phones=('W', 'ER1', 'L', 'D'), stress_pattern=[ Stress.NON_VOWEL, Stress.PRIMARY, Stress.NON_VOWEL, Stress.NON_VOWEL ] ) ) ] ) phrase.to_phones() # should return ['HH EH0 L OW1', 'W ER1 L D'] ``` """ return [" ".join(word.pf.phones) for word in self.words] @property def phones(self) -> Sequence[Text]: """Represent `clu.phontools.struct.Phrase` using a flat sequence of symbols denoting the phones of each word Example: ```python from clu.phontools.struct import * # define a phrase phrase = Phrase( words=[ Word( word='hello', phonological_form=PhonologicalWord( phones=('HH', 'EH0', 'L', 'OW1'), stress_pattern=[ Stress.NON_VOWEL, Stress.NO_STRESS, Stress.NON_VOWEL, Stress.PRIMARY ] ) ), Word( word='world', phonological_form=PhonologicalWord( phones=('W', 'ER1', 'L', 'D'), stress_pattern=[ Stress.NON_VOWEL, Stress.PRIMARY, Stress.NON_VOWEL, Stress.NON_VOWEL ] ) ) ] ) phrase.to_phones() # should return ["HH", "EH0", "L", "OW1", "W", "ER1", "L", "D"] ``` """ return [phone for word in self.words for phone in word.pf.phones] @property def coarse_stress_pattern(self) -> Sequence[CoarseStress]: """Returns coarse stress pattern for each word in the Phrase""" return [word.pf.coarse_stress_pattern for word in self.words] @property def stress_pattern(self) -> Sequence[Stress]: """Returns stress pattern for each word in the Phrase""" return [word.pf.stress_pattern for word in self.words] def match_coarse_stress_pattern(self, pattern: Text) -> bool: """Checks if `clu.phontools.struct.Phrase matches` the specified coarse stress pattern (a regular expression). Example: ```python from clu.phontools.struct import * # define a phrase phrase = Phrase( words=[ Word( word='hello', phonological_form=PhonologicalWord( phones=('HH', 'EH0', 'L', 'OW1'), stress_pattern=[ Stress.NON_VOWEL, Stress.NO_STRESS, Stress.NON_VOWEL, Stress.PRIMARY ] ) ) ] ) phrase.match_coarse_stress_pattern("WS") # should return True ``` """ return True if re.match(pattern, " ".join(self.coarse_stress)) else False def match_masked_syllables(self, pattern: Text, mask: Text = "X") -> bool: """Checks if `clu.phontools.struct.Phrase matches` the specified masked stress pattern (a regular expression). Example: ```python from clu.phontools.struct import * # define a phrase phrase = Phrase( words=[ Word( word='hello', phonological_form=PhonologicalWord( phones=('HH', 'EH0', 'L', 'OW1'), stress_pattern=[ Stress.NON_VOWEL, Stress.NO_STRESS, Stress.NON_VOWEL, Stress.PRIMARY ] ) ) ] ) phrase.match_masked_syllables("^XX", mask="X") # should return True ``` """ return ( True if re.match(pattern, " ".join(self.mask_syllables(mask=mask))) else False ) def __iter__(self) -> Word: for word in self.words: yield word def __getitem__(self, i: int) -> Word: return self.words[i] def __reversed__(self) -> "Phrase": return Phrase(words=self.words[::-1]) def __contains__(self, item) -> bool: return True if item in self.words else False def __len__(self) -> int: return len(self.words) def __hash__(self) -> int: return hash(tuple(hash(w) for w in self.words))
Ancestors
- pydantic.main.BaseModel
- pydantic.utils.Representation
- Hashable
- abc.ABC
Class variables
var words : Sequence[Word]
Instance variables
var coarse_stress : Sequence[str]
-
Returns coarse stress form for each word in the Phrase
Expand source code
@property def coarse_stress(self) -> Sequence[Text]: """Returns coarse stress form for each word in the Phrase""" return [word.pf.coarse_stress for word in self.words]
var coarse_stress_pattern : Sequence[CoarseStress]
-
Returns coarse stress pattern for each word in the Phrase
Expand source code
@property def coarse_stress_pattern(self) -> Sequence[CoarseStress]: """Returns coarse stress pattern for each word in the Phrase""" return [word.pf.coarse_stress_pattern for word in self.words]
var phones : Sequence[str]
-
Represent
Phrase
using a flat sequence of symbols denoting the phones of each wordExample:
from clu.phontools.struct import * # define a phrase phrase = Phrase( words=[ Word( word='hello', phonological_form=PhonologicalWord( phones=('HH', 'EH0', 'L', 'OW1'), stress_pattern=[ Stress.NON_VOWEL, Stress.NO_STRESS, Stress.NON_VOWEL, Stress.PRIMARY ] ) ), Word( word='world', phonological_form=PhonologicalWord( phones=('W', 'ER1', 'L', 'D'), stress_pattern=[ Stress.NON_VOWEL, Stress.PRIMARY, Stress.NON_VOWEL, Stress.NON_VOWEL ] ) ) ] ) phrase.to_phones() # should return ["HH", "EH0", "L", "OW1", "W", "ER1", "L", "D"]
Expand source code
@property def phones(self) -> Sequence[Text]: """Represent `clu.phontools.struct.Phrase` using a flat sequence of symbols denoting the phones of each word Example: ```python from clu.phontools.struct import * # define a phrase phrase = Phrase( words=[ Word( word='hello', phonological_form=PhonologicalWord( phones=('HH', 'EH0', 'L', 'OW1'), stress_pattern=[ Stress.NON_VOWEL, Stress.NO_STRESS, Stress.NON_VOWEL, Stress.PRIMARY ] ) ), Word( word='world', phonological_form=PhonologicalWord( phones=('W', 'ER1', 'L', 'D'), stress_pattern=[ Stress.NON_VOWEL, Stress.PRIMARY, Stress.NON_VOWEL, Stress.NON_VOWEL ] ) ) ] ) phrase.to_phones() # should return ["HH", "EH0", "L", "OW1", "W", "ER1", "L", "D"] ``` """ return [phone for word in self.words for phone in word.pf.phones]
var stress_pattern : Sequence[Stress]
-
Returns stress pattern for each word in the Phrase
Expand source code
@property def stress_pattern(self) -> Sequence[Stress]: """Returns stress pattern for each word in the Phrase""" return [word.pf.stress_pattern for word in self.words]
Methods
def mask_syllables(self, mask: str = 'X') ‑> Sequence[str]
-
Returns coarse stress form for each word in the Phrase
Expand source code
def mask_syllables(self, mask: Text = "X") -> Sequence[Text]: """Returns coarse stress form for each word in the Phrase""" return [word.pf.mask_syllables(mask) for word in self.words]
def match_coarse_stress_pattern(self, pattern: str) ‑> bool
-
Checks if
Phrase matches
the specified coarse stress pattern (a regular expression).Example:
from clu.phontools.struct import * # define a phrase phrase = Phrase( words=[ Word( word='hello', phonological_form=PhonologicalWord( phones=('HH', 'EH0', 'L', 'OW1'), stress_pattern=[ Stress.NON_VOWEL, Stress.NO_STRESS, Stress.NON_VOWEL, Stress.PRIMARY ] ) ) ] ) phrase.match_coarse_stress_pattern("WS") # should return True
Expand source code
def match_coarse_stress_pattern(self, pattern: Text) -> bool: """Checks if `clu.phontools.struct.Phrase matches` the specified coarse stress pattern (a regular expression). Example: ```python from clu.phontools.struct import * # define a phrase phrase = Phrase( words=[ Word( word='hello', phonological_form=PhonologicalWord( phones=('HH', 'EH0', 'L', 'OW1'), stress_pattern=[ Stress.NON_VOWEL, Stress.NO_STRESS, Stress.NON_VOWEL, Stress.PRIMARY ] ) ) ] ) phrase.match_coarse_stress_pattern("WS") # should return True ``` """ return True if re.match(pattern, " ".join(self.coarse_stress)) else False
def match_masked_syllables(self, pattern: str, mask: str = 'X') ‑> bool
-
Checks if
Phrase matches
the specified masked stress pattern (a regular expression).Example:
from clu.phontools.struct import * # define a phrase phrase = Phrase( words=[ Word( word='hello', phonological_form=PhonologicalWord( phones=('HH', 'EH0', 'L', 'OW1'), stress_pattern=[ Stress.NON_VOWEL, Stress.NO_STRESS, Stress.NON_VOWEL, Stress.PRIMARY ] ) ) ] ) phrase.match_masked_syllables("^XX", mask="X") # should return True
Expand source code
def match_masked_syllables(self, pattern: Text, mask: Text = "X") -> bool: """Checks if `clu.phontools.struct.Phrase matches` the specified masked stress pattern (a regular expression). Example: ```python from clu.phontools.struct import * # define a phrase phrase = Phrase( words=[ Word( word='hello', phonological_form=PhonologicalWord( phones=('HH', 'EH0', 'L', 'OW1'), stress_pattern=[ Stress.NON_VOWEL, Stress.NO_STRESS, Stress.NON_VOWEL, Stress.PRIMARY ] ) ) ] ) phrase.match_masked_syllables("^XX", mask="X") # should return True ``` """ return ( True if re.match(pattern, " ".join(self.mask_syllables(mask=mask))) else False )
def words_as_phones(self) ‑> Sequence[str]
-
Represent
Phrase
using a sequence of symbols denoting the phones of each wordExample:
from clu.phontools.struct import * # define a phrase phrase = Phrase( words=[ Word( word='hello', phonological_form=PhonologicalWord( phones=('HH', 'EH0', 'L', 'OW1'), stress_pattern=[ Stress.NON_VOWEL, Stress.NO_STRESS, Stress.NON_VOWEL, Stress.PRIMARY ] ) ), Word( word='world', phonological_form=PhonologicalWord( phones=('W', 'ER1', 'L', 'D'), stress_pattern=[ Stress.NON_VOWEL, Stress.PRIMARY, Stress.NON_VOWEL, Stress.NON_VOWEL ] ) ) ] ) phrase.to_phones() # should return ['HH EH0 L OW1', 'W ER1 L D']
Expand source code
def words_as_phones(self) -> Sequence[Text]: """Represent `clu.phontools.struct.Phrase` using a sequence of symbols denoting the phones of each word Example: ```python from clu.phontools.struct import * # define a phrase phrase = Phrase( words=[ Word( word='hello', phonological_form=PhonologicalWord( phones=('HH', 'EH0', 'L', 'OW1'), stress_pattern=[ Stress.NON_VOWEL, Stress.NO_STRESS, Stress.NON_VOWEL, Stress.PRIMARY ] ) ), Word( word='world', phonological_form=PhonologicalWord( phones=('W', 'ER1', 'L', 'D'), stress_pattern=[ Stress.NON_VOWEL, Stress.PRIMARY, Stress.NON_VOWEL, Stress.NON_VOWEL ] ) ) ] ) phrase.to_phones() # should return ['HH EH0 L OW1', 'W ER1 L D'] ``` """ return [" ".join(word.pf.phones) for word in self.words]
class Stress (value, names=None, *, module=None, qualname=None, type=None, start=1)
-
Enumeration of all possible stress values
Expand source code
class Stress(Enum): """Enumeration of all possible stress values""" NON_VOWEL = "-" """The phone is not a vowel (i.e., it cannot have a stress assignment)""" NO_STRESS = "0" """The phone (vowel) is unstressed.""" PRIMARY = "1" """The phone (vowel) receives primary stress""" SECONDARY = "2" """The phone (vowel) receives secondary stress.""" def __repr__(self) -> Text: return f"Stress.{self.name}"
Ancestors
- enum.Enum
Class variables
var NON_VOWEL
-
The phone is not a vowel (i.e., it cannot have a stress assignment)
var NO_STRESS
-
The phone (vowel) is unstressed.
var PRIMARY
-
The phone (vowel) receives primary stress
var SECONDARY
-
The phone (vowel) receives secondary stress.
class SyllableProperties
-
Propertie and manipulations of syllables.
Expand source code
class SyllableProperties(ABC): """Propertie and manipulations of syllables.""" """A sequence of `clu.phontools.struct.Stress` assignments""" def __init__(self): self.stress_pattern: Sequence[Stress] = [] @property def coarse_stress_pattern(self) -> Sequence[CoarseStress]: """Maps a detailed stress sequence to a sequence of strong and weak stressed syllabled""" summary = [] for stress in self.stress_pattern: if stress == Stress.NO_STRESS: summary.append(CoarseStress.WEAK) elif stress in {Stress.PRIMARY, Stress.SECONDARY}: summary.append(CoarseStress.STRONG) return summary @property def coarse_stress(self) -> Text: """Converts a phonological word to a sequence of S (strong) or W (weak) symbols""" return "".join(cs.value for cs in self.coarse_stress_pattern) def mask_syllables(self, mask: Text = "X") -> Text: """Converts a phonological word where each syllable is represented using the mask conceptual examples: "poo" -> "X" where mask is "X" "July" -> "XX" where mask is "X" """ return "".join(mask for cs in self.coarse_stress_pattern) @property def num_syllables(self): """A syllable contains at most one stressed element (weak or strong)""" return len(self.coarse_stress_pattern)
Ancestors
- abc.ABC
Subclasses
Instance variables
var coarse_stress : str
-
Converts a phonological word to a sequence of S (strong) or W (weak) symbols
Expand source code
@property def coarse_stress(self) -> Text: """Converts a phonological word to a sequence of S (strong) or W (weak) symbols""" return "".join(cs.value for cs in self.coarse_stress_pattern)
var coarse_stress_pattern : Sequence[CoarseStress]
-
Maps a detailed stress sequence to a sequence of strong and weak stressed syllabled
Expand source code
@property def coarse_stress_pattern(self) -> Sequence[CoarseStress]: """Maps a detailed stress sequence to a sequence of strong and weak stressed syllabled""" summary = [] for stress in self.stress_pattern: if stress == Stress.NO_STRESS: summary.append(CoarseStress.WEAK) elif stress in {Stress.PRIMARY, Stress.SECONDARY}: summary.append(CoarseStress.STRONG) return summary
var num_syllables
-
A syllable contains at most one stressed element (weak or strong)
Expand source code
@property def num_syllables(self): """A syllable contains at most one stressed element (weak or strong)""" return len(self.coarse_stress_pattern)
Methods
def mask_syllables(self, mask: str = 'X') ‑> str
-
Converts a phonological word where each syllable is represented using the mask
conceptual examples: "poo" -> "X" where mask is "X" "July" -> "XX" where mask is "X"
Expand source code
def mask_syllables(self, mask: Text = "X") -> Text: """Converts a phonological word where each syllable is represented using the mask conceptual examples: "poo" -> "X" where mask is "X" "July" -> "XX" where mask is "X" """ return "".join(mask for cs in self.coarse_stress_pattern)
class Word (**data: Any)
-
The smallest sequence of phonemes that can be uttered in isolation with objective or practical meaning.
:param word: orthographic representation of this Word :param phonological_form: the phonological form of this Word
Create a new model by parsing and validating input data from keyword arguments.
Raises ValidationError if the input data cannot be parsed to form a valid model.
Expand source code
class Word(BaseModel, Hashable): r"""The smallest sequence of phonemes that can be uttered in isolation with objective or practical meaning. :param word: orthographic representation of this Word :param phonological_form: the phonological form of this Word """ word: Text phonological_form: PhonologicalWord @property def pf(self) -> PhonologicalWord: """Alias for phonological_form""" return self.phonological_form def graphemes(self) -> Sequence[Text]: """Individual characters/symbols that comprise the orthographic representation of the `clu.phontools.struct.Word`""" return "".split(self.word) def __hash__(self) -> int: return hash((("word", self.word), ("pf", hash(self.phonological_form))))
Ancestors
- pydantic.main.BaseModel
- pydantic.utils.Representation
- Hashable
- abc.ABC
Class variables
var phonological_form : PhonologicalWord
var word : str
Instance variables
var pf : PhonologicalWord
-
Alias for phonological_form
Expand source code
@property def pf(self) -> PhonologicalWord: """Alias for phonological_form""" return self.phonological_form
Methods
def graphemes(self) ‑> Sequence[str]
-
Individual characters/symbols that comprise the orthographic representation of the
Word
Expand source code
def graphemes(self) -> Sequence[Text]: """Individual characters/symbols that comprise the orthographic representation of the `clu.phontools.struct.Word`""" return "".split(self.word)