Module clu.phontools.alignment.realine
Expand source code
from typing import Dict, Tuple, Text, Sequence
from pydantic import BaseModel
from clu.phontools import features
import numpy as np
class PhonemeErrors(BaseModel):
    """
    Stores phoneme errors.

    Each field holds the aligned pairs that were classified as one kind of
    error; every pair is a `(phone_1, phone_2)` tuple.
    """

    # aligned pairs classified as insertions
    insertions: Sequence[Tuple[Text, Text]]
    # aligned pairs classified as deletions
    deletions: Sequence[Tuple[Text, Text]]
    # aligned pairs classified as substitutions
    substitutions: Sequence[Tuple[Text, Text]]

    @property
    def edit_distance(self) -> int:
        """
        Total number of recorded insertions, deletions, and substitutions.
        """
        return len(self.insertions) + len(self.deletions) + len(self.substitutions)

    def to_dict(self) -> Dict[str, Sequence[Tuple[Text, Text]]]:
        """
        Returns the three error lists keyed by error type.
        The values are the stored sequences themselves (not copies, not counts).
        """
        return {
            "insertions": self.insertions,
            "deletions": self.deletions,
            "substitutions": self.substitutions,
        }
class ReAline:
    """
    Feature-based algorithm for aligning two sequences of phones.
    Based on Kondrak 2002

    The scoring parameters default to the values defined in
    `clu.phontools.features`; all of them can be overridden per instance.
    """

    inf = float("inf")

    def __init__(
        self,
        similarity_matrix=features.similarity_matrix,
        feature_matrix=features.feature_matrix,
        salience=features.salience,
        consonants=features.consonants,
        C_skip=features.C_skip,
        C_vwl=features.C_vwl,
        C_sub=features.C_sub,
        C_exp=features.C_exp,
        R_c=features.R_c,
        R_v=features.R_v,
    ):
        """
        :param similarity_matrix: maps each feature value to a numeric value
        :param feature_matrix: maps each phone to its {feature: feature value} dict
        :param salience: maps each feature to its weight
        :param consonants: collection of phones treated as consonants
        :param C_skip: score of an insertion/deletion
        :param C_vwl: weight assigned to vowel, consonant pairs
        :param C_sub: base score of a substitution
        :param C_exp: base score of an expansion/compression
        :param R_c: List of relevant features for consonants
        :param R_v: List of relevant features for vowels
        :raises AssertionError: if the tables are inconsistent (see `sanity_check`)
        """
        self.similarity_matrix = similarity_matrix
        self.feature_matrix = feature_matrix
        self.consonants = consonants
        self.salience = salience
        self.C_skip = C_skip
        # weight assigned to vowel, consonant pairs
        self.C_vwl = C_vwl
        self.C_sub = C_sub
        self.C_exp = C_exp
        # List of relevant features for consonants
        self.R_c = R_c
        # List of relevant features for vowels
        self.R_v = R_v
        # sanity check
        self.sanity_check()

    @staticmethod
    def phoneme_errors(alignments: Sequence[Tuple[Text, Text]]) -> PhonemeErrors:
        """
        Counts insertions, deletions, and substitutions according to the output of Re-Aline

        :param alignments: aligned pairs; "-" marks a gap on either side
        :return: the pairs grouped by error type (identical pairs are not errors)
        """
        insertions = []
        deletions = []
        substitutions = []
        for pair in alignments:
            (phone_1, phone_2) = pair
            if phone_1 == "-":
                # gap on the first side
                insertions.append(pair)
            elif phone_2 == "-":
                # gap on the second side
                deletions.append(pair)
            elif phone_1 != phone_2:
                # neither side is a gap here, so a mismatch is a substitution
                substitutions.append(pair)
        return PhonemeErrors(
            insertions=insertions, deletions=deletions, substitutions=substitutions
        )

    def sanity_check(self):
        """
        Sanity check that ensures necessary features are present

        :raises AssertionError: if a salience key is not a feature of any phone,
            if a similarity_matrix key is not a feature value of any phone,
            or if a consonant has no entry in feature_matrix
        """
        similarity_matrix = self.similarity_matrix
        feature_matrix = self.feature_matrix
        salience = self.salience
        consonants = self.consonants
        # collect every feature name and feature value used by any phone
        feats = set()
        feat_values = set()
        for phone_fm in feature_matrix.values():
            for (k, v) in phone_fm.items():
                feats.add(k)
                feat_values.add(v)
        # ensure all salience values are found in feature matrix
        assert (
            len(salience.keys() - feats) == 0
        ), f"salience and features for each sound in feature_matrix do not match: {salience.keys() - feats}"
        assert (
            len(similarity_matrix.keys() - feat_values) == 0
        ), f"similarity_matrix and feature values for each sound in feature_matrix do not match: {similarity_matrix.keys() - feat_values}"
        missing = [c for c in consonants if c not in feature_matrix.keys()]
        assert (
            len(missing) == 0
        ), f"Some consonants missing from feature_matrix: {missing}"

    def sigma_skip(self, p: Text) -> int:
        """
        Returns score of an indel of P.
        The score is the constant C_skip; P itself is not inspected.
        (Kondrak 2002: 54)
        """
        return self.C_skip

    def V(self, p: Text) -> int:
        """
        Return vowel weight if P is vowel.
        Any segment not listed in `consonants` is treated as a vowel.
        (Kondrak 2002: 54)
        """
        return 0 if p in self.consonants else self.C_vwl

    def R(self, p: Text, q: Text) -> Sequence[Text]:
        """
        Return relevant features for segment comparison.
        The consonant features are used as soon as either segment is a
        consonant; the vowel features only when both segments are vowels.
        (Kondrak 2002: 54)
        """
        consonants = self.consonants
        # both operands need the membership test: a bare `q` is truthy for
        # every non-empty segment and would always select R_c
        if p in consonants or q in consonants:
            return self.R_c
        return self.R_v

    def diff(self, p: Text, q: Text, f: Text) -> float:
        """
        Returns difference between phonetic segments P and Q for feature F.
        (Kondrak 2002: 52, 54)

        :raises KeyError: if P or Q is not in feature_matrix, or lacks feature F
        """
        p_features, q_features = self.feature_matrix[p], self.feature_matrix[q]
        return abs(
            self.similarity_matrix[p_features[f]]
            - self.similarity_matrix[q_features[f]]
        )

    def delta(self, p: Text, q: Text) -> float:
        """
        Return weighted sum of difference between P and Q.
        Each relevant feature's difference is weighted by its salience.
        (Kondrak 2002: 54)
        """
        relevant = self.R(p, q)
        return sum(self.diff(p, q, f) * self.salience[f] for f in relevant)

    def sigma_sub(self, p: Text, q: Text) -> float:
        """
        Returns score of a substitution of P with Q.
        (Kondrak 2002: 54)
        """
        return self.C_sub - self.delta(p, q) - self.V(p) - self.V(q)

    def sigma_exp(self, p: Text, q: Sequence[Text]) -> float:
        """
        Returns score of an expansion/compression.
        Q holds the two segments that P is aligned with.
        (Kondrak 2002: 54)
        """
        q1 = q[0]
        q2 = q[1]
        return (
            self.C_exp
            - self.delta(p, q1)
            - self.delta(p, q2)
            - self.V(p)
            - max(self.V(q1), self.V(q2))
        )

    def _retrieve(self, i, j, s, S, T, seq1, seq2, out) -> Sequence[Tuple[Text, Text]]:
        """
        Retrieve the path through the similarity matrix S starting at (i, j).

        :param i: row to start from (1-based position in seq1)
        :param j: column to start from (1-based position in seq2)
        :param s: score accumulated so far along the path
        :param S: similarity matrix filled in by `align`
        :param T: threshold score a path must reach
        :param seq1: first sequence of symbols
        :param seq2: second sequence of symbols
        :param out: list the aligned pairs are prepended to (mutated in place)
        :return: Alignment of seq1 and seq2
        """
        # Walk backwards one edit at a time. This is a loop rather than a
        # recursive call per step so long sequences cannot exhaust the stack.
        # The edit operations are tried in a fixed order; the first one that
        # keeps the path at or above the threshold is taken.
        while S[i, j] != 0:
            if j > 1:
                # expansion: one segment of seq1 against two of seq2
                score = self.sigma_exp(seq1[i - 1], seq2[j - 2 : j])
                if S[i - 1, j - 2] + score + s >= T:
                    out.insert(0, (seq1[i - 1], seq2[j - 2 : j]))
                    i, j, s = i - 1, j - 2, s + score
                    continue
            if i > 1:
                # compression: two segments of seq1 against one of seq2
                score = self.sigma_exp(seq2[j - 1], seq1[i - 2 : i])
                if S[i - 2, j - 1] + score + s >= T:
                    out.insert(0, (seq1[i - 2 : i], seq2[j - 1]))
                    i, j, s = i - 2, j - 1, s + score
                    continue
            # skip a segment of seq2
            score = self.sigma_skip(seq2[j - 1])
            if S[i, j - 1] + score + s >= T:
                out.insert(0, ("-", seq2[j - 1]))
                j, s = j - 1, s + score
                continue
            # skip a segment of seq1
            score = self.sigma_skip(seq1[i - 1])
            if S[i - 1, j] + score + s >= T:
                out.insert(0, (seq1[i - 1], "-"))
                i, s = i - 1, s + score
                continue
            # substitution (or match)
            score = self.sigma_sub(seq1[i - 1], seq2[j - 1])
            if S[i - 1, j - 1] + score + s >= T:
                out.insert(0, (seq1[i - 1], seq2[j - 1]))
                i, j, s = i - 1, j - 1, s + score
                continue
            # no operation reaches the threshold from here
            break
        return out

    def align(
        self, seq1: Sequence[Text], seq2: Sequence[Text], epsilon: float = 0
    ) -> Sequence[Tuple[Text, Text]]:
        """
        Computes the alignment of two symbol sequences.
        :param seq1: a sequence of symbols
        :param seq2: a sequence of symbols
        :type epsilon: float (0.0 to 1.0)
        :param epsilon: Adjusts threshold similarity score for near-optimal alignments
        :return: Alignment(s) of seq1 and seq2; when several cells reach the
            threshold, their alignments are concatenated into one flat list
        :raises AssertionError: if epsilon is outside [0.0, 1.0]
        (Kondrak 2002: 51)
        """
        assert 0.0 <= epsilon <= 1.0, "Epsilon must be between 0.0 and 1.0."
        m = len(seq1)
        n = len(seq2)
        # This includes Kondrak's initialization of row 0 and column 0 to all 0s.
        S = np.zeros((m + 1, n + 1), dtype=float)
        for i in range(1, m + 1):
            for j in range(1, n + 1):
                edit1 = S[i - 1, j] + self.sigma_skip(seq1[i - 1])
                edit2 = S[i, j - 1] + self.sigma_skip(seq2[j - 1])
                edit3 = S[i - 1, j - 1] + self.sigma_sub(seq1[i - 1], seq2[j - 1])
                # If i <= 1 or j <= 1, don't allow expansions as it doesn't make sense,
                # and breaks array and string indices. Make sure they never get chosen
                # by setting them to -inf.
                if i > 1:
                    edit4 = S[i - 2, j - 1] + self.sigma_exp(
                        seq2[j - 1], seq1[i - 2 : i]
                    )
                else:
                    edit4 = -ReAline.inf
                if j > 1:
                    edit5 = S[i - 1, j - 2] + self.sigma_exp(
                        seq1[i - 1], seq2[j - 2 : j]
                    )
                else:
                    edit5 = -ReAline.inf
                # the 0 keeps every cell non-negative
                S[i, j] = max(edit1, edit2, edit3, edit4, edit5, 0)
        T = (1 - epsilon) * np.amax(S)  # Threshold score for near-optimal alignments
        aligned = []
        for i in range(1, m + 1):
            for j in range(1, n + 1):
                if S[i, j] >= T:
                    # every cell at or above the threshold starts its own traceback
                    aligned.extend(self._retrieve(i, j, 0, S, T, seq1, seq2, []))
        return aligned
Classes
class PhonemeErrors (**data: Any)
-
stores phoneme errors.
Create a new model by parsing and validating input data from keyword arguments.
Raises ValidationError if the input data cannot be parsed to form a valid model.
Expand source code
class PhonemeErrors(BaseModel):
    """
    Stores phoneme errors.

    Each field holds the aligned pairs that were classified as one kind of
    error; every pair is a `(phone_1, phone_2)` tuple.
    """

    # aligned pairs classified as insertions
    insertions: Sequence[Tuple[Text, Text]]
    # aligned pairs classified as deletions
    deletions: Sequence[Tuple[Text, Text]]
    # aligned pairs classified as substitutions
    substitutions: Sequence[Tuple[Text, Text]]

    @property
    def edit_distance(self) -> int:
        """
        Total number of recorded insertions, deletions, and substitutions.
        """
        return len(self.insertions) + len(self.deletions) + len(self.substitutions)

    def to_dict(self) -> Dict[str, Sequence[Tuple[Text, Text]]]:
        """
        Returns the three error lists keyed by error type.
        The values are the stored sequences themselves (not copies, not counts).
        """
        return {
            "insertions": self.insertions,
            "deletions": self.deletions,
            "substitutions": self.substitutions,
        }
Ancestors
- pydantic.main.BaseModel
- pydantic.utils.Representation
Class variables
var deletions : Sequence[Tuple[str, str]]
var insertions : Sequence[Tuple[str, str]]
var substitutions : Sequence[Tuple[str, str]]
Instance variables
var edit_distance : int
-
Expand source code
@property
def edit_distance(self) -> int:
    """
    Total number of recorded insertions, deletions, and substitutions.
    """
    error_groups = (self.insertions, self.deletions, self.substitutions)
    return sum(len(group) for group in error_groups)
Methods
def to_dict(self) ‑> Dict[str, float]
-
Expand source code
def to_dict(self) -> Dict[str, Sequence[Tuple[Text, Text]]]:
    """
    Returns the three error lists keyed by error type.
    The values are the stored sequences themselves (not copies, not counts).
    """
    return {
        "insertions": self.insertions,
        "deletions": self.deletions,
        "substitutions": self.substitutions,
    }
class ReAline (similarity_matrix={'bilabial': 1.0, 'labiodental': 0.95, 'dental': 0.9, 'alveolar': 0.85, 'retroflex': 0.8, 'palato-alveolar': 0.75, 'palatal': 0.7, 'velar': 0.6, 'uvular': 0.5, 'pharyngeal': 0.3, 'glottal': 0.1, 'labiovelar': 1.0, 'vowel': -1.0, 'stop': 1.0, 'affricate': 0.9, 'fricative': 0.85, 'trill': 0.7, 'tap': 0.65, 'approximant': 0.6, 'high': 1.0, 'mid': 0.5, 'low': 0.0, 'vowel2': 0.5, 'front': 1.0, 'central': 0.5, 'back': 0.0, 'plus': 1.0, 'minus': 0.0, 'lexical': 0.0}, feature_matrix={'tʃ': {'place': 'palato-alveolar', 'manner': 'affricate', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'dʒ': {'place': 'palato-alveolar', 'manner': 'affricate', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'p': {'place': 'bilabial', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'b': {'place': 'bilabial', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 't': {'place': 'alveolar', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'd': {'place': 'alveolar', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ʈ': {'place': 'retroflex', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'plus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɖ': {'place': 'retroflex', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'plus', 'lateral': 'minus', 'aspirated': 'minus'}, 'c': {'place': 'palatal', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 
'lateral': 'minus', 'aspirated': 'minus'}, 'ɟ': {'place': 'palatal', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'k': {'place': 'velar', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'g': {'place': 'velar', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɡ': {'place': 'velar', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'q': {'place': 'uvular', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɢ': {'place': 'uvular', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ʔ': {'place': 'glottal', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'm': {'place': 'bilabial', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'plus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɱ': {'place': 'labiodental', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'plus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'n': {'place': 'alveolar', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'plus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɳ': {'place': 'retroflex', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'plus', 'retroflex': 'plus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɲ': {'place': 'palatal', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'plus', 'retroflex': 'minus', 'lateral': 'minus', 
'aspirated': 'minus'}, 'ŋ': {'place': 'velar', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'plus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɴ': {'place': 'uvular', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'plus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'N': {'place': 'uvular', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'plus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ʙ': {'place': 'bilabial', 'manner': 'trill', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'r': {'place': 'alveolar', 'manner': 'trill', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'plus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ʀ': {'place': 'uvular', 'manner': 'trill', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'R': {'place': 'uvular', 'manner': 'trill', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɾ': {'place': 'alveolar', 'manner': 'tap', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɽ': {'place': 'retroflex', 'manner': 'tap', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'plus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɸ': {'place': 'bilabial', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'β': {'place': 'bilabial', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'f': {'place': 'labiodental', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 
'aspirated': 'minus'}, 'v': {'place': 'labiodental', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'θ': {'place': 'dental', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ð': {'place': 'dental', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 's': {'place': 'alveolar', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'z': {'place': 'alveolar', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ʃ': {'place': 'palato-alveolar', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ʒ': {'place': 'palato-alveolar', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ʂ': {'place': 'retroflex', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'plus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ʐ': {'place': 'retroflex', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'plus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ç': {'place': 'palatal', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ʝ': {'place': 'palatal', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'x': {'place': 'velar', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 
'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɣ': {'place': 'velar', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'χ': {'place': 'uvular', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ʁ': {'place': 'uvular', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ħ': {'place': 'pharyngeal', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ʕ': {'place': 'pharyngeal', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'h': {'place': 'glottal', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɦ': {'place': 'glottal', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɬ': {'place': 'alveolar', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'plus', 'aspirated': 'minus'}, 'ɮ': {'place': 'alveolar', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'plus', 'aspirated': 'minus'}, 'ʋ': {'place': 'labiodental', 'manner': 'approximant', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɹ': {'place': 'alveolar', 'manner': 'approximant', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɻ': {'place': 
'retroflex', 'manner': 'approximant', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'plus', 'lateral': 'minus', 'aspirated': 'minus'}, 'j': {'place': 'palatal', 'manner': 'approximant', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɰ': {'place': 'velar', 'manner': 'approximant', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'l': {'place': 'alveolar', 'manner': 'approximant', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'plus', 'aspirated': 'minus'}, 'w': {'place': 'labiovelar', 'manner': 'approximant', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɑ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'back', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'ɑ1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'back', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'ɑ2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'back', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'ɑ3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'back', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'ɪ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 
'ɪ1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ɪ2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ɪ3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'i': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'i1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'i2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'i3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'y': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'y1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 
'high': 'high', 'back': 'front', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'y2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'y3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'e': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'e1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'e2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'e3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'E': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'E1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'E2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 
'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'E3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'ø': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ø1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ø2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ø3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ɛ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ɛ1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ɛ2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'minus', 'long': 'minus', 
'aspirated': 'minus'}, 'ɛ3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'œ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'œ1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'œ2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'œ3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'æ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'æ1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'æ2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'æ3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 
'lateral': 'minus', 'high': 'low', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'a': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'front', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'a1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'front', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'a2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'front', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'a3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'front', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'A': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'A1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'A2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'A3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'ɨ': {'place': 'vowel', 'manner': 
'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'central', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ɨ1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'central', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ɨ2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'central', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ɨ3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'central', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ʉ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'central', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ʉ1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'central', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ʉ2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'central', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ʉ3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'central', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ə': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 
'back': 'central', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ə1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'central', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ə2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'central', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ə3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'central', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'u': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'u1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'u2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'u3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'U': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'U1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 
'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'U2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'U3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'o': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'o1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'o2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'o3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'O': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'O1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 
'O2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'O3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'ɔ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'ɔ1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'ɔ2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'ɔ3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'ɒ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'back', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ɒ1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'back', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ɒ2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 
'back', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ɒ3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'back', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'I': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'I1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'I2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'I3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'ʌ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ʌ1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ʌ2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ʌ3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 
'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ʊ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ʊ1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ʊ2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ʊ3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ɜ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ɜ1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ɜ2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ɜ3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 
'aɪ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ɑɪ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ɑi': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ɔɪ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'aʊ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ɑʊ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'LB': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus', 'boundary': 'lexical'}}, salience={'syllabic': 5, 'place': 40, 'manner': 50, 'voice': 5, 'nasal': 20, 'retroflex': 10, 'lateral': 10, 'aspirated': 5, 'long': 0, 'high': 3, 'back': 2, 'round': 2, 'boundary': 0}, consonants=['N', 'R', 'b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'x', 'z', 'ç', 'ð', 'ħ', 'ŋ', 'ɖ', 'ɟ', 'ɢ', 'ɣ', 'ɦ', 'ɬ', 'ɮ', 
'ɰ', 'ɱ', 'ɲ', 'ɳ', 'ɴ', 'ɸ', 'ɹ', 'ɻ', 'ɽ', 'ɾ', 'ʀ', 'ʁ', 'ʂ', 'ʃ', 'ʈ', 'ʋ', 'ʒ', 'ʔ', 'ʕ', 'ʙ', 'ʝ', 'β', 'θ', 'χ', 'ʐ', 'w', 'ɜ', 'ɡ', 'LB'], C_skip=10, C_vwl=5, C_sub=35, C_exp=45, R_c=['aspirated', 'lateral', 'manner', 'nasal', 'place', 'retroflex', 'syllabic', 'voice'], R_v=['back', 'lateral', 'long', 'manner', 'nasal', 'place', 'retroflex', 'round', 'syllabic', 'voice'])
-
Feature-based algorithm for aligning two sequences of phones.
Based on Kondrak 2002
Expand source code
class ReAline:
    """
    Feature-based algorithm for aligning two sequences of phones.
    Based on Kondrak 2002

    Scores are built from per-feature differences between phones
    (``feature_matrix`` + ``similarity_matrix``), weighted by ``salience``,
    and combined into skip / substitution / expansion scores that drive a
    dynamic-programming alignment (see ``align``).
    """

    inf = float("inf")

    def __init__(
        self,
        similarity_matrix=features.similarity_matrix,
        feature_matrix=features.feature_matrix,
        salience=features.salience,
        consonants=features.consonants,
        C_skip=features.C_skip,
        C_vwl=features.C_vwl,
        C_sub=features.C_sub,
        C_exp=features.C_exp,
        R_c=features.R_c,
        R_v=features.R_v,
    ):
        """
        :param similarity_matrix: maps a feature value to the number used when differencing two phones
        :param feature_matrix: maps a phone to its ``{feature: feature value}`` dict
        :param salience: maps a feature to the weight applied to its difference
        :param consonants: phones treated as consonants
        :param C_skip: score returned for an indel
        :param C_vwl: weight subtracted for each vowel taking part in a substitution/expansion
        :param C_sub: base score of a substitution
        :param C_exp: base score of an expansion/compression
        :param R_c: features compared when at least one phone is a consonant
        :param R_v: features compared when both phones are vowels
        """
        self.similarity_matrix = similarity_matrix
        self.feature_matrix = feature_matrix
        self.consonants = consonants
        self.salience = salience
        self.C_skip = C_skip
        # weight assigned to vowel, consonant pairs
        self.C_vwl = C_vwl
        self.C_sub = C_sub
        self.C_exp = C_exp
        # List of relevant features for consonants
        self.R_c = R_c
        # List of relevant features for vowels
        self.R_v = R_v
        # sanity check
        self.sanity_check()

    @staticmethod
    def phoneme_errors(alignments: Sequence[Tuple[Text, Text]]) -> PhonemeErrors:
        """
        Counts insertions, deletions, and substitutions according to the output of Re-Aline

        A pair whose first member is "-" is an insertion, a pair whose second
        member is "-" is a deletion, and any other pair of unequal members is
        a substitution.  Pairs of equal members are not errors.
        """
        insertions = []
        deletions = []
        substitutions = []
        for pair in alignments:
            (phone_1, phone_2) = pair
            if phone_1 == "-":
                insertions.append(pair)
            elif phone_2 == "-":
                deletions.append(pair)
            elif phone_1 != phone_2:
                # neither side is a gap here: both gap cases were handled above
                substitutions.append(pair)
        return PhonemeErrors(
            insertions=insertions, deletions=deletions, substitutions=substitutions
        )

    def sanity_check(self):
        """
        Sanity check that ensures necessary features are present

        :raises AssertionError: if ``salience`` names a feature, or
            ``similarity_matrix`` names a feature value, that no phone in
            ``feature_matrix`` uses, or if a consonant has no entry in
            ``feature_matrix``
        """
        similarity_matrix = self.similarity_matrix
        feature_matrix = self.feature_matrix
        salience = self.salience
        consonants = self.consonants
        # ensure all salience values are found in feature matrix
        feats = set()
        feat_values = set()
        for phone_fm in feature_matrix.values():
            for (k, v) in phone_fm.items():
                feats.add(k)
                feat_values.add(v)
        assert (
            len(salience.keys() - feats) == 0
        ), f"salience and features for each sound in feature_matrix do not match: {salience.keys() - feats}"
        assert (
            len(similarity_matrix.keys() - feat_values) == 0
        ), f"similarity_matrix and feature values for each sound in feature_matrix do not match: {similarity_matrix.keys() - feat_values}"
        missing = [c for c in consonants if c not in feature_matrix.keys()]
        assert (
            len(missing) == 0
        ), f"Some consonants missing from feature_matrix: {missing}"

    def sigma_skip(self, p: Text) -> int:
        """
        Returns score of an indel of P.
        (Kondrak 2002: 54)

        The score is the constant ``C_skip``; ``p`` itself is not inspected.
        """
        return self.C_skip

    def V(self, p: Text) -> int:
        """
        Return vowel weight if P is vowel.
        (Kondrak 2002: 54)

        Returns 0 when ``p`` is listed in ``consonants``, ``C_vwl`` otherwise.
        """
        return 0 if p in self.consonants else self.C_vwl

    def R(self, p: Text, q: Text) -> Sequence[Text]:
        """
        Return relevant features for segment comparison.
        (Kondrak 2002: 54)

        ``R_c`` is used when at least one of the two phones is a consonant,
        ``R_v`` only when both are vowels.
        """
        consonants = self.consonants
        # Both phones must be tested for membership.  A bare ``or q`` is always
        # truthy for a non-empty phone string and would make R_v unreachable.
        if p in consonants or q in consonants:
            return self.R_c
        return self.R_v

    def diff(self, p: Text, q: Text, f: Text) -> int:
        """
        Returns difference between phonetic segments P and Q for feature F.
        (Kondrak 2002: 52, 54)

        :raises KeyError: if a phone, the feature, or a feature value is unknown
        """
        p_features, q_features = self.feature_matrix[p], self.feature_matrix[q]
        return abs(
            self.similarity_matrix[p_features[f]]
            - self.similarity_matrix[q_features[f]]
        )

    def delta(self, p: Text, q: Text) -> int:
        """
        Return weighted sum of difference between P and Q.
        (Kondrak 2002: 54)

        Each relevant feature's difference is multiplied by its salience.
        """
        relevant_features = self.R(p, q)
        total = 0
        for f in relevant_features:
            total += self.diff(p, q, f) * self.salience[f]
        return total

    def sigma_sub(self, p: Text, q: Text) -> int:
        """
        Returns score of a substitution of P with Q.
        (Kondrak 2002: 54)
        """
        return self.C_sub - self.delta(p, q) - self.V(p) - self.V(q)

    def sigma_exp(self, p: Text, q: Sequence[Text]) -> int:
        """
        Returns score of an expansion/compression.
        (Kondrak 2002: 54)

        :param p: the single phone
        :param q: the two phones ``p`` is aligned with (only q[0] and q[1] are read)
        """
        q1 = q[0]
        q2 = q[1]
        return (
            self.C_exp
            - self.delta(p, q1)
            - self.delta(p, q2)
            - self.V(p)
            - max(self.V(q1), self.V(q2))
        )

    def _retrieve(self, i, j, s, S, T, seq1, seq2, out) -> Sequence[Tuple[Text, Text]]:
        """
        Retrieve the path through the similarity matrix S starting at (i, j).

        :param i: row index into S (position in seq1, 1-based)
        :param j: column index into S (position in seq2, 1-based)
        :param s: score accumulated along the path walked so far
        :param S: score matrix filled in by ``align``
        :param T: threshold a path's total score must reach
        :param out: list the aligned pairs are prepended to (mutated in place)
        :return: Alignment of seq1 and seq2
        """
        if S[i, j] == 0:
            # a zero cell marks the start of a local alignment
            return out
        else:
            # Moves are tried in a fixed order (expansion, compression, skips,
            # substitution); the first one that keeps the path at or above the
            # threshold is followed.
            if (
                j > 1
                and S[i - 1, j - 2] + self.sigma_exp(seq1[i - 1], seq2[j - 2 : j]) + s
                >= T
            ):
                out.insert(0, (seq1[i - 1], seq2[j - 2 : j]))
                self._retrieve(
                    i - 1,
                    j - 2,
                    s + self.sigma_exp(seq1[i - 1], seq2[j - 2 : j]),
                    S,
                    T,
                    seq1,
                    seq2,
                    out,
                )
            elif (
                i > 1
                and S[i - 2, j - 1] + self.sigma_exp(seq2[j - 1], seq1[i - 2 : i]) + s
                >= T
            ):
                out.insert(0, (seq1[i - 2 : i], seq2[j - 1]))
                self._retrieve(
                    i - 2,
                    j - 1,
                    s + self.sigma_exp(seq2[j - 1], seq1[i - 2 : i]),
                    S,
                    T,
                    seq1,
                    seq2,
                    out,
                )
            elif S[i, j - 1] + self.sigma_skip(seq2[j - 1]) + s >= T:
                out.insert(0, ("-", seq2[j - 1]))
                self._retrieve(
                    i, j - 1, s + self.sigma_skip(seq2[j - 1]), S, T, seq1, seq2, out
                )
            elif S[i - 1, j] + self.sigma_skip(seq1[i - 1]) + s >= T:
                out.insert(0, (seq1[i - 1], "-"))
                self._retrieve(
                    i - 1, j, s + self.sigma_skip(seq1[i - 1]), S, T, seq1, seq2, out
                )
            elif S[i - 1, j - 1] + self.sigma_sub(seq1[i - 1], seq2[j - 1]) + s >= T:
                out.insert(0, (seq1[i - 1], seq2[j - 1]))
                self._retrieve(
                    i - 1,
                    j - 1,
                    s + self.sigma_sub(seq1[i - 1], seq2[j - 1]),
                    S,
                    T,
                    seq1,
                    seq2,
                    out,
                )
        return out

    def align(
        self, seq1: Sequence[Text], seq2: Sequence[Text], epsilon: float = 0
    ) -> Sequence[Tuple[Text, Text]]:
        """
        Computes the alignment of two symbol sequences.

        :param seq1: a sequence of symbols
        :param seq2: a sequence of symbols
        :type epsilon: float (0.0 to 1.0)
        :param epsilon: Adjusts threshold similarity score for near-optimal alignments
        :return: Alignment(s) of seq1 and seq2, flattened into a single list of pairs
        :raises AssertionError: if epsilon is outside [0.0, 1.0]

        (Kondrak 2002: 51)
        """
        assert 0.0 <= epsilon <= 1.0, "Epsilon must be between 0.0 and 1.0."
        m = len(seq1)
        n = len(seq2)
        # This includes Kondrak's initialization of row 0 and column 0 to all 0s.
        S = np.zeros((m + 1, n + 1), dtype=float)

        # If i <= 1 or j <= 1, don't allow expansions as it doesn't make sense,
        # and breaks array and string indices. Make sure they never get chosen
        # by setting them to -inf.
        for i in range(1, m + 1):
            for j in range(1, n + 1):
                edit1 = S[i - 1, j] + self.sigma_skip(seq1[i - 1])
                edit2 = S[i, j - 1] + self.sigma_skip(seq2[j - 1])
                edit3 = S[i - 1, j - 1] + self.sigma_sub(seq1[i - 1], seq2[j - 1])
                if i > 1:
                    edit4 = S[i - 2, j - 1] + self.sigma_exp(
                        seq2[j - 1], seq1[i - 2 : i]
                    )
                else:
                    edit4 = -ReAline.inf
                if j > 1:
                    edit5 = S[i - 1, j - 2] + self.sigma_exp(
                        seq1[i - 1], seq2[j - 2 : j]
                    )
                else:
                    edit5 = -ReAline.inf
                # the trailing 0 keeps every cell non-negative
                S[i, j] = max(edit1, edit2, edit3, edit4, edit5, 0)

        T = (1 - epsilon) * np.amax(S)  # Threshold score for near-optimal alignments

        alignments = []
        for i in range(1, m + 1):
            for j in range(1, n + 1):
                if S[i, j] >= T:
                    alignments.append(self._retrieve(i, j, 0, S, T, seq1, seq2, []))
        # every retrieved alignment is concatenated into one flat list of pairs
        return [pair for alignment in alignments for pair in alignment]
Class variables
var inf
Static methods
def phoneme_errors(alignments: Sequence[Tuple[str, str]]) ‑> PhonemeErrors
-
Counts insertions, deletions, and substitutions according to the output of Re-Aline
Expand source code
@staticmethod
def phoneme_errors(alignments: Sequence[Tuple[Text, Text]]) -> PhonemeErrors:
    """
    Sort the aligned pairs produced by Re-Aline into insertions, deletions,
    and substitutions.

    A pair with "-" on the left is an insertion, a pair with "-" on the right
    is a deletion, and any remaining pair of unequal members is a
    substitution.  Pairs of equal members are ignored.
    """
    gap = "-"
    inserted, deleted, substituted = [], [], []
    for aligned in alignments:
        left, right = aligned
        if left == gap:
            inserted.append(aligned)
            continue
        if right == gap:
            deleted.append(aligned)
            continue
        # neither side is a gap at this point
        if left != right:
            substituted.append(aligned)
    return PhonemeErrors(
        insertions=inserted, deletions=deleted, substitutions=substituted
    )
Methods
def R(self, p: str, q: str) ‑> Sequence[str]
-
Return relevant features for segment comparison. (Kondrak 2002: 54)
Expand source code
def R(self, p: Text, q: Text) -> Sequence[Text]:
    """
    Return relevant features for segment comparison.
    (Kondrak 2002: 54)

    :param p: first phone
    :param q: second phone
    :return: ``self.R_c`` when at least one of the phones is listed in
        ``self.consonants``, ``self.R_v`` when neither is
    """
    consonants = self.consonants
    # Both phones must be tested for membership.  A bare ``or q`` is always
    # truthy for a non-empty phone string and would make R_v unreachable.
    if p in consonants or q in consonants:
        return self.R_c
    return self.R_v
def V(self, p: str) ‑> int
-
Return vowel weight if P is vowel. (Kondrak 2002: 54)
Expand source code
def V(self, p: Text) -> int:
    """
    Vowel weight of P: 0 for a phone listed in ``self.consonants``,
    ``self.C_vwl`` for anything else.
    (Kondrak 2002: 54)
    """
    if p in self.consonants:
        return 0
    return self.C_vwl
def align(self, seq1: Sequence[str], seq2: Sequence[str], epsilon: float = 0) ‑> Sequence[Tuple[str, str]]
-
Computes the alignment of two symbol sequences.
:param seq1: a sequence of symbols :param seq2: a sequence of symbols
:type epsilon: float (0.0 to 1.0) :param epsilon: Adjusts threshold similarity score for near-optimal alignments :return: Alignment(s) of seq1 and seq2 (Kondrak 2002: 51)
Expand source code
def align(
    self, seq1: Sequence[Text], seq2: Sequence[Text], epsilon: float = 0
) -> Sequence[Tuple[Text, Text]]:
    """
    Align two symbol sequences by dynamic programming.

    :param seq1: a sequence of symbols
    :param seq2: a sequence of symbols
    :type epsilon: float (0.0 to 1.0)
    :param epsilon: lowers the threshold score so that near-optimal
        alignments are retrieved as well
    :return: the pairs of every retrieved alignment, concatenated into one list

    (Kondrak 2002: 51)
    """
    assert 0.0 <= epsilon <= 1.0, "Epsilon must be between 0.0 and 1.0."
    rows = len(seq1) + 1
    cols = len(seq2) + 1
    # Row 0 and column 0 stay at 0, which is Kondrak's initialization.
    S = np.zeros((rows, cols), dtype=float)

    for i in range(1, rows):
        phone1 = seq1[i - 1]
        for j in range(1, cols):
            phone2 = seq2[j - 1]
            # Expansions need two symbols on one side; when i <= 1 or j <= 1
            # they are scored -inf so they can never be picked (and so the
            # i - 2 / j - 2 indices are never used).
            candidates = [
                S[i - 1, j] + self.sigma_skip(phone1),
                S[i, j - 1] + self.sigma_skip(phone2),
                S[i - 1, j - 1] + self.sigma_sub(phone1, phone2),
                (
                    S[i - 2, j - 1] + self.sigma_exp(phone2, seq1[i - 2 : i])
                    if i > 1
                    else -ReAline.inf
                ),
                (
                    S[i - 1, j - 2] + self.sigma_exp(phone1, seq2[j - 2 : j])
                    if j > 1
                    else -ReAline.inf
                ),
                0,
            ]
            S[i, j] = max(candidates)

    # Threshold score for near-optimal alignments
    T = (1 - epsilon) * np.amax(S)

    flattened = []
    for i in range(1, rows):
        for j in range(1, cols):
            if S[i, j] >= T:
                flattened.extend(self._retrieve(i, j, 0, S, T, seq1, seq2, []))
    return flattened
def delta(self, p: str, q: str) ‑> int
-
Return weighted sum of difference between P and Q. (Kondrak 2002: 54)
Expand source code
def delta(self, p: Text, q: Text) -> int:
    """
    Salience-weighted sum of the per-feature differences between P and Q,
    taken over the features that ``self.R(p, q)`` reports as relevant.
    (Kondrak 2002: 54)
    """
    return sum(
        self.diff(p, q, feature) * self.salience[feature]
        for feature in self.R(p, q)
    )
def diff(self, p: str, q: str, f: str) ‑> int
-
Returns difference between phonetic segments P and Q for feature F. (Kondrak 2002: 52, 54)
Expand source code
def diff(self, p: Text, q: Text, f: Text) -> int:
    """
    Absolute difference between phonetic segments P and Q on feature F.
    (Kondrak 2002: 52, 54)

    Each phone's value for F is looked up in ``self.feature_matrix`` and
    converted to a number through ``self.similarity_matrix``.
    """
    phones = self.feature_matrix
    p_row = phones[p]
    q_row = phones[q]
    weights = self.similarity_matrix
    p_score = weights[p_row[f]]
    q_score = weights[q_row[f]]
    return abs(p_score - q_score)
def sanity_check(self)
-
Sanity check that ensures necessary features are present
Expand source code
def sanity_check(self):
    """
    Verify that the configured tables are mutually consistent.

    Fails with an AssertionError when ``salience`` names a feature, or
    ``similarity_matrix`` names a feature value, that no phone in
    ``feature_matrix`` uses, or when a consonant has no entry in
    ``feature_matrix``.
    """
    # collect every feature name and feature value used by any phone
    feats, feat_values = set(), set()
    for phone_fm in self.feature_matrix.values():
        feats.update(phone_fm.keys())
        feat_values.update(phone_fm.values())

    salience = self.salience
    assert not (
        salience.keys() - feats
    ), f"salience and features for each sound in feature_matrix do not match: {salience.keys() - feats}"

    similarity_matrix = self.similarity_matrix
    assert not (
        similarity_matrix.keys() - feat_values
    ), f"similarity_matrix and feature values for each sound in feature_matrix do not match: {similarity_matrix.keys() - feat_values}"

    known_phones = self.feature_matrix.keys()
    missing = [c for c in self.consonants if c not in known_phones]
    assert not missing, f"Some consonants missing from feature_matrix: {missing}"
def sigma_exp(self, p: str, q: Sequence[str]) ‑> int
-
Returns score of an expansion/compression. (Kondrak 2002: 54)
Expand source code
def sigma_exp(self, p: Text, q: Sequence[Text]) -> int:
    """
    Score of aligning the single phone P with the two phones q[0] and q[1]
    (an expansion/compression).
    (Kondrak 2002: 54)
    """
    first, second = q[0], q[1]
    score = self.C_exp
    score = score - self.delta(p, first)
    score = score - self.delta(p, second)
    score = score - self.V(p)
    # only the larger of the two vowel weights on the expanded side is charged
    score = score - max(self.V(first), self.V(second))
    return score
def sigma_skip(self, p: str) ‑> int
-
Returns score of an indel of P. (Kondrak 2002: 54)
Expand source code
def sigma_skip(self, p: Text) -> int:
    """
    Score of an indel of P.
    (Kondrak 2002: 54)

    The score is the constant ``self.C_skip``; ``p`` is not inspected.
    """
    indel_score = self.C_skip
    return indel_score
def sigma_sub(self, p: str, q: str) ‑> int
-
Returns score of a substitution of P with Q. (Kondrak 2002: 54)
Expand source code
def sigma_sub(self, p: Text, q: Text) -> int:
    """
    Score of substituting P with Q: ``self.C_sub`` reduced by the weighted
    feature difference of the pair and by each phone's vowel weight.
    (Kondrak 2002: 54)
    """
    score = self.C_sub - self.delta(p, q)
    score = score - self.V(p)
    return score - self.V(q)