Module clu.phontools.alignment.realine

Expand source code
from typing import Dict, Tuple, Text, Sequence
from pydantic import BaseModel
from clu.phontools import features
import numpy as np


class PhonemeErrors(BaseModel):
    """
    Stores phoneme errors as three groups of aligned phone pairs.

    Each entry is a ``(phone_1, phone_2)`` pair taken from an alignment.
    """

    # pairs classified as insertions
    insertions: Sequence[Tuple[Text, Text]]
    # pairs classified as deletions
    deletions: Sequence[Tuple[Text, Text]]
    # pairs classified as substitutions
    substitutions: Sequence[Tuple[Text, Text]]

    @property
    def edit_distance(self) -> int:
        """
        Total number of recorded insertions, deletions, and substitutions.
        """
        return len(self.insertions) + len(self.deletions) + len(self.substitutions)

    def to_dict(self) -> Dict[str, Sequence[Tuple[Text, Text]]]:
        """
        Returns the three error groups keyed by their field name.

        The values are the stored sequences themselves (not copies).
        """
        return {
            "insertions": self.insertions,
            "deletions": self.deletions,
            "substitutions": self.substitutions,
        }


class ReAline:
    """
    Feature-based algorithm for aligning two sequences of phones.

    Based on Kondrak 2002

    Scores are built from per-feature differences between phones
    (``feature_matrix`` + ``similarity_matrix``), weighted by ``salience``,
    and combined with the constants ``C_skip``, ``C_sub``, ``C_exp`` and
    ``C_vwl``.  ``R_c`` / ``R_v`` list the features compared when at least
    one phone is a consonant / when neither phone is a consonant.
    """

    inf = float("inf")

    def __init__(
        self,
        similarity_matrix=features.similarity_matrix,
        feature_matrix=features.feature_matrix,
        salience=features.salience,
        consonants=features.consonants,
        C_skip=features.C_skip,
        C_vwl=features.C_vwl,
        C_sub=features.C_sub,
        C_exp=features.C_exp,
        R_c=features.R_c,
        R_v=features.R_v,
    ):
        """
        Stores the scoring tables and constants, then validates them with
        `sanity_check` (which raises AssertionError on inconsistent tables).
        """
        self.similarity_matrix = similarity_matrix
        self.feature_matrix = feature_matrix
        self.consonants = consonants
        self.salience = salience
        self.C_skip = C_skip
        # weight assigned to vowel, consonant pairs
        self.C_vwl = C_vwl
        self.C_sub = C_sub
        self.C_exp = C_exp
        # List of relevant features for consonants
        self.R_c = R_c
        # List of relevant features for vowels
        self.R_v = R_v
        # sanity check
        self.sanity_check()

    @staticmethod
    def phoneme_errors(alignments: Sequence[Tuple[Text, Text]]) -> PhonemeErrors:
        """
        Counts insertions, deletions, and substitutions according to the output of Re-Aline

        A pair whose first element is "-" is an insertion, a pair whose second
        element is "-" is a deletion, and any other pair of unequal elements is
        a substitution.  Pairs of equal elements are not recorded.
        """
        insertions = []
        deletions = []
        substitutions = []
        for pair in alignments:
            (phone_1, phone_2) = pair
            if phone_1 == "-":
                insertions.append(pair)
            elif phone_2 == "-":
                deletions.append(pair)
            elif phone_1 != phone_2:
                # neither side is a gap here: both gap cases were handled above
                substitutions.append(pair)
        return PhonemeErrors(
            insertions=insertions, deletions=deletions, substitutions=substitutions
        )

    def sanity_check(self):
        """
        Sanity check that ensures necessary features are present

        Raises AssertionError when a salience key is not a feature name used in
        feature_matrix, when a similarity_matrix key is not a feature value
        used in feature_matrix, or when a consonant has no feature_matrix entry.
        """

        similarity_matrix = self.similarity_matrix
        feature_matrix = self.feature_matrix
        salience = self.salience
        consonants = self.consonants

        # collect every feature name and every feature value used by any phone
        feats = set()
        feat_values = set()
        for phone_fm in feature_matrix.values():
            feats.update(phone_fm.keys())
            feat_values.update(phone_fm.values())

        # ensure all salience values are found in feature matrix
        assert (
            len(salience.keys() - feats) == 0
        ), f"salience and features for each sound in feature_matrix do not match: {salience.keys() - feats}"

        assert (
            len(similarity_matrix.keys() - feat_values) == 0
        ), f"similarity_matrix and feature values for each sound in feature_matrix do not match: {similarity_matrix.keys() - feat_values}"

        missing = [c for c in consonants if c not in feature_matrix]
        assert (
            len(missing) == 0
        ), f"Some consonants missing from feature_matrix: {missing}"

    def sigma_skip(self, p: Text) -> int:
        """
        Returns score of an indel of P.
        (Kondrak 2002: 54)

        The score is the constant C_skip; P itself is not inspected.
        """
        return self.C_skip

    def V(self, p: Text) -> int:
        """
        Return vowel weight if P is vowel.
        (Kondrak 2002: 54)

        Any phone not listed in ``consonants`` is treated as a vowel.
        """
        return 0 if p in self.consonants else self.C_vwl

    def R(self, p: Text, q: Text) -> Sequence[Text]:
        """
        Return relevant features for segment comparison.
        (Kondrak 2002: 54)

        R_c is used when either P or Q is a consonant; R_v only when neither is.
        """
        consonants = self.consonants

        # Both operands need a membership test: a bare `or q` is truthy for
        # every non-empty phone symbol, which would make R_v unreachable.
        if p in consonants or q in consonants:
            return self.R_c
        return self.R_v

    def diff(self, p: Text, q: Text, f: Text) -> float:
        """
        Returns difference between phonetic segments P and Q for feature F.
        (Kondrak 2002: 52, 54)

        Raises KeyError if P or Q is missing from feature_matrix, or if F is
        not a feature of one of them.
        """
        p_features, q_features = self.feature_matrix[p], self.feature_matrix[q]
        return abs(
            self.similarity_matrix[p_features[f]]
            - self.similarity_matrix[q_features[f]]
        )

    def delta(self, p: Text, q: Text) -> float:
        """
        Return weighted sum of difference between P and Q.
        (Kondrak 2002: 54)

        Each relevant feature's difference is weighted by its salience.
        """
        # named `relevant` so the imported `features` module is not shadowed
        relevant = self.R(p, q)
        total = 0
        for f in relevant:
            total += self.diff(p, q, f) * self.salience[f]
        return total

    def sigma_sub(self, p: Text, q: Text) -> float:
        """
        Returns score of a substitution of P with Q.
        (Kondrak 2002: 54)
        """
        return self.C_sub - self.delta(p, q) - self.V(p) - self.V(q)

    def sigma_exp(self, p: Text, q: Sequence[Text]) -> float:
        """
        Returns score of an expansion/compression.
        (Kondrak 2002: 54)

        Q must hold (at least) two phones; only the first two are used.
        """
        q1 = q[0]
        q2 = q[1]
        return (
            self.C_exp
            - self.delta(p, q1)
            - self.delta(p, q2)
            - self.V(p)
            - max(self.V(q1), self.V(q2))
        )

    def _retrieve(self, i, j, s, S, T, seq1, seq2, out) -> Sequence[Tuple[Text, Text]]:
        """
        Retrieve the path through the similarity matrix S starting at (i, j).

        Walks backwards from cell (i, j), prepending one aligned pair to
        ``out`` per step, until a cell with score 0 is reached.  ``s`` is the
        score accumulated along the path traced so far and ``T`` the threshold
        a candidate step must reach.  Candidate steps are tried in a fixed
        order (expansion, compression, skip in seq2, skip in seq1,
        substitution) and the first one that qualifies is taken.

        ``out`` is mutated in place and also returned.

        :return: Alignment of seq1 and seq2
        """
        if S[i, j] == 0:
            return out

        if (
            j > 1
            and S[i - 1, j - 2] + self.sigma_exp(seq1[i - 1], seq2[j - 2 : j]) + s
            >= T
        ):
            # one phone of seq1 aligned with two phones of seq2
            out.insert(0, (seq1[i - 1], seq2[j - 2 : j]))
            self._retrieve(
                i - 1,
                j - 2,
                s + self.sigma_exp(seq1[i - 1], seq2[j - 2 : j]),
                S,
                T,
                seq1,
                seq2,
                out,
            )
        elif (
            i > 1
            and S[i - 2, j - 1] + self.sigma_exp(seq2[j - 1], seq1[i - 2 : i]) + s
            >= T
        ):
            # two phones of seq1 aligned with one phone of seq2
            out.insert(0, (seq1[i - 2 : i], seq2[j - 1]))
            self._retrieve(
                i - 2,
                j - 1,
                s + self.sigma_exp(seq2[j - 1], seq1[i - 2 : i]),
                S,
                T,
                seq1,
                seq2,
                out,
            )
        elif S[i, j - 1] + self.sigma_skip(seq2[j - 1]) + s >= T:
            # phone of seq2 aligned with a gap
            out.insert(0, ("-", seq2[j - 1]))
            self._retrieve(
                i, j - 1, s + self.sigma_skip(seq2[j - 1]), S, T, seq1, seq2, out
            )
        elif S[i - 1, j] + self.sigma_skip(seq1[i - 1]) + s >= T:
            # phone of seq1 aligned with a gap
            out.insert(0, (seq1[i - 1], "-"))
            self._retrieve(
                i - 1, j, s + self.sigma_skip(seq1[i - 1]), S, T, seq1, seq2, out
            )
        elif S[i - 1, j - 1] + self.sigma_sub(seq1[i - 1], seq2[j - 1]) + s >= T:
            # one-to-one pairing
            out.insert(0, (seq1[i - 1], seq2[j - 1]))
            self._retrieve(
                i - 1,
                j - 1,
                s + self.sigma_sub(seq1[i - 1], seq2[j - 1]),
                S,
                T,
                seq1,
                seq2,
                out,
            )
        return out

    def align(
        self, seq1: Sequence[Text], seq2: Sequence[Text], epsilon: float = 0
    ) -> Sequence[Tuple[Text, Text]]:
        """
        Computes the alignment of two symbol sequences.

        :param seq1: a sequence of symbols
        :param seq2: a sequence of symbols

        :type epsilon: float (0.0 to 1.0)
        :param epsilon: Adjusts threshold similarity score for near-optimal alignments
        :return: Alignment(s) of seq1 and seq2
        (Kondrak 2002: 51)

        Every cell whose score reaches the threshold starts its own traceback;
        the pairs of all resulting alignments are returned concatenated in a
        single flat list.  Raises AssertionError if epsilon is outside
        [0.0, 1.0].
        """

        assert 0.0 <= epsilon <= 1.0, "Epsilon must be between 0.0 and 1.0."

        m = len(seq1)
        n = len(seq2)
        # This includes Kondrak's initialization of row 0 and column 0 to all 0s.
        S = np.zeros((m + 1, n + 1), dtype=float)
        # If i <= 1 or j <= 1, don't allow expansions as it doesn't make sense,
        # and breaks array and string indices. Make sure they never get chosen
        # by setting them to -inf.
        for i in range(1, m + 1):
            for j in range(1, n + 1):
                edit1 = S[i - 1, j] + self.sigma_skip(seq1[i - 1])
                edit2 = S[i, j - 1] + self.sigma_skip(seq2[j - 1])
                edit3 = S[i - 1, j - 1] + self.sigma_sub(seq1[i - 1], seq2[j - 1])
                if i > 1:
                    edit4 = S[i - 2, j - 1] + self.sigma_exp(
                        seq2[j - 1], seq1[i - 2 : i]
                    )
                else:
                    edit4 = -ReAline.inf
                if j > 1:
                    edit5 = S[i - 1, j - 2] + self.sigma_exp(
                        seq1[i - 1], seq2[j - 2 : j]
                    )
                else:
                    edit5 = -ReAline.inf
                # the 0 floor keeps every cell score non-negative
                S[i, j] = max(edit1, edit2, edit3, edit4, edit5, 0)
        T = (1 - epsilon) * np.amax(S)  # Threshold score for near-optimal alignments

        alignments = []
        for i in range(1, m + 1):
            for j in range(1, n + 1):
                if S[i, j] >= T:
                    alignments.append(self._retrieve(i, j, 0, S, T, seq1, seq2, []))
        # flatten: pairs from all retrieved alignments, in discovery order
        return [pair for alignment in alignments for pair in alignment]

Classes

class PhonemeErrors (**data: Any)

stores phoneme errors.

Create a new model by parsing and validating input data from keyword arguments.

Raises ValidationError if the input data cannot be parsed to form a valid model.

Expand source code
class PhonemeErrors(BaseModel):
    """
    Stores phoneme errors as three groups of aligned phone pairs.

    Each entry is a ``(phone_1, phone_2)`` pair taken from an alignment.
    """

    # pairs classified as insertions
    insertions: Sequence[Tuple[Text, Text]]
    # pairs classified as deletions
    deletions: Sequence[Tuple[Text, Text]]
    # pairs classified as substitutions
    substitutions: Sequence[Tuple[Text, Text]]

    @property
    def edit_distance(self) -> int:
        """
        Total number of recorded insertions, deletions, and substitutions.
        """
        return len(self.insertions) + len(self.deletions) + len(self.substitutions)

    def to_dict(self) -> Dict[str, Sequence[Tuple[Text, Text]]]:
        """
        Returns the three error groups keyed by their field name.

        The values are the stored sequences themselves (not copies).
        """
        return {
            "insertions": self.insertions,
            "deletions": self.deletions,
            "substitutions": self.substitutions,
        }

Ancestors

  • pydantic.main.BaseModel
  • pydantic.utils.Representation

Class variables

var deletions : Sequence[Tuple[str, str]]
var insertions : Sequence[Tuple[str, str]]
var substitutions : Sequence[Tuple[str, str]]

Instance variables

var edit_distance : int
Expand source code
@property
def edit_distance(self) -> int:
    """
    Total number of recorded insertions, deletions, and substitutions.
    """
    error_groups = (self.insertions, self.deletions, self.substitutions)
    return sum(len(group) for group in error_groups)

Methods

def to_dict(self) ‑> Dict[str, float]
Expand source code
def to_dict(self) -> Dict[str, Sequence[Tuple[Text, Text]]]:
    """
    Returns the three error groups keyed by their field name.

    The values are the stored sequences themselves (not copies).
    """
    return {
        "insertions": self.insertions,
        "deletions": self.deletions,
        "substitutions": self.substitutions,
    }
class ReAline (similarity_matrix={'bilabial': 1.0, 'labiodental': 0.95, 'dental': 0.9, 'alveolar': 0.85, 'retroflex': 0.8, 'palato-alveolar': 0.75, 'palatal': 0.7, 'velar': 0.6, 'uvular': 0.5, 'pharyngeal': 0.3, 'glottal': 0.1, 'labiovelar': 1.0, 'vowel': -1.0, 'stop': 1.0, 'affricate': 0.9, 'fricative': 0.85, 'trill': 0.7, 'tap': 0.65, 'approximant': 0.6, 'high': 1.0, 'mid': 0.5, 'low': 0.0, 'vowel2': 0.5, 'front': 1.0, 'central': 0.5, 'back': 0.0, 'plus': 1.0, 'minus': 0.0, 'lexical': 0.0}, feature_matrix={'tʃ': {'place': 'palato-alveolar', 'manner': 'affricate', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'dʒ': {'place': 'palato-alveolar', 'manner': 'affricate', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'p': {'place': 'bilabial', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'b': {'place': 'bilabial', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 't': {'place': 'alveolar', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'd': {'place': 'alveolar', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ʈ': {'place': 'retroflex', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'plus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɖ': {'place': 'retroflex', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'plus', 'lateral': 'minus', 'aspirated': 'minus'}, 'c': {'place': 'palatal', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 
'lateral': 'minus', 'aspirated': 'minus'}, 'ɟ': {'place': 'palatal', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'k': {'place': 'velar', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'g': {'place': 'velar', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɡ': {'place': 'velar', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'q': {'place': 'uvular', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɢ': {'place': 'uvular', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ʔ': {'place': 'glottal', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'm': {'place': 'bilabial', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'plus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɱ': {'place': 'labiodental', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'plus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'n': {'place': 'alveolar', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'plus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɳ': {'place': 'retroflex', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'plus', 'retroflex': 'plus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɲ': {'place': 'palatal', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'plus', 'retroflex': 'minus', 'lateral': 'minus', 
'aspirated': 'minus'}, 'ŋ': {'place': 'velar', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'plus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɴ': {'place': 'uvular', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'plus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'N': {'place': 'uvular', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'plus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ʙ': {'place': 'bilabial', 'manner': 'trill', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'r': {'place': 'alveolar', 'manner': 'trill', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'plus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ʀ': {'place': 'uvular', 'manner': 'trill', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'R': {'place': 'uvular', 'manner': 'trill', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɾ': {'place': 'alveolar', 'manner': 'tap', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɽ': {'place': 'retroflex', 'manner': 'tap', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'plus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɸ': {'place': 'bilabial', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'β': {'place': 'bilabial', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'f': {'place': 'labiodental', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 
'aspirated': 'minus'}, 'v': {'place': 'labiodental', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'θ': {'place': 'dental', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ð': {'place': 'dental', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 's': {'place': 'alveolar', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'z': {'place': 'alveolar', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ʃ': {'place': 'palato-alveolar', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ʒ': {'place': 'palato-alveolar', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ʂ': {'place': 'retroflex', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'plus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ʐ': {'place': 'retroflex', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'plus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ç': {'place': 'palatal', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ʝ': {'place': 'palatal', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'x': {'place': 'velar', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 
'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɣ': {'place': 'velar', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'χ': {'place': 'uvular', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ʁ': {'place': 'uvular', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ħ': {'place': 'pharyngeal', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ʕ': {'place': 'pharyngeal', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'h': {'place': 'glottal', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɦ': {'place': 'glottal', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɬ': {'place': 'alveolar', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'plus', 'aspirated': 'minus'}, 'ɮ': {'place': 'alveolar', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'plus', 'aspirated': 'minus'}, 'ʋ': {'place': 'labiodental', 'manner': 'approximant', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɹ': {'place': 'alveolar', 'manner': 'approximant', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɻ': {'place': 
'retroflex', 'manner': 'approximant', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'plus', 'lateral': 'minus', 'aspirated': 'minus'}, 'j': {'place': 'palatal', 'manner': 'approximant', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɰ': {'place': 'velar', 'manner': 'approximant', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'l': {'place': 'alveolar', 'manner': 'approximant', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'plus', 'aspirated': 'minus'}, 'w': {'place': 'labiovelar', 'manner': 'approximant', 'syllabic': 'minus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'}, 'ɑ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'back', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'ɑ1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'back', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'ɑ2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'back', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'ɑ3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'back', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'ɪ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 
'ɪ1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ɪ2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ɪ3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'i': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'i1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'i2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'i3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'y': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'y1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 
'high': 'high', 'back': 'front', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'y2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'y3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'e': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'e1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'e2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'e3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'E': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'E1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'E2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 
'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'E3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'ø': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ø1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ø2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ø3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ɛ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ɛ1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ɛ2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'minus', 'long': 'minus', 
'aspirated': 'minus'}, 'ɛ3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'œ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'œ1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'œ2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'œ3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'front', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'æ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'æ1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'æ2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'æ3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 
'lateral': 'minus', 'high': 'low', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'a': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'front', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'a1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'front', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'a2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'front', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'a3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'front', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'A': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'A1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'A2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'A3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'ɨ': {'place': 'vowel', 'manner': 
'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'central', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ɨ1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'central', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ɨ2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'central', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ɨ3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'central', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ʉ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'central', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ʉ1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'central', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ʉ2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'central', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ʉ3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'central', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ə': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 
'back': 'central', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ə1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'central', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ə2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'central', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ə3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'central', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'u': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'u1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'u2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'u3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'U': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'U1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 
'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'U2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'U3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'o': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'o1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'o2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'o3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'O': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'O1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 
'O2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'O3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'ɔ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'ɔ1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'ɔ2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'ɔ3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'plus', 'long': 'plus', 'aspirated': 'minus'}, 'ɒ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'back', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ɒ1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'back', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ɒ2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 
'back', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ɒ3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low', 'back': 'back', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'I': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'I1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'I2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'I3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'front', 'round': 'minus', 'long': 'plus', 'aspirated': 'minus'}, 'ʌ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ʌ1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ʌ2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ʌ3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 
'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid', 'back': 'back', 'round': 'minus', 'long': 'minus', 'aspirated': 'minus'}, 'ʊ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ʊ1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ʊ2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ʊ3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ɜ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ɜ1': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ɜ2': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ɜ3': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 
'aɪ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ɑɪ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ɑi': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ɔɪ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'aʊ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'ɑʊ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus'}, 'LB': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus', 'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high', 'back': 'back', 'round': 'plus', 'long': 'minus', 'aspirated': 'minus', 'boundary': 'lexical'}}, salience={'syllabic': 5, 'place': 40, 'manner': 50, 'voice': 5, 'nasal': 20, 'retroflex': 10, 'lateral': 10, 'aspirated': 5, 'long': 0, 'high': 3, 'back': 2, 'round': 2, 'boundary': 0}, consonants=['N', 'R', 'b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'x', 'z', 'ç', 'ð', 'ħ', 'ŋ', 'ɖ', 'ɟ', 'ɢ', 'ɣ', 'ɦ', 'ɬ', 'ɮ', 
'ɰ', 'ɱ', 'ɲ', 'ɳ', 'ɴ', 'ɸ', 'ɹ', 'ɻ', 'ɽ', 'ɾ', 'ʀ', 'ʁ', 'ʂ', 'ʃ', 'ʈ', 'ʋ', 'ʒ', 'ʔ', 'ʕ', 'ʙ', 'ʝ', 'β', 'θ', 'χ', 'ʐ', 'w', 'ɜ', 'ɡ', 'LB'], C_skip=10, C_vwl=5, C_sub=35, C_exp=45, R_c=['aspirated', 'lateral', 'manner', 'nasal', 'place', 'retroflex', 'syllabic', 'voice'], R_v=['back', 'lateral', 'long', 'manner', 'nasal', 'place', 'retroflex', 'round', 'syllabic', 'voice'])

Feature-based algorithm for aligning two sequences of phones.

Based on Kondrak 2002

Expand source code
class ReAline:
    """
    Feature-based algorithm for aligning two sequences of phones.

    Based on Kondrak 2002

    Segments are compared feature by feature: each feature value is mapped to
    a number through ``similarity_matrix`` and the differences are weighted by
    ``salience``. A dynamic-programming pass (``align``) fills a score matrix and
    ``_retrieve`` walks it back to recover the aligned pairs.
    """

    inf = float("inf")

    def __init__(
        self,
        similarity_matrix=features.similarity_matrix,
        feature_matrix=features.feature_matrix,
        salience=features.salience,
        consonants=features.consonants,
        C_skip=features.C_skip,
        C_vwl=features.C_vwl,
        C_sub=features.C_sub,
        C_exp=features.C_exp,
        R_c=features.R_c,
        R_v=features.R_v,
    ):
        """
        Store the scoring tables and constants, then validate them.

        :param similarity_matrix: mapping from a feature value to its numeric value
        :param feature_matrix: mapping from a phone to its ``{feature: feature value}`` mapping
        :param salience: mapping from a feature name to its weight
        :param consonants: collection of phones that are treated as consonants
        :param C_skip: score returned for an insertion/deletion
        :param C_vwl: weight subtracted for every non-consonant in a comparison
        :param C_sub: maximum score of a substitution
        :param C_exp: maximum score of an expansion/compression
        :param R_c: features compared when at least one segment is a consonant
        :param R_v: features compared when neither segment is a consonant
        """
        self.similarity_matrix = similarity_matrix
        self.feature_matrix = feature_matrix
        self.consonants = consonants
        self.salience = salience
        self.C_skip = C_skip
        # weight assigned to vowel, consonant pairs
        self.C_vwl = C_vwl
        self.C_sub = C_sub
        self.C_exp = C_exp
        # List of relevant features for consonants
        self.R_c = R_c
        # List of relevant features for vowels
        self.R_v = R_v
        # sanity check
        self.sanity_check()

    @staticmethod
    def phoneme_errors(alignments: Sequence[Tuple[Text, Text]]) -> PhonemeErrors:
        """
        Counts insertions, deletions, and substitutions according to the output of Re-Aline

        :param alignments: aligned pairs, where "-" marks a missing element
        :return: the pairs grouped by error type; pairs of equal elements are not recorded
        """
        insertions = []
        deletions = []
        substitutions = []
        for pair in alignments:
            (phone_1, phone_2) = pair
            if phone_1 == "-":
                insertions.append(pair)
            elif phone_2 == "-":
                deletions.append(pair)
            elif phone_1 != phone_2 and phone_1 != "-" and phone_2 != "-":
                substitutions.append(pair)
        return PhonemeErrors(
            insertions=insertions, deletions=deletions, substitutions=substitutions
        )

    def sanity_check(self):
        """
        Sanity check that ensures necessary features are present

        Raises ``AssertionError`` when a salience key is not a feature of any
        phone, when a similarity_matrix key is not a feature value of any phone,
        or when a consonant has no entry in the feature matrix.
        """

        similarity_matrix = self.similarity_matrix
        feature_matrix = self.feature_matrix
        salience = self.salience
        consonants = self.consonants

        # ensure all salience values are found in feature matrix
        feats = set()
        feat_values = set()
        for phone_fm in feature_matrix.values():
            for (k, v) in phone_fm.items():
                feats.add(k)
                feat_values.add(v)

        assert (
            len(salience.keys() - feats) == 0
        ), f"salience and features for each sound in feature_matrix do not match: {salience.keys() - feats}"

        assert (
            len(similarity_matrix.keys() - feat_values) == 0
        ), f"similarity_matrix and feature values for each sound in feature_matrix do not match: {similarity_matrix.keys() - feat_values}"

        missing = [c for c in consonants if c not in feature_matrix.keys()]
        assert (
            len(missing) == 0
        ), f"Some consonants missing from feature_matrix: {missing}"

    def sigma_skip(self, p: Text) -> int:
        """
        Returns score of an indel of P.
        (Kondrak 2002: 54)

        The score is the constant ``C_skip``; P itself is not inspected.
        """
        return self.C_skip

    def V(self, p: Text) -> int:
        """
        Return vowel weight if P is vowel.
        (Kondrak 2002: 54)

        Any segment not listed in ``consonants`` counts as a vowel here.
        """
        return 0 if p in self.consonants else self.C_vwl

    def R(self, p: Text, q: Text) -> Sequence[Text]:
        """
        Return relevant features for segment comparison.
        (Kondrak 2002: 54)

        :return: ``R_c`` when P or Q is a consonant, ``R_v`` otherwise
        """
        consonants = self.consonants

        # Both operands need the membership test: a bare ``or q`` is truthy for
        # every non-empty segment and would make R_v unreachable.
        return self.R_c if p in consonants or q in consonants else self.R_v

    def diff(self, p: Text, q: Text, f: Text) -> int:
        """
        Returns difference between phonetic segments P and Q for feature F.
        (Kondrak 2002: 52, 54)

        Raises ``KeyError`` if a segment, the feature, or a feature value is
        missing from the corresponding table.
        """
        p_features, q_features = self.feature_matrix[p], self.feature_matrix[q]
        return abs(
            self.similarity_matrix[p_features[f]]
            - self.similarity_matrix[q_features[f]]
        )

    def delta(self, p: Text, q: Text) -> int:
        """
        Return weighted sum of difference between P and Q.
        (Kondrak 2002: 54)

        Each relevant feature (see ``R``) contributes its difference multiplied
        by the feature's salience.
        """
        features = self.R(p, q)
        total = 0
        for f in features:
            total += self.diff(p, q, f) * self.salience[f]
        return total

    def sigma_sub(self, p: Text, q: Text) -> int:
        """
        Returns score of a substitution of P with Q.
        (Kondrak 2002: 54)
        """
        return self.C_sub - self.delta(p, q) - self.V(p) - self.V(q)

    def sigma_exp(self, p: Text, q: Sequence[Text]) -> int:
        """
        Returns score of an expansion/compression.
        (Kondrak 2002: 54)

        :param p: the single segment
        :param q: the two segments P is matched against; only ``q[0]`` and ``q[1]`` are read
        """
        q1 = q[0]
        q2 = q[1]
        return (
            self.C_exp
            - self.delta(p, q1)
            - self.delta(p, q2)
            - self.V(p)
            - max(self.V(q1), self.V(q2))
        )

    def _retrieve(self, i, j, s, S, T, seq1, seq2, out) -> Sequence[Tuple[Text, Text]]:
        """
        Retrieve the path through the similarity matrix S starting at (i, j).

        :param i: current row in S (number of seq1 elements consumed)
        :param j: current column in S (number of seq2 elements consumed)
        :param s: score accumulated along the path walked so far
        :param S: score matrix filled by ``align``
        :param T: threshold score a path has to reach
        :param out: list the aligned pairs are prepended to; mutated in place
        :return: Alignment of seq1 and seq2
        """
        # A zero cell marks the start of a local alignment.
        if S[i, j] == 0:
            return out
        else:
            # The first move whose predecessor score plus its own score plus `s`
            # reaches the threshold is taken; the branch order sets the priority.
            if (
                j > 1
                and S[i - 1, j - 2] + self.sigma_exp(seq1[i - 1], seq2[j - 2 : j]) + s
                >= T
            ):
                # one element of seq1 against two elements of seq2
                out.insert(0, (seq1[i - 1], seq2[j - 2 : j]))
                self._retrieve(
                    i - 1,
                    j - 2,
                    s + self.sigma_exp(seq1[i - 1], seq2[j - 2 : j]),
                    S,
                    T,
                    seq1,
                    seq2,
                    out,
                )
            elif (
                i > 1
                and S[i - 2, j - 1] + self.sigma_exp(seq2[j - 1], seq1[i - 2 : i]) + s
                >= T
            ):
                # two elements of seq1 against one element of seq2
                out.insert(0, (seq1[i - 2 : i], seq2[j - 1]))
                self._retrieve(
                    i - 2,
                    j - 1,
                    s + self.sigma_exp(seq2[j - 1], seq1[i - 2 : i]),
                    S,
                    T,
                    seq1,
                    seq2,
                    out,
                )
            elif S[i, j - 1] + self.sigma_skip(seq2[j - 1]) + s >= T:
                # element of seq2 with no counterpart in seq1
                out.insert(0, ("-", seq2[j - 1]))
                self._retrieve(
                    i, j - 1, s + self.sigma_skip(seq2[j - 1]), S, T, seq1, seq2, out
                )
            elif S[i - 1, j] + self.sigma_skip(seq1[i - 1]) + s >= T:
                # element of seq1 with no counterpart in seq2
                out.insert(0, (seq1[i - 1], "-"))
                self._retrieve(
                    i - 1, j, s + self.sigma_skip(seq1[i - 1]), S, T, seq1, seq2, out
                )
            elif S[i - 1, j - 1] + self.sigma_sub(seq1[i - 1], seq2[j - 1]) + s >= T:
                # one-to-one match or substitution
                out.insert(0, (seq1[i - 1], seq2[j - 1]))
                self._retrieve(
                    i - 1,
                    j - 1,
                    s + self.sigma_sub(seq1[i - 1], seq2[j - 1]),
                    S,
                    T,
                    seq1,
                    seq2,
                    out,
                )
        return out

    def align(
        self, seq1: Sequence[Text], seq2: Sequence[Text], epsilon: float = 0
    ) -> Sequence[Tuple[Text, Text]]:
        """
        Computes the alignment of two symbol sequences.

        :param seq1: a sequence of symbols
        :param seq2: a sequence of symbols

        :type epsilon: float (0.0 to 1.0)
        :param epsilon: Adjusts threshold similarity score for near-optimal alignments
        :return: Alignment(s) of seq1 and seq2, with the pairs of every
            retrieved alignment concatenated into a single flat list
        (Kondrak 2002: 51)
        """

        assert 0.0 <= epsilon <= 1.0, "Epsilon must be between 0.0 and 1.0."

        m = len(seq1)
        n = len(seq2)
        # This includes Kondrak's initialization of row 0 and column 0 to all 0s.
        S = np.zeros((m + 1, n + 1), dtype=float)
        # If i <= 1 or j <= 1, don't allow expansions as it doesn't make sense,
        # and breaks array and string indices. Make sure they never get chosen
        # by setting them to -inf.
        for i in range(1, m + 1):
            for j in range(1, n + 1):
                edit1 = S[i - 1, j] + self.sigma_skip(seq1[i - 1])
                edit2 = S[i, j - 1] + self.sigma_skip(seq2[j - 1])
                edit3 = S[i - 1, j - 1] + self.sigma_sub(seq1[i - 1], seq2[j - 1])
                if i > 1:
                    edit4 = S[i - 2, j - 1] + self.sigma_exp(
                        seq2[j - 1], seq1[i - 2 : i]
                    )
                else:
                    edit4 = -ReAline.inf
                if j > 1:
                    edit5 = S[i - 1, j - 2] + self.sigma_exp(
                        seq1[i - 1], seq2[j - 2 : j]
                    )
                else:
                    edit5 = -ReAline.inf
                # The trailing 0 keeps scores non-negative (local alignment).
                S[i, j] = max(edit1, edit2, edit3, edit4, edit5, 0)
        T = (1 - epsilon) * np.amax(S)  # Threshold score for near-optimal alignments

        # Every cell that reaches the threshold is the end of an alignment.
        alignments = []
        for i in range(1, m + 1):
            for j in range(1, n + 1):
                if S[i, j] >= T:
                    alignments.append(self._retrieve(i, j, 0, S, T, seq1, seq2, []))
        return [pair for alignment in alignments for pair in alignment]

Class variables

var inf

Static methods

def phoneme_errors(alignments: Sequence[Tuple[str, str]]) ‑> PhonemeErrors

Counts insertions, deletions, and substitutions according to the output of Re-Aline

Expand source code
@staticmethod
def phoneme_errors(alignments: Sequence[Tuple[Text, Text]]) -> PhonemeErrors:
    """
    Group the aligned pairs produced by Re-Aline by error type.

    A pair whose first element is "-" is an insertion, a pair whose second
    element is "-" is a deletion, and any remaining pair of unequal elements
    is a substitution. Pairs of equal elements are not recorded.
    """
    inserted, deleted, substituted = [], [], []
    for aligned in alignments:
        first, second = aligned
        if first == "-":
            inserted.append(aligned)
            continue
        if second == "-":
            deleted.append(aligned)
            continue
        # Neither side is "-" at this point, so inequality alone decides.
        if first != second:
            substituted.append(aligned)
    return PhonemeErrors(
        insertions=inserted, deletions=deleted, substitutions=substituted
    )

Methods

def R(self, p: str, q: str) ‑> Sequence[str]

Return relevant features for segment comparison. (Kondrak 2002: 54)

Expand source code
def R(self, p: Text, q: Text) -> Sequence[Text]:
    """
    Return relevant features for segment comparison.
    (Kondrak 2002: 54)

    :param p: first segment
    :param q: second segment
    :return: ``self.R_c`` when P or Q is listed in ``self.consonants``,
        ``self.R_v`` otherwise
    """
    consonants = self.consonants

    # Both operands need the membership test: a bare ``or q`` is truthy for
    # every non-empty segment and would make R_v unreachable.
    return self.R_c if p in consonants or q in consonants else self.R_v
def V(self, p: str) ‑> int

Return vowel weight if P is vowel. (Kondrak 2002: 54)

Expand source code
def V(self, p: Text) -> int:
    """
    Vowel weight of segment P: 0 when P is listed in ``self.consonants``,
    ``self.C_vwl`` otherwise.
    (Kondrak 2002: 54)
    """
    if p in self.consonants:
        return 0
    return self.C_vwl
def align(self, seq1: Sequence[str], seq2: Sequence[str], epsilon: float = 0) ‑> Sequence[Tuple[str, str]]

Computes the alignment of two symbol sequences.

:param seq1: a sequence of symbols :param seq2: a sequence of symbols

:type epsilon: float (0.0 to 1.0) :param epsilon: Adjusts threshold similarity score for near-optimal alignments :return: Alignment(s) of seq1 and seq2 (Kondrak 2002: 51)

Expand source code
def align(
    self, seq1: Sequence[Text], seq2: Sequence[Text], epsilon: float = 0
) -> Sequence[Tuple[Text, Text]]:
    """
    Align two symbol sequences and return the aligned pairs.

    :param seq1: a sequence of symbols
    :param seq2: a sequence of symbols

    :type epsilon: float (0.0 to 1.0)
    :param epsilon: lowers the score threshold to ``(1 - epsilon) * best score``
        so that near-optimal alignments are retrieved as well
    :return: the pairs of every retrieved alignment, concatenated into one flat list
    (Kondrak 2002: 51)
    """

    assert 0.0 <= epsilon <= 1.0, "Epsilon must be between 0.0 and 1.0."

    rows, cols = len(seq1), len(seq2)
    # Row 0 and column 0 stay at 0, which is Kondrak's initialization.
    S = np.zeros((rows + 1, cols + 1), dtype=float)
    for i in range(1, rows + 1):
        for j in range(1, cols + 1):
            skip_1 = S[i - 1, j] + self.sigma_skip(seq1[i - 1])
            skip_2 = S[i, j - 1] + self.sigma_skip(seq2[j - 1])
            substitute = S[i - 1, j - 1] + self.sigma_sub(seq1[i - 1], seq2[j - 1])
            # Expansions need two elements on one side; with fewer than two
            # available they are scored -inf so they can never be chosen.
            compress = (
                S[i - 2, j - 1] + self.sigma_exp(seq2[j - 1], seq1[i - 2 : i])
                if i > 1
                else -ReAline.inf
            )
            expand = (
                S[i - 1, j - 2] + self.sigma_exp(seq1[i - 1], seq2[j - 2 : j])
                if j > 1
                else -ReAline.inf
            )
            S[i, j] = max(skip_1, skip_2, substitute, compress, expand, 0)
    T = (1 - epsilon) * np.amax(S)  # Threshold score for near-optimal alignments

    # Every cell that reaches the threshold is the end of an alignment; the
    # pairs of all such alignments are collected in row-major cell order.
    pairs = []
    for i in range(1, rows + 1):
        for j in range(1, cols + 1):
            if S[i, j] >= T:
                pairs.extend(self._retrieve(i, j, 0, S, T, seq1, seq2, []))
    return pairs
def delta(self, p: str, q: str) ‑> int

Return weighted sum of difference between P and Q. (Kondrak 2002: 54)

Expand source code
def delta(self, p: Text, q: Text) -> int:
    """
    Salience-weighted distance between segments P and Q.
    (Kondrak 2002: 54)

    Every feature returned by ``self.R(p, q)`` contributes ``self.diff`` for
    that feature multiplied by the feature's entry in ``self.salience``.
    """
    weighted = 0
    for feature in self.R(p, q):
        gap = self.diff(p, q, feature)
        weighted = weighted + gap * self.salience[feature]
    return weighted
def diff(self, p: str, q: str, f: str) ‑> int

Returns difference between phonetic segments P and Q for feature F. (Kondrak 2002: 52, 54)

Expand source code
def diff(self, p: Text, q: Text, f: Text) -> int:
    """
    Absolute difference between segments P and Q on feature F.
    (Kondrak 2002: 52, 54)

    Each segment's value for F is read from ``self.feature_matrix`` and turned
    into a number through ``self.similarity_matrix``.
    """
    p_row = self.feature_matrix[p]
    q_row = self.feature_matrix[q]
    numeric = self.similarity_matrix
    p_score = numeric[p_row[f]]
    q_score = numeric[q_row[f]]
    return abs(p_score - q_score)
def sanity_check(self)

Sanity check that ensures necessary features are present

Expand source code
def sanity_check(self):
    """
    Verify that the scoring tables agree with the feature matrix.

    Raises ``AssertionError`` when a salience key is not a feature of any
    phone, when a similarity_matrix key is not a feature value of any phone,
    or when a consonant has no entry in the feature matrix.
    """

    salience = self.salience
    similarity_matrix = self.similarity_matrix

    # Collect every feature name and every feature value used by any phone.
    feats = set()
    feat_values = set()
    for phone_fm in self.feature_matrix.values():
        feats.update(phone_fm.keys())
        feat_values.update(phone_fm.values())

    assert (
        not salience.keys() - feats
    ), f"salience and features for each sound in feature_matrix do not match: {salience.keys() - feats}"

    assert (
        not similarity_matrix.keys() - feat_values
    ), f"similarity_matrix and feature values for each sound in feature_matrix do not match: {similarity_matrix.keys() - feat_values}"

    missing = [c for c in self.consonants if c not in self.feature_matrix]
    assert not missing, f"Some consonants missing from feature_matrix: {missing}"
def sigma_exp(self, p: str, q: Sequence[str]) ‑> int

Returns score of an expansion/compression. (Kondrak 2002: 54)

Expand source code
def sigma_exp(self, p: Text, q: Sequence[Text]) -> int:
    """
    Score of matching the single segment P against the two segments in Q
    (an expansion/compression).
    (Kondrak 2002: 54)

    Only ``q[0]`` and ``q[1]`` are read.
    """
    first, second = q[0], q[1]
    score = self.C_exp - self.delta(p, first)
    score = score - self.delta(p, second)
    score = score - self.V(p)
    # Of the two segments in Q, only the larger vowel weight is subtracted.
    score = score - max(self.V(first), self.V(second))
    return score
def sigma_skip(self, p: str) ‑> int

Returns score of an indel of P. (Kondrak 2002: 54)

Expand source code
def sigma_skip(self, p: Text) -> int:
    """
    Score of inserting or deleting segment P.
    (Kondrak 2002: 54)

    The score is the constant ``self.C_skip``; P itself is not inspected.
    """
    skip_score = self.C_skip
    return skip_score
def sigma_sub(self, p: str, q: str) ‑> int

Returns score of a substitution of P with Q. (Kondrak 2002: 54)

Expand source code
def sigma_sub(self, p: Text, q: Text) -> int:
    """
    Score of substituting segment P with segment Q.
    (Kondrak 2002: 54)

    Starts from ``self.C_sub`` and subtracts the weighted distance between
    the segments and the vowel weight of each segment.
    """
    score = self.C_sub
    score = score - self.delta(p, q)
    score = score - self.V(p)
    score = score - self.V(q)
    return score