TTY OV  1
A cross platform python terminal
Loading...
Searching...
No Matches
normalise.py
Go to the documentation of this file.
1"""
2# +==== BEGIN polyguard =================+
3# LOGO:
4# input
5#
6# @#$%! hello
7# | |
8# +--+--+
9# |
10# v
11# +------------+
12# | POLY GUARD |
13# +------------+
14# | |
15# v v
16# BLOCKED PASSED
17# KO OK
18# /STOP
19# PROJECT: polyguard
20# FILE: normalise.py
21# CREATION DATE: 21-03-2026
22# LAST Modified: 19:51:6 21-03-2026
23# DESCRIPTION:
24# A module that provides a set of swearwords to match against when filtering, while allowing different languages to be toggled on and off.
25# /STOP
26# COPYRIGHT: (c) Henry Letellier
27# PURPOSE: Normalisation utilities for language word-lists.
28# Provides the `Normalise` class for cleaning and loading newline-delimited
29# word lists. No compatibility wrappers are included — callers should import
30# the class directly.
31# // AR
32# +==== END polyguard =================+
33"""
34
35from typing import Iterable, Dict, Set, Optional
36from threading import Lock
37from display_tty import Disp, initialise_logger
38
39
41 """Normalization utilities for word-list processing.
42
43 A singleton class exposing static methods for cleaning and normalizing
44 word lists. Methods are intentionally simple and easy to test. They do not
45 mutate external state; only singleton construction takes an explicit lock
46 (logging goes through the shared class-level `disp` logger).
47 """
48 _instance_lock: Lock = Lock()
49 _instance: Optional["Normalise"] = None
50 disp: Disp = initialise_logger(__qualname__, False)
51
def __new__(cls) -> "Normalise":
    """Return the shared instance, creating it on the first call.

    The check-and-create step runs while holding ``cls._instance_lock`` so
    that concurrent first calls cannot each build their own instance.

    Returns:
        Normalise: The object stored in ``cls._instance``.
    """
    with cls._instance_lock:
        already_built = cls._instance is not None
        if not already_built:
            # First construction: allocate the object and cache it on the class.
            cls._instance = super().__new__(cls)
    return cls._instance
57
@staticmethod
def normalize(words: Iterable[str]) -> Set[str]:
    """Build a set of stripped, lowercased words from an iterable.

    A debug message is logged first. Entries that are ``None`` are skipped;
    every other entry is stripped of surrounding whitespace, dropped if the
    result is empty, and lowercased before being added to the result.

    Args:
        words: Iterable of words to normalize (``None`` entries are tolerated).

    Returns:
        Set[str]: The unique, stripped, lowercase words.
    """
    Normalise.disp.log_debug("normalize() called")

    # Lazily strip every non-None entry; blanks are filtered in the next step.
    stripped = (entry.strip() for entry in words if entry is not None)

    return {token.lower() for token in stripped if token}
88
@staticmethod
def load_from_file(filepath: str, encoding: str = "utf-8") -> Set[str]:
    """Load and normalize words from a newline-delimited text file.

    Reads all lines of the file and passes them to normalize(). A missing
    or unreadable file (permission denied, path is a directory, other OS
    level failure, or content that cannot be decoded with ``encoding``)
    is logged as a warning and yields an empty set instead of raising.

    Args:
        filepath: Path to text file containing newline-delimited words.
        encoding: Character encoding for file read. Defaults to utf-8.

    Returns:
        Set[str]: Normalized set of words from the file, or an empty set if
        the file is missing or cannot be read.
    """
    Normalise.disp.log_debug(f"load_from_file called for {filepath}")

    try:
        with open(filepath, "r", encoding=encoding) as fh:
            lines = fh.readlines()
    except FileNotFoundError:
        Normalise.disp.log_warning(f"Wordlist file not found: {filepath}")
        return set()
    except (OSError, UnicodeDecodeError) as err:
        # FileNotFoundError is handled above; this branch covers the other
        # "unreadable" cases the contract promises not to raise for.
        Normalise.disp.log_warning(
            f"Wordlist file could not be read: {filepath} ({err})"
        )
        return set()

    return Normalise.normalize(lines)
113
@staticmethod
def load_mapping(mapping: "dict") -> Dict[object, Set[str]]:
    """Convert a mapping of word iterables into a mapping of normalized sets.

    Each value is run through normalize(); a value of ``None`` becomes an
    empty set. Keys are copied over unchanged and are not validated here.
    A debug message is logged on entry and an info message with the number
    of resulting entries is logged before returning.

    Args:
        mapping: Dictionary with arbitrary keys and word iterables (or
            ``None``) as values.

    Returns:
        Dict[object, Set[str]]: New dictionary with the same keys and
        normalized word sets as values.
    """
    Normalise.disp.log_debug("load_mapping called")

    out: Dict[object, Set[str]] = {
        label: set() if entries is None else Normalise.normalize(entries)
        for label, entries in mapping.items()
    }

    Normalise.disp.log_info(f"load_mapping produced {len(out)} entries")
    return out
Set[str] normalize(Iterable[str] words)
Definition normalise.py:59
Dict[object, Set[str]] load_mapping("dict" mapping)
Definition normalise.py:115
Set[str] load_from_file(str filepath, str encoding="utf-8")
Definition normalise.py:90