2# +==== BEGIN polyguard =================+
21# CREATION DATE: 13-03-2026
22# LAST Modified: 3:36:1 22-03-2026
24# A module that provides sets of swearwords to check text against when filtering, while allowing individual languages to be toggled on and off.
26# COPYRIGHT: (c) Henry Letellier
27# PURPOSE: This is the entry file of the module.
29# +==== END polyguard =================+
33from typing
import Any, Optional, List, Set, Dict
34from threading
import Lock
35from collections
import OrderedDict
38from display_tty
import Disp, initialise_logger
39from warnings
import warn
41from .
import constants
as POLY_CONST
42from .sqlite_handler
import SQLiteHandler
46 """Singleton profanity filter with multilingual support and LRU caching.
48 Manages a persistent connection to an SQLite database of language-specific
49 word lists. Provides thread-safe word detection with per-language caching
50 to optimize repeated lookups. Supports configurable language subsets and
51 can check single words or phrases.
54 This class uses the singleton pattern. Multiple instantiations return the same instance.
58 _instance: Optional[
"PolyGuard"] =
None
59 _class_lock: Lock = Lock()
60 disp: Disp = initialise_logger(__qualname__,
False)
62 def __new__(cls, *args, **kwargs) -> "PolyGuard":
63 """Create or return singleton instance.
66 PolyGuard: The singleton instance.
73 def __init__(self, langs: POLY_CONST.LangConfig, db_path: Optional[str] =
None, success: int = 0, error: int = 1, log: bool =
True, debug: bool =
False) ->
None:
74 """Initialize the PolyGuard instance.
76 On first call, attempts to establish a persistent database connection.
77 If the connection fails, the instance will attempt to reconnect on demand.
80 langs (LangConfig): LangConfig instance specifying which languages to check.
81 db_path (Optional[str]): Path to the SQLite database. Default: None (package default).
82 success (int): Exit code for successful initialization. Default: 0.
83 error (int): Exit code for failures. Default: 1.
84 log (bool): Enable logging output. Default: True.
85 debug (bool): Enable debug-level logging. Default: False.
105 self.
disp.update_disp_debug(debug=debug)
108 self.
_lang_cache:
"OrderedDict[POLY_CONST.Langs, set]" = OrderedDict()
111 f
"PolyGuard initialised; db_path={self.db_path}; cache_limit={self._cache_limit}")
114 self.
disp.log_warning(
115 "Initial DB connection failed; will attempt on demand")
117 def __call__(self, *args: Any, **kwds: Any) -> int:
118 """Callable interface. Delegates to main().
121 int: Result code from main() (0 for success, non-zero for error).
126 """Sanitize and normalize input word for processing.
128 Strips whitespace, converts to lowercase, and validates non-empty.
131 word (str): Raw input word or phrase to sanitize.
134 Optional[str]: Lowercased, stripped word, or None if empty/invalid.
146 text_low = text.lower()
150 """Ensure database is initialized on first use."""
156 """Resolve language configuration, falling back to default if needed.
159 language (Optional[LangConfig]): Language config override. Default: None.
162 LangConfig: Provided language config or default instance config.
169 """Tokenize text by splitting on whitespace after removing delimiters.
171 Uses pre-computed translation table for fast processing. Employs CPython's
172 optimized .split() fast-path (any-whitespace split with empty filtering).
175 text (str): Text to tokenize (assumed already lowercased).
178 List[str]: List of non-empty token strings.
180 split_data = text.translate(POLY_CONST.TOKENISER_TABLE).split()
184 """Extract first profanity match from word or phrase.
186 Tokenizes input and checks each token against enabled language word lists.
187 Returns immediately on first match for efficiency.
190 word (str): The word or phrase to check.
191 languages_to_check (Optional[LangConfig]): Language config override. Default: None.
194 Optional[str]: First matching swearword token found, or None if none detected.
198 self.
disp.log_error(
"Initial caching failed, retuning early")
210 def is_a_swearword(self, word: str, *, languages_to_check: Optional[POLY_CONST.LangConfig] =
None) -> bool:
211 """Check if a word or phrase contains profanity.
213 Checks individual tokens in phrases and the full phrase itself.
214 Uses per-language LRU cache to optimize repeated lookups.
217 word (str): The word or phrase to check (whitespace-stripped).
218 languages_to_check (Optional[LangConfig]): Language config override. Default: None.
221 bool: True if any enabled language contains the word, False otherwise.
223 self.
disp.log_debug(f
"is_a_swearword called with word={word!r}")
226 self.
disp.log_error(
"Initial caching failed, retuning early")
240 """Retrieve all swearwords for enabled languages.
242 Returns cached word sets if loaded, otherwise queries database.
243 Useful for inspection, testing, or bulk operations.
246 languages (Optional[LangConfig]): Language config override. Default: None.
249 Dict[str, Set]: Dictionary mapping language names to sets of profanity words.
250 Empty dict if database connection unavailable.
256 "No DB connection available; aborting check"
259 for lang
in POLY_CONST.Langs:
260 lang_state = getattr(language_check, lang.value,
None)
261 if lang_state
is None:
262 self.
disp.log_warning(f
"{lang.value} is defined but not set")
264 if lang_state
is not None and lang_state
is False:
265 self.
disp.log_debug(f
"{lang.value} is set to not be retrieved")
268 if cache_node
is not None:
269 final[str(lang.name)] = cache_node
271 final[str(lang.name)] = self.
sqlitesqlite.get_words(lang)
274 def _check_token(self, text_low: str, languages: POLY_CONST.LangConfig) -> bool:
275 """Check if a single token exists in any enabled language's word list.
277 Internal method that performs the actual word lookup using cache and
278 database queries. Token must already be lowercased.
281 text_low (str): Lowercased token to search for.
282 languages (LangConfig): LangConfig specifying which languages to query.
285 bool: True if token found in any enabled language, False otherwise.
288 RuntimeError: If database connection becomes unavailable mid-check.
292 for lang
in POLY_CONST.Langs:
294 if getattr(languages, lang.value):
295 to_check.append(lang)
296 except AttributeError:
304 for lang
in to_check:
307 if cached
is not None:
311 except (KeyError, AttributeError):
314 if text_low
in cached:
315 self.
disp.log_debug(f
"Cache hit for lang={lang.value}")
326 "No DB connection available; aborting check"
337 except (sqlite3.Error, RuntimeError)
as exc:
338 self.
disp.log_error(f
"DB access failed in is_a_swearword: {exc}")
340 warn(f
"PolyGuard DB access failed: {exc}")
344 for lang, words
in loaded.items():
349 except (KeyError, AttributeError):
355 evicted_lang, _ = self.
_lang_cache.popitem(last=
False)
357 f
"Evicted lang from cache: {evicted_lang.value}")
358 except (KeyError, IndexError):
361 if text_low
in words:
363 f
"Match found after DB load for lang={lang.value}"
370 """Probe the database and preload enabled languages into cache.
372 Attempts to verify database accessibility, then preloads up to
373 cache_limit languages into memory for faster lookup.
376 int: Success code (0) if DB ready, error code otherwise.
379 self.
disp.log_debug(
"main() called to probe DB")
384 self.
disp.log_error(
"DB probe failed: cannot connect")
390 _ = self.
sqlitesqlite.get_words(next(iter(POLY_CONST.Langs)))
394 for lang
in POLY_CONST.Langs:
397 to_preload.append(lang)
398 except AttributeError:
402 for lang
in to_preload:
411 except (KeyError, AttributeError):
416 self.
disp.log_info(
"DB probe successful; ready")
419 except (sqlite3.Error, RuntimeError)
as exc:
421 self.
disp.log_error(f
"DB probe failed: {exc}")
423 warn(f
"PolyGuard failed to open DB '{self.db_path}': {exc}")
428 """Ensure a persistent SQLiteHandler is created and connected.
430 Creates a new handler if needed or reconnects an existing one.
431 Cleans up stale connections gracefully.
434 bool: True if connection is now open and usable, False otherwise.
441 except (sqlite3.Error, RuntimeError):
444 except (sqlite3.Error, RuntimeError):
450 str(self.
db_path), readonly=
True, log=self.
log)
454 except (sqlite3.Error, RuntimeError)
as exc:
455 self.
disp.log_error(f
"ensure_connection failed: {exc}")
460if __name__ ==
"__main__":
461 CONF = POLY_CONST.LangConfig()
POLY_CONST.LangConfig _determine_language_set(self, Optional[POLY_CONST.LangConfig] language)
List[str] _tokenify(self, str text)
Dict[str, Set] get_list_of_swearwords(self, *, Optional[POLY_CONST.LangConfig] languages=None)
Optional[SQLiteHandler] sqlite
POLY_CONST.LangConfig default_choice
Optional[str] extract_swearword_if_present(self, str word, *, Optional[POLY_CONST.LangConfig] languages_to_check=None)
"PolyGuard" __new__(cls, *args, **kwargs)
int __call__(self, *Any args, **Any kwds)
bool is_a_swearword(self, str word, *, Optional[POLY_CONST.LangConfig] languages_to_check=None)
bool ensure_connection(self)
bool _ensure_initialized(self)
None __init__(self, POLY_CONST.LangConfig langs, Optional[str] db_path=None, int success=0, int error=1, bool log=True, bool debug=False)
Optional[str] _sanify_word(self, str word)
bool _check_token(self, str text_low, POLY_CONST.LangConfig languages)