TTY OV  1
A cross platform python terminal
Loading...
Searching...
No Matches
constants.py
Go to the documentation of this file.
1"""
2# +==== BEGIN polyguard =================+
3# LOGO:
4# input
5#
6# @#$%! hello
7# | |
8# +--+--+
9# |
10# v
11# +------------+
12# | POLY GUARD |
13# +------------+
14# | |
15# v v
16# BLOCKED PASSED
17# KO OK
18# /STOP
19# PROJECT: polyguard
20# FILE: constants.py
21# CREATION DATE: 20-03-2026
22# LAST Modified: 3:53:29 22-03-2026
23# DESCRIPTION:
24# A module that provides a set of swearwords to filter against, while allowing individual languages to be toggled on and off.
25# /STOP
26# COPYRIGHT: (c) Henry Letellier
27# PURPOSE: This is the file containing the constants of the class.
28# // AR
29# +==== END polyguard =================+
30"""
31
32from enum import Enum
33from pathlib import Path
34from dataclasses import dataclass
35
36
# Filesystem locations
# `MODULE_ROOT` is the directory two levels above this file's resolved path
# (the package root, i.e. the folder that holds `src` and `data`).
MODULE_ROOT = Path(__file__).resolve().parent.parent

# Where the SQLite database is looked up by default: the package `data` folder.
# A different location can be supplied when constructing `PolyGuard`.
DEFAULT_DB_PATH = MODULE_ROOT.joinpath("data", "polyguard.sqlite")

# Default `wordlists` directory, located beside `MODULE_ROOT` (one level up).
DEFAULT_SOURCE_WORDS = MODULE_ROOT.parent.joinpath("wordlists")

# Upper bound on how many per-language caches a single `PolyGuard` keeps in
# memory. Adjustable: bounds memory use while still caching frequent languages.
DEFAULT_CACHE_MAX_LANGS = 8
50
51
52@dataclass
54 """This is the class in charge of allowing the user to configure the languages they wish to check for.
55 """
56 # English variants
57 en: bool = True
58 en_uk: bool = False
59 en_us: bool = False
60 en_au: bool = False
61
62 # Western European
63 fr: bool = True
64 fr_ca: bool = False
65 es: bool = True
66 es_es: bool = False
67 es_mx: bool = False
68 es_ar: bool = False
69 de: bool = True
70 de_at: bool = False
71 de_ch: bool = False
72 de_de: bool = False
73 it: bool = True
74 it_ch: bool = False
75 it_it: bool = False
76 pt: bool = True # generic Portuguese (Europeans usually mean pt-PT)
77 pt_pt: bool = False
78 pt_br: bool = False
79 pt_ao: bool = False
80 nl: bool = True
81 nl_nl: bool = False
82 nl_be: bool = False
83 nl_sr: bool = False
84
85 # Central & Eastern Europe
86 pl: bool = False
87 pl_pl: bool = False
88 pl_ua: bool = False
89 pl_lt: bool = False
90 ro: bool = False
91 ro_ro: bool = False
92 ro_md: bool = False
93 ro_rs: bool = False
94 hu: bool = False
95 hu_hu: bool = False
96 hu_at: bool = False
97 hu_sk: bool = False
98 hu_rs: bool = False
99
100 # Nordic & other European languages
101 sv: bool = False
102 sv_se: bool = False
103 sv_fi: bool = False
104 sv_no: bool = False
105 sv_dk: bool = False
106 da: bool = False
107 da_dk: bool = False
108 da_se: bool = False
109 da_no: bool = False
110 da_gl: bool = False
111 no: bool = False
112 no_no: bool = False
113 no_se: bool = False
114 no_dk: bool = False
115 no_sa: bool = False
116 fi: bool = False
117 fi_fi: bool = False
118 fi_se: bool = False
119 fi_ru: bool = False
120 fi_ee: bool = False
121 el: bool = False # Greek
122 el_gr: bool = False
123 el_cy: bool = False
124 el_tr: bool = False
125 el_it: bool = False
126 el_al: bool = False
127
128 # Other common regional languages
129 tr: bool = False # Turkish (commonly encountered in parts of Europe)
130 tr_tr: bool = False
131 tr_cy: bool = False
132 tr_bg: bool = False
133 tr_gr: bool = False
134 tr_mk: bool = False
135 ru: bool = False # Russian (widely understood in some regions)
136 ru_ru: bool = False
137 ru_by: bool = False
138 ru_kz: bool = False
139 ru_ua: bool = False
140 ru_md: bool = False
141
142 # Misc / special flags
143 brainrot: bool = False
144 brainrot_twitch: bool = False
145 brainrot_tiktok: bool = False
146 brainrot_gaming: bool = False
147 brainrot_alpha: bool = False
148 brainrot_discord: bool = False
149 other: bool = False
150
151
class Langs(Enum):
    """Enumeration of the language and word-list identifiers.

    Every member's value is the lowercase string form of its own name
    (for example ``Langs.EN_UK.value == "en_uk"``). A bare two-letter
    member is the generic language; suffixed members are its regional
    variants.
    """

    # --- English (generic + regional variants) ---
    EN = "en"
    EN_UK = "en_uk"
    EN_US = "en_us"
    EN_AU = "en_au"

    # --- Romance: French, Spanish, Italian, Portuguese ---
    FR = "fr"
    FR_CA = "fr_ca"
    ES = "es"
    ES_ES = "es_es"
    ES_MX = "es_mx"
    ES_AR = "es_ar"
    IT = "it"
    IT_IT = "it_it"
    IT_CH = "it_ch"
    PT = "pt"
    PT_PT = "pt_pt"
    PT_BR = "pt_br"
    PT_AO = "pt_ao"

    # --- Germanic: German, Dutch ---
    DE = "de"
    DE_DE = "de_de"
    DE_AT = "de_at"
    DE_CH = "de_ch"
    NL = "nl"
    NL_NL = "nl_nl"
    NL_BE = "nl_be"
    NL_SR = "nl_sr"

    # --- Central & Eastern Europe: Polish, Romanian, Hungarian ---
    PL = "pl"
    PL_PL = "pl_pl"
    PL_UA = "pl_ua"
    PL_LT = "pl_lt"
    RO = "ro"
    RO_RO = "ro_ro"
    RO_MD = "ro_md"
    RO_RS = "ro_rs"
    HU = "hu"
    HU_HU = "hu_hu"
    HU_AT = "hu_at"
    HU_SK = "hu_sk"
    HU_RS = "hu_rs"

    # --- Nordic: Swedish, Danish, Norwegian, Finnish ---
    SV = "sv"
    SV_SE = "sv_se"
    SV_FI = "sv_fi"
    SV_NO = "sv_no"
    SV_DK = "sv_dk"
    DA = "da"
    DA_DK = "da_dk"
    DA_SE = "da_se"
    DA_NO = "da_no"
    DA_GL = "da_gl"
    NO = "no"
    NO_NO = "no_no"
    NO_SE = "no_se"
    NO_DK = "no_dk"
    NO_SA = "no_sa"
    FI = "fi"
    FI_FI = "fi_fi"
    FI_SE = "fi_se"
    FI_RU = "fi_ru"
    FI_EE = "fi_ee"

    # --- Southern Europe: Greek ---
    EL = "el"
    EL_GR = "el_gr"
    EL_CY = "el_cy"
    EL_TR = "el_tr"
    EL_IT = "el_it"
    EL_AL = "el_al"

    # --- Other commonly encountered languages: Turkish, Russian ---
    TR = "tr"
    TR_TR = "tr_tr"
    TR_CY = "tr_cy"
    TR_BG = "tr_bg"
    TR_GR = "tr_gr"
    TR_MK = "tr_mk"
    RU = "ru"
    RU_RU = "ru_ru"
    RU_BY = "ru_by"
    RU_KZ = "ru_kz"
    RU_UA = "ru_ua"
    RU_MD = "ru_md"

    # --- Brainrot (Gen Alpha internet slang) and its sub-lists ---
    BRAINROT = "brainrot"
    BRAINROT_TWITCH = "brainrot_twitch"
    BRAINROT_TIKTOK = "brainrot_tiktok"
    BRAINROT_GAMING = "brainrot_gaming"
    BRAINROT_ALPHA = "brainrot_alpha"
    BRAINROT_DISCORD = "brainrot_discord"

    # --- Catch-all ---
    OTHER = "other"
252
253
# Prefix that marks an input line as a command rather than a word to check,
# together with its pre-computed length.
COMMAND_TOKEN: str = ":"
COMMAND_TOKEN_LENGTH: int = len(COMMAND_TOKEN)

# User-facing texts for the interactive REPL / CLI.
# Each text is assembled from one tuple entry per output line; an empty
# entry produces a blank line (or, in last position, the trailing newline).

# Banner printed when interactive mode starts (no trailing newline).
POLY_BOOT_MSG = "\n".join(
    (
        "polyguard — interactive mode",
        f"Type '{COMMAND_TOKEN}help' for a short list of commands, '{COMMAND_TOKEN}man' for more details.",
        f"Enter a word to test it; '{COMMAND_TOKEN}exit' or '{COMMAND_TOKEN}quit' to leave.",
    )
)

# Short command summary.
POLY_HELP_TEXT = "\n".join(
    (
        f"Commands (prefix with '{COMMAND_TOKEN}' e.g. '{COMMAND_TOKEN}help'):",
        f" {COMMAND_TOKEN}help Short help text (this message)",
        f" {COMMAND_TOKEN}man Longer manual describing usage and options",
        f" {COMMAND_TOKEN}exit, {COMMAND_TOKEN}quit Leave the REPL",
        f" {COMMAND_TOKEN}db Show configured DB path",
        " <word> Type a word to check it (no prefix required)",
        f" {COMMAND_TOKEN}log <on/off> Toggle logging output",
        f" {COMMAND_TOKEN}langopt <lang> <on/off> Enable/disable a language in your config",
        f" {COMMAND_TOKEN}langs List languages available in the DB (with counts)",
        f" {COMMAND_TOKEN}langstatus Show which languages are enabled in your config",
        f" {COMMAND_TOKEN}word <w> [<lang>] Check a word optionally for a specific language",
        "",
    )
)

# Longer manual text.
POLY_MAN_TEXT = "\n".join(
    (
        "polyguard manual",
        "",
        "This REPL accepts single-word queries and returns whether the word",
        "is considered a swearword according to the configured language lists.",
        "",
        "If started with --db-path, that DB will be used; otherwise the package",
        "default DB is used. You can also pipe words via stdin for batch checks.",
        "",
        "Command prefixing:",
        " To avoid conflicts with words that match command names, commands must be",
        f" prefixed with '{COMMAND_TOKEN}' (for example '{COMMAND_TOKEN}langs' or '{COMMAND_TOKEN}langopt en_uk off'). Any input",
        f" that does not start with '{COMMAND_TOKEN}' is treated as a word to check.",
        "",
        "Additional commands:",
        f" {COMMAND_TOKEN}log <on/off> Turn logging on or off for the running session.",
        f" {COMMAND_TOKEN}langopt <lang> <on/off> Temporarily enable or disable a language in your session.",
        f" {COMMAND_TOKEN}langs Show languages present in the DB and word counts.",
        f" {COMMAND_TOKEN}langstatus Show which languages are currently enabled in your config.",
        f" {COMMAND_TOKEN}word <w> [<lang>] Check <w> in either your current config or a specific language.",
        "",
    )
)
295
# Prompt string, verdict labels and the DB-path message template
# (the template expects a `path` keyword when formatted).
POLY_PROMPT = "polyguard> "
STATUS_BLOCKED = "BLOCKED"
STATUS_OK = "OK"
DB_PATH_FMT = "DB path: {path}"

# Translation table for `str.translate` that deletes newline, carriage
# return, tab and form-feed characters (each is mapped to None).
TOKENISER_TABLE = str.maketrans(dict.fromkeys("\n\r\t\f"))