In this commit I have rewritten lexgo to use an external grep command process for word search. It is now much faster and simpler. I also added support for Spanish, French, Portuguese, and German.
joshkil committed Dec 4, 2024
1 parent d9fadb2 commit b09eaa6
Showing 9 changed files with 2,290,185 additions and 39 deletions.
9 changes: 9 additions & 0 deletions README.md
@@ -41,3 +41,12 @@ To run the tests:
```bash
python -m pytest
```
## Acknowledgements

The English lexicon used was taken from the [english-words](https://github.com/dwyl/english-words) repo. Thanks to the esteemed programmer [@dwyl](https://github.com/dwyl) for his excellent work.

The Spanish lexicon used was taken from the [diccionario-español.txt](https://github.com/JorgeDuenasLerin/diccionario-espanol-txt) repo. Thanks to the esteemed programmer [@JorgeDuenasLerin](https://github.com/JorgeDuenasLerin) for his excellent work.

The French lexicon used was taken from the [French-Dictionary](https://github.com/hbenbel/French-Dictionary) repo. Thanks to the esteemed programmer [@hbenbel](https://github.com/hbenbel) for his excellent work.

The Portuguese lexicon used was taken from the [words-pt](https://github.com/jfoclpf/words-pt) repo. Thanks to the esteemed programmer [@jfoclpf](https://github.com/jfoclpf) for his excellent work.
69 changes: 45 additions & 24 deletions lexgo/cli.py
@@ -1,8 +1,10 @@
import click
from lexgo import config
from lexgo import trie
import lexgo.trie
import subprocess
import pathlib

# Global Constants
ENGLISH_DICT_PATH = "eng_words_alpha.txt"

@click.command()
@click.argument(
@@ -18,7 +20,10 @@
@click.option("-xp", type=(str, int), multiple=True, default=[],
help="A letter and a position in which it must not appear.",
)
def lexgo(word, exclude, include, xp):
@click.option("-l", "--lang", type=click.Choice(['de', 'en', 'es', 'fr', 'pt'], case_sensitive=False), default='en',
help="The language dictionary to search.",
)
def lexgo(word, exclude, include, xp, lang):
    '''
    Search for WORD.
@@ -35,24 +40,40 @@ def lexgo(word, exclude, include, xp):
    - search 3 letter words starting with b, without letters 't' or 'd',
      with letter a, and without letters 'n' or 's' in the 3rd letter.
    '''
    config.load()
    fwords = trie.find_words(word, config.dictionary)
    candidates = []
    if exclude or include or (len(xp) > 0):
        for w in fwords:
            candidate = True
            for c in exclude:
                if c in w:
                    candidate = False
            for c in include:
                if c not in w:
                    candidate = False
            for tup in xp:
                for c in tup[0]:
                    if w[tup[1]-1] == c:
                        candidate = False
            if candidate:
                candidates.append(w)
    else:
        candidates.extend(fwords)
    click.echo(candidates)
    # verify that word has only alpha '.' and '*'

    # convert the simple '*' into the regular expression ".*"
    word = word.replace("*", ".*")

    # command stack
    command_stack = []

    # build initial grep command
    output = subprocess.Popen(["grep", "-w", "^" + word, config.DICT_PATHS[lang]],
                              stdout=subprocess.PIPE, text=True)
    command_stack.append(output)

    # add exclusions
    if exclude:
        p = subprocess.Popen(["grep", "-v", "[{}]".format(exclude)], stdin=command_stack[-1].stdout,
                             stdout=subprocess.PIPE, text=True)
        command_stack.append(p)

    # add inclusions
    if include:
        p = subprocess.Popen(["grep", "[{}]".format(include)], stdin=command_stack[-1].stdout,
                             stdout=subprocess.PIPE, text=True)
        command_stack.append(p)

    # add positional exclusions (xp)
    if xp:
        for letters, pos in xp:
            # drop words with any of the given letters at the given position
            gstr = "^" + "." * (pos - 1) + "[{}]".format(letters)
            proc = subprocess.Popen(["grep", "-v", gstr], stdin=command_stack[-1].stdout,
                                    stdout=subprocess.PIPE, text=True)
            command_stack.append(proc)

    # execute command stack
    out, error = command_stack[-1].communicate()
    if not error:
        click.echo(out)
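The `# verify that word has only alpha '.' and '*'` note above is left unimplemented in this hunk. A minimal sketch of such a guard, using a hypothetical `validate_pattern` helper that is not part of the commit, might look like this:

```python
import re
import click

def validate_pattern(word: str) -> str:
    # Allow only letters, '.' wildcards, and '*' before the '*' -> '.*' expansion.
    if not re.fullmatch(r"[A-Za-z.*]+", word):
        raise click.BadParameter("WORD may contain only letters, '.' and '*'.")
    return word.replace("*", ".*")
```

For the docstring example (3-letter words starting with b, excluding 't'/'d', including 'a', and excluding 'n'/'s' at position 3), the chained processes behave like the shell pipeline `grep -w "^b.." en.txt | grep -v "[td]" | grep "[a]" | grep -v "^..[ns]"`.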
22 changes: 8 additions & 14 deletions lexgo/config.py
@@ -1,17 +1,11 @@
import lexgo.trie
import importlib.resources
import pathlib

# Global Constants
ENGLISH_DICT_PATH = "eng_words_alpha.txt"

# Global Variables
dictionary = lexgo.trie.Node("", False)

def load():
    dictionary_path = ENGLISH_DICT_PATH
    data_file = pathlib.Path(__file__).parent.joinpath("data", dictionary_path)
    with open(data_file) as f:
        # Read the dictionary file into a list
        words = list(f)
    lexgo.trie.setup(words, dictionary)
DATA_DIR_NAME = "data"
DICT_PATHS = {
"en" : pathlib.Path(__file__).parent.joinpath(DATA_DIR_NAME, "en.txt").absolute().as_posix(),
"es" : pathlib.Path(__file__).parent.joinpath(DATA_DIR_NAME, "es.txt").absolute().as_posix(),
"fr" : pathlib.Path(__file__).parent.joinpath(DATA_DIR_NAME, "fr.txt").absolute().as_posix(),
"pt" : pathlib.Path(__file__).parent.joinpath(DATA_DIR_NAME, "pt.txt").absolute().as_posix(),
"de" : pathlib.Path(__file__).parent.joinpath(DATA_DIR_NAME, "de.txt").absolute().as_posix()
}
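As a quick usage sketch (not part of the commit), the new table can be consumed directly, assuming the bundled data files described above:

```python
from lexgo import config

# Each entry is the absolute POSIX path to a bundled word list,
# keyed by its two-letter language code.
spanish_words = config.DICT_PATHS["es"]  # e.g. .../lexgo/data/es.txt
print(spanish_words)
```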
