In this commit I have rewritten lexgo to use an external grep command process for word search. It is now much faster and simpler. I also added support for Spanish, French, Portuguese, and German.
joshkil committed Dec 4, 2024
1 parent d9fadb2 commit b09eaa6
Showing 9 changed files with 2,290,185 additions and 39 deletions.
9 changes: 9 additions & 0 deletions README.md
@@ -41,3 +41,12 @@ To run the tests:
```bash
python -m pytest
```
## Acknowledgements

The English lexicon used was taken from the [english-words](https://github.com/dwyl/english-words) repo. Thanks to the esteemed programmer [@dwyl](https://github.com/dwyl) for his excellent work.

The Spanish lexicon used was taken from the [diccionario-español.txt](https://github.com/JorgeDuenasLerin/diccionario-espanol-txt) repo. Thanks to the esteemed programmer [@JorgeDuenasLerin](https://github.com/JorgeDuenasLerin) for his excellent work.

The French lexicon used was taken from the [French-Dictionary](https://github.com/hbenbel/French-Dictionary) repo. Thanks to the esteemed programmer [@hbenbel](https://github.com/hbenbel) for his excellent work.

The Portuguese lexicon used was taken from the [words-pt](https://github.com/jfoclpf/words-pt) repo. Thanks to the esteemed programmer [@jfoclpf](https://github.com/jfoclpf) for his excellent work.
69 changes: 45 additions & 24 deletions lexgo/cli.py
@@ -1,8 +1,10 @@
import click
from lexgo import config
from lexgo import trie
import lexgo.trie
import subprocess
import pathlib

# Global Constants
ENGLISH_DICT_PATH = "eng_words_alpha.txt"

@click.command()
@click.argument(
@@ -18,7 +20,10 @@
@click.option("-xp", type=(str, int), multiple=True, default=[],
help="A letter and a position in which it must not appear.",
)
def lexgo(word, exclude, include, xp):
@click.option("-l", "--lang", type=click.Choice(['de', 'en', 'es', 'fr', 'pt'], case_sensitive=False), default='en',
help="The language dictionary to search.",
)
def lexgo(word, exclude, include, xp, lang):
    '''
    Search for WORD.
@@ -35,24 +40,40 @@ def lexgo(word, exclude, include, xp):
    - search 3 letter words starting with b, without letters 't' or 'd',
      with letter a, and without letters 'n' or 's' in the 3rd letter.
    '''
    config.load()
    fwords = trie.find_words(word, config.dictionary)
    candidates = []
    if exclude or include or (len(xp) > 0):
        for w in fwords:
            candidate = True
            for c in exclude:
                if c in w:
                    candidate = False
            for c in include:
                if c not in w:
                    candidate = False
            for tup in xp:
                for c in tup[0]:
                    if w[tup[1]-1] == c:
                        candidate = False
            if candidate:
                candidates.append(w)
    else:
        candidates.extend(fwords)
    click.echo(candidates)
    # verify that word has only alpha '.' and '*'

    # convert the simple '*' into the regular expression ".*"
    word = word.replace("*", ".*")

    # command stack
    command_stack = []

    # build initial grep command
    output = subprocess.Popen(["grep", "-w", "^" + word, config.DICT_PATHS[lang]],
                              stdout=subprocess.PIPE, text=True)
    command_stack.append(output)

    # add exclusions
    if exclude:
        p = subprocess.Popen(["grep", "-v", "[{}]".format(exclude)], stdin=command_stack[-1].stdout,
                             stdout=subprocess.PIPE, text=True)
        command_stack.append(p)

    # add inclusions
    if include:
        p = subprocess.Popen(["grep", "[{}]".format(include)], stdin=command_stack[-1].stdout,
                             stdout=subprocess.PIPE, text=True)
        command_stack.append(p)

    # add positional exclusions (xp)
    if xp:
        for letters, pos in xp:
            # drop words with any of the given letters at the given position
            gstr = "^" + "." * (pos - 1) + "[{}]".format(letters)
            proc = subprocess.Popen(["grep", "-v", gstr], stdin=command_stack[-1].stdout,
                                    stdout=subprocess.PIPE, text=True)
            command_stack.append(proc)

    # execute command stack
    out, error = command_stack[-1].communicate()
    if not error:
        click.echo(out)
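The `# verify that word has only alpha '.' and '*'` note above is left unimplemented in this hunk. A minimal sketch of such a guard, using a hypothetical `validate_pattern` helper that is not part of the commit, might look like this:

```python
import re
import click

def validate_pattern(word: str) -> str:
    # Allow only letters, '.' wildcards, and '*' before the '*' -> '.*' expansion.
    if not re.fullmatch(r"[A-Za-z.*]+", word):
        raise click.BadParameter("WORD may contain only letters, '.' and '*'.")
    return word.replace("*", ".*")
```

For the docstring example (3-letter words starting with b, excluding 't'/'d', including 'a', and excluding 'n'/'s' at position 3), the chained processes behave like the shell pipeline `grep -w "^b.." en.txt | grep -v "[td]" | grep "[a]" | grep -v "^..[ns]"`.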
22 changes: 8 additions & 14 deletions lexgo/config.py
@@ -1,17 +1,11 @@
import lexgo.trie
import importlib.resources
import pathlib

# Global Constants
ENGLISH_DICT_PATH = "eng_words_alpha.txt"

# Global Variables
dictionary = lexgo.trie.Node("", False)

def load():
    dictionary_path = ENGLISH_DICT_PATH
    data_file = pathlib.Path(__file__).parent.joinpath("data", dictionary_path)
    with open(data_file) as f:
        # Read the dictionary file into a list
        words = list(f)
    lexgo.trie.setup(words, dictionary)
DATA_DIR_NAME = "data"
DICT_PATHS = {
"en" : pathlib.Path(__file__).parent.joinpath(DATA_DIR_NAME, "en.txt").absolute().as_posix(),
"es" : pathlib.Path(__file__).parent.joinpath(DATA_DIR_NAME, "es.txt").absolute().as_posix(),
"fr" : pathlib.Path(__file__).parent.joinpath(DATA_DIR_NAME, "fr.txt").absolute().as_posix(),
"pt" : pathlib.Path(__file__).parent.joinpath(DATA_DIR_NAME, "pt.txt").absolute().as_posix(),
"de" : pathlib.Path(__file__).parent.joinpath(DATA_DIR_NAME, "de.txt").absolute().as_posix()
}
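As a quick usage sketch (not part of the commit), the new table can be consumed directly, assuming the bundled data files described above:

```python
from lexgo import config

# Each entry is the absolute POSIX path to a bundled word list,
# keyed by its two-letter language code.
spanish_words = config.DICT_PATHS["es"]  # e.g. .../lexgo/data/es.txt
print(spanish_words)
```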
