Skip to content

Commit f0c0f99

Browse files
committed
better search functionality for surah
1 parent 2cda5a9 commit f0c0f99

File tree

5 files changed

+163
-39
lines changed

5 files changed

+163
-39
lines changed

.gitignore

+3-1
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,6 @@ bot/utils/__pycache__/
99
test*
1010
users.json
1111
realUsers.json
12-
*.zip
12+
*.zip
13+
realUsers.*
14+
activeUsers.json

bot/handlers/message/handleMessage.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -78,16 +78,13 @@ def checkSurah(userID, text: str):
7878
</b>
7979
"""
8080

81-
for i in text.lower().replace(" ", ""):
82-
if i not in string.ascii_lowercase:
83-
return {"reply": defaultReply, "buttons": None}
8481

8582
res: list = Quran.searchSurah(text)
8683
if not res:
8784
return {"reply": defaultReply, "buttons": None}
8885

8986
buttons = []
90-
for surah, number in res:
87+
for number, surah in res:
9188
buttons.append(
9289
InlineKeyboardButton(
9390
f"{number} {surah}", callback_data=f"selectedSurah {number}"

bot/quran.py

+3-33
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
import json
33
import secrets
44

5+
from bot.utils.searchSurah import fuzzySearchSurah
6+
57

68
"""
79
Structure of json files in Data folder:
@@ -163,39 +165,7 @@ def getAyahNumberCount(self, surahNo: int):
163165
return len(self.DATA["english_2"][surahNo - 1])
164166

165167
def searchSurah(self, string):
166-
matching_strings = []
167-
exact_match = False
168-
string_list = sorted(self.SURAH_NAMES)
169-
170-
for s in string_list:
171-
a = s.split("-")[-1].lower()
172-
b = string.lower()
173-
c = b.replace("k", "q")
174-
if b == a:
175-
exact_match = True
176-
matching_strings = [s]
177-
break
178-
elif a.replace("'", "").strip() in string.lower():
179-
matching_strings.append(s)
180-
elif c == a:
181-
matching_strings.append(s)
182-
183-
if not exact_match:
184-
for s in string_list:
185-
s_lower = s.split("-")[-1].lower()
186-
string_lower = string.lower()
187-
if all(c in s_lower for c in string_lower):
188-
matching_strings.append(s)
189-
elif all(c in string_lower for c in s_lower):
190-
matching_strings.append(s)
191-
matching_strings = list({i: 0 for i in matching_strings})[:3]
192-
193-
data = [
194-
[surah, self.SURAH_NAMES.index(surah) + 1] for surah in matching_strings
195-
]
196-
data.sort(key=lambda x: x[1])
197-
198-
return data
168+
return fuzzySearchSurah(string)
199169

200170
def detectLanguage(self, text: str):
201171
if not text:

bot/utils/searchSurah.py

+154
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
from fuzzywuzzy import process
2+
import re
3+
4+
# I could just read the surah names from the json file
5+
# Maybe I'll do it later
6+
7+
surahNames = [
8+
"Al-Fatihah",
9+
"Al-Baqarah",
10+
"Ali 'Imran",
11+
"An-Nisa",
12+
"Al-Ma'idah",
13+
"Al-An'am",
14+
"Al-A'raf",
15+
"Al-Anfal",
16+
"At-Tawbah",
17+
"Yunus",
18+
"Hud",
19+
"Yusuf",
20+
"Ar-Ra'd",
21+
"Ibrahim",
22+
"Al-Hijr",
23+
"An-Nahl",
24+
"Al-Isra",
25+
"Al-Kahf",
26+
"Maryam",
27+
"Taha",
28+
"Al-Anbya",
29+
"Al-Hajj",
30+
"Al-Mu'minun",
31+
"An-Nur",
32+
"Al-Furqan",
33+
"Ash-Shu'ara",
34+
"An-Naml",
35+
"Al-Qasas",
36+
"Al-'Ankabut",
37+
"Ar-Rum",
38+
"Luqman",
39+
"As-Sajdah",
40+
"Al-Ahzab",
41+
"Saba",
42+
"Fatir",
43+
"Ya-Sin",
44+
"As-Saffat",
45+
"Sad",
46+
"Az-Zumar",
47+
"Ghafir",
48+
"Fussilat",
49+
"Ash-Shuraa",
50+
"Az-Zukhruf",
51+
"Ad-Dukhan",
52+
"Al-Jathiyah",
53+
"Al-Ahqaf",
54+
"Muhammad",
55+
"Al-Fath",
56+
"Al-Hujurat",
57+
"Qaf",
58+
"Adh-Dhariyat",
59+
"At-Tur",
60+
"An-Najm",
61+
"Al-Qamar",
62+
"Ar-Rahman",
63+
"Al-Waqi'ah",
64+
"Al-Hadid",
65+
"Al-Mujadila",
66+
"Al-Hashr",
67+
"Al-Mumtahanah",
68+
"As-Saf",
69+
"Al-Jumu'ah",
70+
"Al-Munafiqun",
71+
"At-Taghabun",
72+
"At-Talaq",
73+
"At-Tahrim",
74+
"Al-Mulk",
75+
"Al-Qalam",
76+
"Al-Haqqah",
77+
"Al-Ma'arij",
78+
"Nuh",
79+
"Al-Jinn",
80+
"Al-Muzzammil",
81+
"Al-Muddaththir",
82+
"Al-Qiyamah",
83+
"Al-Insan",
84+
"Al-Mursalat",
85+
"An-Naba",
86+
"An-Nazi'at",
87+
"'Abasa",
88+
"At-Takwir",
89+
"Al-Infitar",
90+
"Al-Mutaffifin",
91+
"Al-Inshiqaq",
92+
"Al-Buruj",
93+
"At-Tariq",
94+
"Al-A'la",
95+
"Al-Ghashiyah",
96+
"Al-Fajr",
97+
"Al-Balad",
98+
"Ash-Shams",
99+
"Al-Layl",
100+
"Ad-Duhaa",
101+
"Ash-Sharh",
102+
"At-Tin",
103+
"Al-'Alaq",
104+
"Al-Qadr",
105+
"Al-Bayyinah",
106+
"Az-Zalzalah",
107+
"Al-'Adiyat",
108+
"Al-Qari'ah",
109+
"At-Takathur",
110+
"Al-'Asr",
111+
"Al-Humazah",
112+
"Al-Fil",
113+
"Quraysh",
114+
"Al-Ma'un",
115+
"Al-Kawthar",
116+
"Al-Kafirun",
117+
"An-Nasr",
118+
"Al-Masad",
119+
"Al-Ikhlas",
120+
"Al-Falaq",
121+
"An-Nas",
122+
]
123+
124+
125+
def cleanText(text: str) -> str:
    """Normalise a surah name or user query for fuzzy comparison.

    A leading "al " / "an " article (case-insensitive, followed by a space)
    is removed, then every character that is not an ASCII letter is dropped
    and the result is lower-cased.

    Args:
        text: Raw surah name or user-typed query.

    Returns:
        The lower-case, letters-only form of ``text``; may be empty when
        ``text`` contains no ASCII letters.
    """
    # The article pattern is anchored at the start, so leading whitespace
    # would otherwise prevent it from matching ("  al fatiha").
    text = text.lstrip()
    withoutArticle = re.sub(r'^(al |an )', '', text, flags=re.IGNORECASE)
    lettersOnly = re.sub(r'[^a-zA-Z]', '', withoutArticle)
    return lettersOnly.lower()
128+
129+
130+
cleanedSurahNames = [cleanText(surah) for surah in surahNames]
131+
132+
133+
def fuzzySearchSurah(query: str) -> list[tuple[int, str]]:
    """Suggest up to three surahs whose names best match ``query``.

    The query is normalised with ``cleanText`` and compared against the
    normalised surah names. A name that ends with the normalised query is
    promoted to the first suggestion; the remaining slots are filled with
    the best fuzzy matches from ``process.extract``.

    Args:
        query: Text typed by the user.

    Returns:
        A list of ``(surahNumber, surahName)`` pairs, best match first, with
        1-based surah numbers. The list is empty when the query contains no
        ASCII letters, so callers can test it with ``if not result``.
    """
    cleanedQuery = cleanText(query)

    # Nothing searchable (empty or non-Latin input): every name "ends with"
    # the empty string, which would otherwise yield arbitrary suggestions.
    if not cleanedQuery:
        return []

    # Compare against the *cleaned* query; the raw one may contain capitals,
    # spaces or apostrophes that never occur in cleanedSurahNames.
    firstSuggestion = next(
        (surah for surah in cleanedSurahNames if surah.endswith(cleanedQuery)),
        None,
    )

    # process.extract yields (choice, score) pairs; only the choice is needed.
    matches = [
        choice
        for choice, _score in process.extract(
            cleanedQuery, cleanedSurahNames, limit=3
        )
    ]

    if firstSuggestion:
        # Put the suffix match first, drop duplicates while keeping order,
        # and stay within the three-suggestion limit.
        matches = list(dict.fromkeys([firstSuggestion, *matches]))[:3]

    numbers = [cleanedSurahNames.index(match) + 1 for match in matches]

    # Return a real list rather than a one-shot, always-truthy zip iterator.
    return [(number, surahNames[number - 1]) for number in numbers]

requirements.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ python-telegram-bot[job-queue]==21.1.1
22
python-dotenv==1.0.0
33
flask==2.3.2
44
pymongo==4.6.1
5-
schedule==1.2.1
5+
schedule==1.2.1
6+
fuzzywuzzy==0.18.0

0 commit comments

Comments
 (0)