Skip to content

Commit 68f23ea

Browse files
authored
Merge pull request #68 from gurgalex/massive_add
Add Many things + experimental OCR generic option
2 parents 3c7e9f3 + 39202d1 commit 68f23ea

29 files changed

+669
-225
lines changed

CHANGELOG.md

+20
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,23 @@
1+
# v0.12.0
2+
## change
3+
- Help text is now only spoken when a UI screen is opened or reopened. Press ? to hear it again at any time.
4+
- Text should be less jumbled when icons are present
5+
- There is now a pause between sections of text
6+
7+
## feat
8+
- Add experimental OCR option (uppercase O) to read text that is to the right of any green text
9+
- Press uppercase O to force the text to the right to be read
10+
- Seems to cover most boxes; however, the text is sometimes very verbose, with lots of unneeded info
11+
- This is a stopgap measure until areas are identified for each UI screen
12+
- New quest sound. Hopefully it is less annoying.
13+
- The config file for Siralim Access can now be opened by pressing uppercase C. Siralim Access must still be closed for changes to take effect.
14+
- Add more UIs
15+
- Perk screen
16+
- Codex screens
17+
- Most of the reference list screens
18+
- Game Information - Only the nether boss screen
19+
20+
121
# v0.11.2
222
## fix
323
- Crash when Creatures screen would pop up a dialog box

README.md

+2
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ A program to aid in making [Siralim Ultimate](https://store.steampowered.com/app
4646
| Speak secondary info | o |
4747
| Speak all available info | v |
4848
| Copy all available info | c |
49+
| Edit config file | C |
50+
| EXPERIMENTAL: OCR of text to the right of menu selection | O |
4951

5052
## Requirements
5153

VERSION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.11.2
1+
0.12.0

cli.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@ def detect_siralim_ultimate_install_location() -> Path:
2020
raise e
2121

2222

23-
24-
2523
@click.group()
2624
def enter_cli():
2725
pass
@@ -37,11 +35,12 @@ def install():
3735
if ORIGINAL_BACKUP_FONT_FILEPATH.exists():
3836
print(f"Original font already backed up since {ORIGINAL_BACKUP_FONT_FILEPATH.name} exists")
3937
else:
40-
shutil.copy(FONT_FILE_ORIG_SIRALIM_ULTIMATE,ORIGINAL_BACKUP_FONT_FILEPATH)
38+
shutil.copy(FONT_FILE_ORIG_SIRALIM_ULTIMATE, ORIGINAL_BACKUP_FONT_FILEPATH)
4139

4240
shutil.copy(font_filepath, FONT_FILE_ORIG_SIRALIM_ULTIMATE)
4341
print(f"{font_filepath.name} has replaced {FONT_FILE_ORIG_SIRALIM_ULTIMATE.name}")
4442

43+
4544
@enter_cli.command()
4645
def restore():
4746
print("entered restore")
@@ -57,7 +56,6 @@ def restore():
5756
print(f"{FONT_FILE_ORIG_SIRALIM_ULTIMATE.name} restored")
5857

5958

60-
6159
if __name__ == "__main__":
6260
# needed to prevent infinite process spawns when using pyinstaller
6361
import multiprocessing
@@ -71,11 +69,13 @@ def restore():
7169
before_send=before_send,
7270
)
7371
if len(sys.argv) == 1:
72+
from subot.main import read_version
73+
print(f"Siralim Access version = {read_version()}")
7474
start_bot()
7575
else:
7676
OCR_FONT = 'arialbd.ttf'
7777
FONT_FILE_ORIG_SIRALIM_ULTIMATE: Path = detect_siralim_ultimate_install_location().joinpath(
7878
"Eight-Bit-Dragon2.otf")
7979
ORIGINAL_BACKUP_FONT_FILEPATH: Path = FONT_FILE_ORIG_SIRALIM_ULTIMATE.parent.joinpath(
8080
"Eight-Bit-Dragon2-orig.otf")
81-
enter_cli()
81+
enter_cli()

requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,4 @@ cytolk~=0.1.10
2222
requests~=2.26.0
2323
semantic-version~=2.8.5
2424
pyclip~=0.5.4
25+
more-itertools~=8.11.0

resources/audio/credits.csv

+2-1
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,5 @@ npc-*.ogg,https://opengameart.org/content/sound-effects-pack,https://opengameart
55
teleportation-shrine/*.ogg,Thylacine Studios <https://www.thylacinestudios.com>,
66
snd_ChestOpening/*.wav,Thylacine Studios <https://www.thylacinestudios.com>,
77
nether-portal*.wav,https://opengameart.org/content/sci-fi-drone-loop
8-
summoning-*,https://opengameart.org/content/fire-crackling
8+
summoning-*,https://opengameart.org/content/fire-crackling
9+
quest-item/*,Thylacine Studios <https://www.thylacinestudios.com>,
119 KB
Binary file not shown.

resources/audio/quest-item/high.ogg

10.1 KB
Binary file not shown.

resources/audio/quest-item/low-2.ogg

8.56 KB
Binary file not shown.

resources/audio/quest-item/normal.ogg

9.2 KB
Binary file not shown.

subot/audio.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -157,9 +157,9 @@ def __init__(self, config: Config):
157157
channel=pygame.mixer.Channel(0),
158158
volume_adj=self.config.quest,
159159
sounds=SoundIndicator(
160-
low=pygame.mixer.Sound(AUDIO_DIR.joinpath("tone-low.wav").as_posix()),
161-
normal=pygame.mixer.Sound(AUDIO_DIR.joinpath("tone-normal.wav").as_posix()),
162-
high=pygame.mixer.Sound(AUDIO_DIR.joinpath("tone-high.wav").as_posix()),
160+
low=pygame.mixer.Sound(AUDIO_DIR.joinpath("quest-item/low-2.ogg").as_posix()),
161+
normal=pygame.mixer.Sound(AUDIO_DIR.joinpath("quest-item/normal.ogg").as_posix()),
162+
high=pygame.mixer.Sound(AUDIO_DIR.joinpath("quest-item/high.ogg").as_posix()),
163163
)
164164
),
165165

subot/main.py

+133-41
Original file line numberDiff line numberDiff line change
@@ -18,27 +18,32 @@
1818
import webbrowser
1919

2020
from ctypes import windll
21+
22+
import win32clipboard
2123
from pynput import keyboard
2224
from pynput.keyboard import KeyCode
2325

2426
import win32process
27+
from winrt.windows.media.ocr import OcrResult
2528

26-
from subot import models, ocr
29+
from subot import models
30+
from subot.ui_areas.CodexGeneric import CodexGeneric
2731
from subot.ui_areas.CreatureReorderSelectFirst import OCRCreatureRecorderSelectFirst, OCRCreatureRecorderSwapWith
2832
from subot.ui_areas.OCRGodForgeSelect import OCRGodForgeSelectSystem
2933
from subot.ui_areas.OcrUnknownArea import OcrUnknownArea
34+
from subot.ui_areas.PerkScreen import PerkScreen
3035
from subot.ui_areas.creatures_display import OCRCreaturesDisplaySystem
3136
from subot.ui_areas.realm_select import OCRRealmSelect, SelectStep
3237
from subot.ui_areas.summoning import OcrSummoningSystem
33-
from subot.trait_info import TraitData, Creature
38+
from subot.trait_info import TraitData
3439
from subot.hang_monitor import HangMonitorWorker, HangMonitorChan, HangAnnotation, HangMonitorAlert, Shutdown
3540

3641
import cv2
3742
import numpy as np
3843
import mss
3944
from subot.settings import Session, GameControl
4045
import subot.settings as settings
41-
from subot.ocr import detect_green_text, detect_dialog_text, english_installed, detect_title, OCR
46+
from subot.ocr import detect_title, OCR
4247
from subot.ui_areas.ui_ocr_types import OCR_UI_SYSTEMS
4348
from subot.ui_areas.base import OCRMode
4449
import win32gui
@@ -69,7 +74,6 @@
6974

7075
user32 = windll.user32
7176

72-
7377
def set_dpi_aware():
7478
# makes functions return real pixel numbers instead of scaled values
7579
user32.SetProcessDPIAware()
@@ -242,6 +246,14 @@ class ActionType(enum.Enum):
242246
READ_ALL_INFO = auto()
243247
COPY_ALL_INFO = auto()
244248
HELP = auto()
249+
SCREENSHOT = auto()
250+
SILENCE = auto()
251+
OPEN_CONFIG_LOCATION = auto()
252+
FORCE_OCR = auto()
253+
254+
255+
def open_config_file():
    """Open the config file returned by ``settings.config_file_path()``.

    The path is handed to ``os.startfile`` with the ``'edit'`` operation.
    """
    config_path = settings.config_file_path()
    os.startfile(config_path, 'edit')
245257

246258

247259
class Bot:
@@ -259,6 +271,12 @@ def on_release(self, key):
259271
self.action_queue.put_nowait(ActionType.COPY_ALL_INFO)
260272
elif key == KeyCode.from_char('?'):
261273
self.action_queue.put_nowait(ActionType.HELP)
274+
elif key == KeyCode.from_char("P"):
275+
self.action_queue.put_nowait(ActionType.SCREENSHOT)
276+
elif key == KeyCode.from_char(self.config.open_config_key):
277+
self.action_queue.put_nowait(ActionType.OPEN_CONFIG_LOCATION)
278+
elif key == KeyCode.from_char("O"):
279+
self.action_queue.put_nowait(ActionType.FORCE_OCR)
262280

263281
def on_press(self, key):
264282
pass
@@ -732,6 +750,32 @@ def run(self):
732750
self.whole_window_thandle.copy_all_info()
733751
elif msg is ActionType.HELP:
734752
root.debug("got help request")
753+
self.whole_window_thandle.speak_help()
754+
elif msg is ActionType.SILENCE:
755+
self.audio_system.silence()
756+
elif msg is ActionType.OPEN_CONFIG_LOCATION:
757+
open_config_file()
758+
elif msg is ActionType.FORCE_OCR:
759+
self.whole_window_thandle.force_ocr()
760+
elif msg is ActionType.SCREENSHOT:
761+
762+
763+
def send_to_clipboard(clip_type, data):
764+
"""copy image to clipboard. Found at https://stackoverflow.com/a/62007792/17323787"""
765+
import win32clipboard
766+
win32clipboard.OpenClipboard()
767+
win32clipboard.EmptyClipboard()
768+
win32clipboard.SetClipboardData(clip_type, data)
769+
win32clipboard.CloseClipboard()
770+
771+
bgr_frame = self.whole_window_thandle.frame
772+
is_success, buffer = cv2.imencode(".bmp", bgr_frame)
773+
BMP_HEADER_LEN = 14
774+
bmp_data = buffer[BMP_HEADER_LEN:].tobytes()
775+
send_to_clipboard(win32clipboard.CF_DIB, bmp_data)
776+
777+
778+
root.info("copied whole frame bytes to clipboard")
735779
except queue.Empty:
736780
pass
737781

@@ -889,9 +933,9 @@ def __init__(self, incoming_frame_queue: Queue, queue_child_comm_send: queue.Que
889933
self._hang_monitor = hang_monitor
890934
self.hang_activity_sender: Optional[HangMonitorChan] = None
891935

892-
self.frame: np.typing.ArrayLike = np.zeros(
936+
self.frame: np.typing.NDArray = np.zeros(
893937
shape=(self.parent.su_client_rect.h, self.parent.su_client_rect.w, 3), dtype="uint8")
894-
self.gray_frame: np.typing.ArrayLike = np.zeros(
938+
self.gray_frame: np.typing.NDArray = np.zeros(
895939
shape=(self.parent.su_client_rect.h, self.parent.su_client_rect.w),
896940
dtype="uint8")
897941
self.ocr_engine: OCR = OCR()
@@ -901,40 +945,74 @@ def __init__(self, incoming_frame_queue: Queue, queue_child_comm_send: queue.Que
901945
self.frames_since_last_scan: int = 0
902946
self.got_first_frame: bool = False
903947

904-
def ocr_title(self):
905-
mask = detect_title(self.frame)
906-
resize_factor = 2
907-
mask = cv2.resize(mask, (mask.shape[1] * resize_factor, mask.shape[0] * resize_factor),
908-
interpolation=cv2.INTER_LINEAR)
909-
ocr_result = self.ocr_engine.recognize_cv2_image(mask)
910-
detected_system = OcrUnknownArea(audio_system=self.parent.audio_system, config=self.config,
948+
def determine_ocr_system(self, ocr_result: OcrResult) -> OCR_UI_SYSTEMS:
949+
unknown_system = OcrUnknownArea(audio_system=self.parent.audio_system, config=self.config,
911950
ocr_engine=self.ocr_engine)
912951
try:
913952
title = ocr_result.merged_text
914-
root.debug(f"title: {title}")
915-
if title.startswith("Select a creature to summon"):
916-
detected_system = OcrSummoningSystem(self.creature_data, self.parent.audio_system, self.config,
953+
root.debug(f"{title=}")
954+
lower_title = title.lower()
955+
first_word = ocr_result.lines[0].words[0]
956+
if lower_title.startswith("select a creature to summon"):
957+
return OcrSummoningSystem(self.creature_data, self.parent.audio_system, self.config,
917958
self.ocr_engine)
918959

919-
elif title.startswith("Creatures"):
920-
detected_system = OCRCreaturesDisplaySystem(self.creature_data, self.parent.audio_system,
921-
self.config, self.ocr_engine)
922-
elif title.startswith("Choose the Avatar"):
923-
detected_system = OCRGodForgeSelectSystem(audio_system=self.parent.audio_system, config=self.config,
960+
elif lower_title.startswith("creatures") and first_word.bounding_rect.x/self.frame.shape[1] > 0.13:
961+
return OCRCreaturesDisplaySystem(self.parent.audio_system, self.config, self.ocr_engine)
962+
elif lower_title.startswith("choose the avatar"):
963+
return OCRGodForgeSelectSystem(audio_system=self.parent.audio_system, config=self.config,
924964
ocr_engine=self.ocr_engine)
925965

926-
elif title.startswith("Choose the creature whose position"):
927-
detected_system = OCRCreatureRecorderSelectFirst(audio_system=self.parent.audio_system, config=self.config, ocr_engine=self.ocr_engine)
966+
elif lower_title.startswith("choose the creature whose position"):
967+
return OCRCreatureRecorderSelectFirst(audio_system=self.parent.audio_system, config=self.config, ocr_engine=self.ocr_engine)
928968

929-
elif title.startswith("Choose a creature to swap"):
930-
detected_system = OCRCreatureRecorderSwapWith(audio_system=self.parent.audio_system,
969+
elif lower_title.startswith("choose a creature to swap"):
970+
return OCRCreatureRecorderSwapWith(audio_system=self.parent.audio_system,
931971
config=self.config, ocr_engine=self.ocr_engine)
932972
elif step_type := _realm_select_step(title):
933973
root.debug(f"realm step - {step_type} {title}")
934-
detected_system = OCRRealmSelect(audio_system=self.parent.audio_system, config=self.config,
974+
return OCRRealmSelect(audio_system=self.parent.audio_system, config=self.config,
935975
ocr_engine=self.ocr_engine, step=step_type)
976+
elif lower_title.startswith("choose an item to purchase"):
977+
pass
978+
# Equip / Items -> Artifacts screen
979+
elif lower_title.startswith("artifacts ("):
980+
pass
981+
# Spell gems in inventory screen
982+
elif lower_title.startswith("spell gems ("):
983+
pass
984+
elif lower_title.startswith("choose a perk to rank"):
985+
return PerkScreen(self.parent.audio_system, self.config, self.ocr_engine)
986+
987+
# codex section
988+
elif lower_title.startswith(("artifact properties", "realm properties", "status effects", "spell gem properties", "traits", "skins", "gate of the gods", "gods", "guilds and false gods", "rodian creature masters", "macros", "nether bosses")):
989+
return CodexGeneric(audio_system=self.parent.audio_system, ocr_engine=self.ocr_engine, config=self.config, title=title)
990+
# todo:-Problematic codex entries "Castle", "Character", "Events", "Gods", "Items", "Realms", "Relics", "Spell Gems"
991+
elif lower_title.startswith("spells"):
992+
# todo: proper spell gem screen
993+
return CodexGeneric(audio_system=self.parent.audio_system, ocr_engine=self.ocr_engine, config=self.config, title=title)
994+
elif lower_title.startswith("skins"):
995+
pass
996+
elif lower_title.startswith("traits"):
997+
pass
998+
# codex artifact info
999+
elif lower_title.startswith("artifacts") and first_word.bounding_rect.x/self.frame.shape[1] < 0.1:
1000+
print("art screen")
1001+
elif lower_title.startswith("nether stones ("):
1002+
print("nether stone item screen")
1003+
9361004
except IndexError:
937-
pass
1005+
return unknown_system
1006+
return unknown_system
1007+
1008+
def ocr_title(self):
1009+
mask = detect_title(self.frame)
1010+
resize_factor = 2
1011+
mask = cv2.resize(mask, (mask.shape[1] * resize_factor, mask.shape[0] * resize_factor),
1012+
interpolation=cv2.INTER_LINEAR)
1013+
1014+
ocr_result = self.ocr_engine.recognize_cv2_image(mask)
1015+
detected_system = self.determine_ocr_system(ocr_result)
9381016

9391017
if detected_system.mode != self.ocr_ui_system.mode or self.ocr_ui_system.step != detected_system.step:
9401018
root.debug(f"new ocr system: {detected_system.mode}, {self.ocr_ui_system.mode}")
@@ -949,12 +1027,18 @@ def ocr_title(self):
9491027
def speak_interaction_info(self):
9501028
if not self.ocr_ui_system:
9511029
return
952-
elif isinstance(self.ocr_ui_system, OcrSummoningSystem):
953-
self.ocr_ui_system.speak_interaction()
954-
elif isinstance(self.ocr_ui_system, OcrUnknownArea):
955-
self.ocr_ui_system.speak_interaction()
956-
elif isinstance(self.ocr_ui_system, OCRRealmSelect):
1030+
try:
9571031
self.ocr_ui_system.speak_interaction()
1032+
except AttributeError:
1033+
pass
1034+
1035+
def speak_help(self):
    """Ask the currently active OCR UI system to speak its help text.

    Does nothing when ``self.ocr_ui_system`` is falsy. When the active UI
    system has no ``speak_help`` method, a warning naming its class is logged
    instead of raising.
    """
    if not self.ocr_ui_system:
        return
    try:
        self.ocr_ui_system.speak_help()
    except AttributeError:
        # `ocr_ui_system` is an instance, and instances do not expose
        # `__name__`; reading it here raised a second AttributeError from
        # inside this handler. The class name lives on the type.
        root.warning(f"no help implemented for {type(self.ocr_ui_system).__name__}")
9581042

9591043
def ocr_screen(self):
9601044
self.ocr_title()
@@ -1027,14 +1111,22 @@ def run(self):
10271111
root.info("WindowAnalyzer thread shutting down")
10281112

10291113
def speak_all_info(self):
1030-
if isinstance(self.ocr_ui_system, OcrSummoningSystem):
1031-
self.ocr_ui_system.speak_detailed()
1032-
elif isinstance(self.ocr_ui_system, OCRRealmSelect):
1114+
try:
10331115
self.ocr_ui_system.speak_all_info()
1116+
except AttributeError:
1117+
pass
10341118

10351119
def copy_all_info(self):
1036-
if isinstance(self.ocr_ui_system, OcrSummoningSystem):
1120+
try:
10371121
self.ocr_ui_system.copy_detailed_text()
1122+
except AttributeError:
1123+
pass
1124+
1125+
def force_ocr(self):
    """Pass the current gray frame to the active UI system's ``force_ocr_content``.

    Any ``AttributeError`` raised along the way (for example, the active UI
    system not providing ``force_ocr_content``) is silently ignored.
    """
    try:
        ui_system = self.ocr_ui_system
        ui_system.force_ocr_content(self.gray_frame)
    except AttributeError:
        return None
10381130

10391131

10401132
class NearbyFrameGrabber(multiprocessing.Process):
@@ -1486,11 +1578,11 @@ def version_check(config, audio_system):
14861578
def init_bot() -> Bot:
14871579
config = settings.load_config()
14881580
audio_system = AudioSystem(config)
1489-
if not english_installed():
1490-
audio_system.speak_blocking(ocr.ENGLISH_NOT_INSTALLED_EXCEPTION.args[0])
1491-
root.error(ocr.ENGLISH_NOT_INSTALLED_EXCEPTION.args[0])
1492-
audio_system.speak_blocking("Shutting down")
1493-
sys.exit(1)
1581+
# if not english_installed():
1582+
# audio_system.speak_blocking(ocr.ENGLISH_NOT_INSTALLED_EXCEPTION.args[0])
1583+
# root.error(ocr.ENGLISH_NOT_INSTALLED_EXCEPTION.args[0])
1584+
# audio_system.speak_blocking("Shutting down")
1585+
# sys.exit(1)
14941586
version_check(config, audio_system)
14951587
is_minimized = True
14961588
while is_minimized:

0 commit comments

Comments
 (0)