Merge pull request #54 from gurgalex/fix_dialog_again

gurgalex · web-flow · commit 62d11a754d0f · 2021-09-15T03:09:28.000-05:00
Fix dialog again + allow for choosing default voice natively on Windows
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,9 @@
+#v0.9.26
+# feat
+- The SAPI voice is now controlled by the Windows control panel.
+  - Go to Control Panel -> Speech Recognition -> Text to Speech -> Voice selection
+# fix
+  - Dialog boxes and menu selections should now be spoken, and speech now stops when they are closed
 #v0.9.25
 # perf
 - Pause object detection, OCR, and frame capture when Siralim Ultimate is minimized and/or in the background
diff --git a/README.md b/README.md
@@ -29,3 +29,9 @@ A program to aid in making [Siralim Ultimate](https://store.steampowered.com/app
 
 # How to download
 The latest version of the installer can be found [here https://github.com/gurgalex/SiralimAccess/releases/latest](https://github.com/gurgalex/SiralimAccess/releases/latest)
+
+
+# FAQ
+## How do I change the voice that is used?
+The SAPI voice is controlled by the Windows control panel.
+- Control Panel -> Speech Recognition -> Text to Speech -> Voice selection
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-0.9.25
+0.9.26
diff --git a/subot/audio.py b/subot/audio.py
@@ -205,7 +205,6 @@ def __init__(self, config: Config):
         self.Speaker = win32com.client.Dispatch("SAPI.SpVoice")
         # don't block the program when speaking. Cancel any pending speaking directions
         self.SVSFlag = 3  # SVSFlagsAsync = 1 + SVSFPurgeBeforeSpeak = 2
-        self.Speaker.Voice = self.Speaker.getVoices('Name=Microsoft Zira Desktop').Item(0)
         self.Speaker.Rate = 5
 
     def play_sound(self, audio_tile: AudioLocation, sound_type: SoundType):
@@ -268,6 +267,9 @@ def speak_blocking(self, text):
     def speak_nonblocking(self, text):
         self.Speaker.Speak(text, self.SVSFlag)
 
+    def silence(self):
+        self.speak_nonblocking(" ")
+
     def get_available_sounds(self) -> dict[SoundType, SoundMapping]:
         return self.sound_mappings
 
diff --git a/subot/main.py b/subot/main.py
@@ -816,7 +816,8 @@ def __init__(self, incoming_frame_queue: Queue, out_quests_queue: Queue, su_clie
         # depends on text length
         self.duration_idle_green_text: int = 0
         self.first_idle_on_green_text: float = 0
-        self.has_green_text = False
+        self.has_green_text: bool = False
+        self.has_dialog_text: bool = False
         self.parent: Bot = parent
         self.incoming_frame_queue: Queue = incoming_frame_queue
         self.out_quests_sprites_queue: Queue = out_quests_queue
@@ -869,12 +870,17 @@ def ocr_screen(self):
         if self.config.ocr_read_dialog_boxes:
             self.speak_dialog_box()
 
+        menu_selection_or_dialog_text_present = self.has_green_text or self.has_dialog_text
+        if not menu_selection_or_dialog_text_present:
+            root.debug("Pauseing due to no menu selection or dialog text present on screen")
+            self.parent.audio_system.silence()
+
+
     def speak_dialog_box(self):
         mask = detect_dialog_text(self.frame)
         resize_factor = 2
         mask = cv2.resize(mask, (mask.shape[1] * resize_factor, mask.shape[0] * resize_factor), interpolation=cv2.INTER_LINEAR)
         ocr_result = recognize_cv2_image(mask)
-
         try:
             first_line = ocr_result["lines"][0]
             first_word = first_line["words"][0]
@@ -896,8 +902,10 @@ def speak_dialog_box(self):
             # no text was found
             self.last_dialog_text = ""
             self.first_idle_on_green_text = time.time()
-            if not self.has_green_text:
-                self.parent.audio_system.speak_nonblocking(" ")
+            if not self.has_green_text and not self.has_dialog_text:
+                root.debug("Pause, menu system. both not present")
+                self.parent.audio_system.silence()
+            self.has_dialog_text = False
             return
 
         time_idle_green_text = time.time() - self.first_idle_on_green_text
@@ -915,6 +923,7 @@ def speak_dialog_box(self):
         self.last_dialog_text = selected_text
 
         root.debug(f"dialog box text = {selected_text}")
+        self.has_dialog_text = True
         self.parent.audio_system.speak_nonblocking(selected_text)
 
     def speak_selected_menu_item(self):
@@ -979,7 +988,8 @@ def run(self):
                     self.paused = True
                     self.parent.clear_all_matches()
                     self.parent.speak_nearby_objects()
-                    self.parent.audio_system.speak_nonblocking(" ")
+                    root.debug("pause. Pause request")
+                    self.parent.audio_system.silence()
                     continue
                 elif isinstance(msg, Resume):
                     self.paused = False
@@ -1330,7 +1340,7 @@ def handle_realm_alignment(self, realm_alignment: Optional[Union[RealmAlignment,
             if realm_alignment.realm != self.parent.realm:
                 new_realm = realm_alignment.realm
                 if new_realm in models.UNSUPPORTED_REALMS:
-                    self.parent.audio_system.speak_nonblocking(f"Realm unsupported. {new_realm.realm_name}")
+                    self.parent.audio_system.speak_blocking(f"Realm unsupported. {new_realm.realm_name}")
                 self.parent.realm = realm_alignment.realm
 
                 self.parent.item_hashes = RealmSpriteHasher(floor_tiles=None)