Add hotkey support and xdotool integration for enhanced speech-to-text functionality
This commit is contained in:
236
speech2text-keyed.py
Executable file
236
speech2text-keyed.py
Executable file
@@ -0,0 +1,236 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import gi
|
||||
gi.require_version("Gtk", "3.0")
|
||||
from gi.repository import Gtk, Gdk, GLib
|
||||
|
||||
import sounddevice as sd
|
||||
import numpy as np
|
||||
import whisper
|
||||
import tempfile
|
||||
import threading
|
||||
import time
|
||||
import wave
|
||||
import subprocess
|
||||
|
||||
from pynput import keyboard
|
||||
|
||||
# Audio capture settings, shared by the input stream and the WAV writer.
DURATION_LIMIT = 300  # longest single recording, in seconds
CHANNELS = 1  # number of input channels requested from the device
SAMPLE_RATE = 22000  # samples per second requested from the device

# Load the Whisper model a single time at start-up; every transcription
# reuses this module-level instance.
model = whisper.load_model("medium")
|
||||
|
||||
class SpeechApp(Gtk.Window):
    """Push-to-talk window that records speech, transcribes it and types it.

    Recording runs while the on-screen button is held, or while Right-Shift
    and Right-Ctrl are held together (global hotkey via pynput).  On release
    the captured audio is transcribed with the module-level Whisper ``model``,
    shown in the text view and sent to the focused application with
    ``xdotool``.

    Threading: GTK widgets are only touched from the GTK main loop.  The
    recorder thread, the transcription worker and the pynput listener hand UI
    work back to the main loop through ``GLib.idle_add``.
    """

    # Key names that together form the global push-to-talk hotkey.
    HOTKEY_COMBO = frozenset({"shift_r", "ctrl_r"})

    def __init__(self):
        """Build the widgets, reset the recording state and start the hotkey listener."""
        super().__init__(title="Speech Typing")
        self.set_border_width(20)
        self.set_default_size(400, 300)

        # Vertical box holding every widget of the window.
        vbox = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10)
        self.add(vbox)

        # Status line, left-aligned and vertically centred inside a frame.
        self.status_bar = Gtk.Label(label="Status: Ready")
        self.status_bar.set_halign(Gtk.Align.START)
        self.status_bar.set_valign(Gtk.Align.CENTER)
        self.status_bar.get_style_context().add_class("status-bar")
        status_frame = Gtk.Frame()
        status_frame.add(self.status_bar)
        vbox.pack_start(status_frame, False, False, 0)

        # Push-to-talk button: recording lasts exactly as long as it is held.
        self.button = Gtk.Button(label="🎙️ Hold to Speak")
        self.button.connect("pressed", self.on_button_pressed)
        self.button.connect("released", self.on_button_released)
        vbox.pack_start(self.button, False, False, 0)

        # Read-only, word-wrapped transcript view inside a scrolled window.
        scrolled_window = Gtk.ScrolledWindow()
        scrolled_window.set_hexpand(True)
        scrolled_window.set_vexpand(True)
        vbox.pack_start(scrolled_window, True, True, 0)

        self.text_view = Gtk.TextView()
        self.text_view.set_editable(False)
        self.text_view.set_wrap_mode(Gtk.WrapMode.WORD)
        scrolled_window.add(self.text_view)
        self.text_buffer = self.text_view.get_buffer()

        # Button that copies the transcript to the clipboard.
        self.copy_button = Gtk.Button(label="📋 Copy Text")
        self.copy_button.connect("clicked", self.on_copy_button_clicked)
        vbox.pack_start(self.copy_button, False, False, 0)

        # Recording state.  ``start_time`` and ``record_thread`` are created
        # here so no method can hit a missing attribute before the first
        # recording has started.
        self.recording = False
        self.audio = []
        self.start_time = 0.0
        self.record_thread = None
        self.hotkey_pressed = set()
        self.hotkey_active = False

        # Global hotkey listener for Right-Shift + Right-Ctrl.
        self.listener = keyboard.Listener(
            on_press=self.on_key_press,
            on_release=self.on_key_release,
        )
        self.listener.daemon = True
        self.listener.start()

    def on_button_pressed(self, widget):
        """Start recording when the push-to-talk button is pressed."""
        self.start_recording()

    def on_button_released(self, widget):
        """Stop recording when the push-to-talk button is released."""
        self.stop_recording()

    def start_recording(self):
        """Begin a new recording; does nothing if one is already running.

        Must run on the GTK main loop: it clears the text buffer and updates
        the status label directly.  Always returns ``None``, so it can be
        scheduled with ``GLib.idle_add`` as a one-shot callback.
        """
        if self.recording:
            return
        self.recording = True
        self.audio = []
        self.start_time = time.time()

        # Clear the previous transcript.
        self.text_buffer.delete(
            self.text_buffer.get_start_iter(),
            self.text_buffer.get_end_iter(),
        )

        # Show the full countdown immediately; the recorder thread refreshes it.
        self.update_status(f"Recording... {DURATION_LIMIT:.1f}s remaining")
        self.record_thread = threading.Thread(target=self.record_audio)
        self.record_thread.start()

    def stop_recording(self):
        """Stop the current recording and start transcription in the background.

        Must run on the GTK main loop.  Does nothing when no recording is
        active.  Waits for the recorder thread to exit before the worker is
        started; the recorder polls ``self.recording`` every 100 ms.
        """
        if not self.recording:
            return
        self.recording = False
        if self.record_thread is not None:
            self.record_thread.join()
        self.update_status("Transcribing...")
        threading.Thread(target=self.transcribe_and_type).start()

    def record_audio(self):
        """Capture audio into ``self.audio`` until stopped or the limit is hit.

        Runs on the recorder thread.  The stream callback appends a copy of
        each input block and stops the stream once recording ends or
        ``DURATION_LIMIT`` seconds have passed since ``self.start_time``.
        """
        def callback(indata, frames, time_info, status):
            if self.recording and (time.time() - self.start_time < DURATION_LIMIT):
                self.audio.append(indata.copy())
            else:
                raise sd.CallbackStop()

        with sd.InputStream(samplerate=SAMPLE_RATE, channels=CHANNELS, callback=callback):
            check_interval = 100  # milliseconds between status refreshes
            while self.recording:
                # Use the same wall clock as the callback so the countdown
                # shown to the user matches the moment capture really stops.
                remaining_time = DURATION_LIMIT - (time.time() - self.start_time)
                if remaining_time <= 0:
                    break
                GLib.idle_add(self._show_countdown, remaining_time)
                sd.sleep(check_interval)

    def _show_countdown(self, remaining_time):
        """Show the remaining recording time, unless recording already stopped.

        Runs on the GTK main loop.  The guard keeps a countdown update that
        was queued just before release from overwriting a later status such
        as "Transcribing...".
        """
        if self.recording:
            self.update_status(f"Recording... {remaining_time:.1f}s remaining")
        return False  # one-shot idle callback

    def update_status(self, status):
        """Set the status label text; returns ``False`` so idle_add runs it once."""
        self.status_bar.set_text(f"Status: {status}")
        return False

    def transcribe_and_type(self):
        """Transcribe the captured audio, display it and type it out.

        Runs on a worker thread.  The audio is written to a temporary 16-bit
        WAV file that is removed again when transcription finishes.  The
        status is reset to "Ready" even when transcription raises.
        """
        # Snapshot the chunk list so a recording started in the meantime
        # cannot swap the data out from under this worker.
        chunks = list(self.audio)
        if not chunks:
            GLib.idle_add(self.update_status, "Ready")
            return

        try:
            audio_np = np.concatenate(chunks, axis=0)
            # Samples are presumably floats in [-1, 1] (the scaling by 32767
            # implies it); clip first so an out-of-range sample saturates
            # instead of wrapping around in the int16 cast.
            pcm = (np.clip(audio_np, -1.0, 1.0) * 32767).astype(np.int16)

            # The file is deleted when this block exits.  It is reopened by
            # name while still open, which works on Unix (the platform
            # xdotool targets).
            with tempfile.NamedTemporaryFile(suffix=".wav") as tmpfile:
                with wave.open(tmpfile.name, "wb") as wf:
                    wf.setnchannels(CHANNELS)
                    wf.setsampwidth(2)
                    wf.setframerate(SAMPLE_RATE)
                    wf.writeframes(pcm.tobytes())

                # Replace "en" with the desired language code.
                result = model.transcribe(tmpfile.name, language="en")

            text = result["text"].strip()

            # Widgets may only be touched from the GTK main loop.
            GLib.idle_add(self._show_transcription, text)

            # Type the text into the currently focused application.
            self.send_text_to_active_app(text)
        finally:
            GLib.idle_add(self.update_status, "Ready")

    def _show_transcription(self, text):
        """Append ``text`` to the transcript view and scroll to its end.

        Runs on the GTK main loop.  A blank line separates the new text from
        any text already present in the buffer.
        """
        end_iter = self.text_buffer.get_end_iter()
        if self.text_buffer.get_char_count() > 0:
            self.text_buffer.insert(end_iter, "\n\n")
            end_iter = self.text_buffer.get_end_iter()
        self.text_buffer.insert(end_iter, text)

        # Scroll to the end of the text view.
        mark = self.text_buffer.create_mark(None, end_iter, False)
        self.text_view.scroll_to_mark(mark, 0.0, True, 0.0, 1.0)
        return False  # one-shot idle callback

    def on_copy_button_clicked(self, widget):
        """Copy the whole transcript to the clipboard."""
        text = self.text_buffer.get_text(
            self.text_buffer.get_start_iter(),
            self.text_buffer.get_end_iter(),
            False,
        )
        clipboard = Gtk.Clipboard.get(Gdk.SELECTION_CLIPBOARD)
        clipboard.set_text(text, -1)
        # Make the text available to other applications.
        clipboard.store()

    def send_text_to_active_app(self, text):
        """Type ``text`` into the focused application using ``xdotool``.

        Does nothing for empty text, and silently skips typing when xdotool
        is not installed so the app keeps working as a plain transcriber.
        """
        if not text:
            return
        try:
            # "--" ends option parsing so text that starts with "-" is typed
            # literally instead of being read as an xdotool option.
            subprocess.run(
                ["xdotool", "type", "--delay", "50", "--", text],
                check=False,
            )
        except FileNotFoundError:
            # xdotool not installed; deliberately best-effort.
            pass

    def on_key_press(self, key):
        """Track hotkey keys; start recording once both are held.

        Called by pynput on its listener thread, so recording is started via
        ``GLib.idle_add`` on the GTK main loop.
        """
        if key == keyboard.Key.shift_r:
            self.hotkey_pressed.add("shift_r")
        elif key == keyboard.Key.ctrl_r:
            self.hotkey_pressed.add("ctrl_r")

        if not self.hotkey_active and self.hotkey_pressed == self.HOTKEY_COMBO:
            self.hotkey_active = True
            GLib.idle_add(self.start_recording)

    def on_key_release(self, key):
        """Track hotkey keys; stop recording once either key is released.

        Called by pynput on its listener thread, so recording is stopped via
        ``GLib.idle_add`` on the GTK main loop.
        """
        if key == keyboard.Key.shift_r:
            self.hotkey_pressed.discard("shift_r")
        elif key == keyboard.Key.ctrl_r:
            self.hotkey_pressed.discard("ctrl_r")

        if self.hotkey_active and self.hotkey_pressed != self.HOTKEY_COMBO:
            self.hotkey_active = False
            GLib.idle_add(self.stop_recording)

    def on_destroy(self, widget):
        """Shut down background activity and leave the GTK main loop."""
        # Let a running recorder thread exit; it is not a daemon thread and
        # would otherwise keep the process alive until the duration limit.
        self.recording = False
        try:
            self.listener.stop()
        except Exception:
            # Best-effort: a listener failure must not block quitting.
            pass
        Gtk.main_quit()
|
||||
|
||||
# Script entry point: create the window and run the GTK main loop.  Guarded
# so that loading this file as a module does not open a window or block.
if __name__ == "__main__":
    win = SpeechApp()
    # Closing the window stops the hotkey listener and quits the main loop.
    win.connect("destroy", win.on_destroy)
    win.show_all()
    Gtk.main()
|
||||
Reference in New Issue
Block a user