"""Push-to-talk speech transcription window built on GTK 3 and Whisper.

Hold the button to record from a sounddevice input stream; on release the
recording is written to a temporary WAV file, transcribed with Whisper and
shown in the text view, from where it can be copied to the clipboard.
"""

import logging
import os
import tempfile
import threading
import time
import wave

import gi

gi.require_version("Gtk", "3.0")
from gi.repository import Gdk, GLib, Gtk  # noqa: E402  (must follow require_version)

import numpy as np  # noqa: E402
import sounddevice as sd  # noqa: E402
import whisper  # noqa: E402

logger = logging.getLogger(__name__)

# Load the Whisper model once; every transcription shares it.
model = whisper.load_model("medium")

# Recording parameters
SAMPLE_RATE = 22000  # Hz
CHANNELS = 1
DURATION_LIMIT = 300  # maximum recording length, in seconds
LANGUAGE = "en"  # language code passed to Whisper; change to transcribe another language


class SpeechApp(Gtk.Window):
    """Main window: status line, hold-to-speak button, transcript view, copy button.

    Threading model: recording and transcription each run on their own
    worker thread. Worker threads never touch widgets directly; every UI
    change is handed to the GTK main loop through ``GLib.idle_add``.
    """

    def __init__(self):
        """Build the widget tree and initialise the recording state."""
        super().__init__(title="Speech Typing")
        self.set_border_width(20)
        self.set_default_size(400, 300)

        # Vertical box holding all widgets.
        vbox = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10)
        self.add(vbox)

        # Status line, left-aligned and vertically centred
        # (the modern replacement for set_alignment(0, 0.5)).
        self.status_bar = Gtk.Label(label="Status: Ready")
        self.status_bar.set_halign(Gtk.Align.START)
        self.status_bar.set_valign(Gtk.Align.CENTER)
        self.status_bar.get_style_context().add_class("status-bar")

        status_frame = Gtk.Frame()
        status_frame.add(self.status_bar)
        vbox.pack_start(status_frame, False, False, 0)

        # Push-to-talk button: recording lasts while it is held down.
        self.button = Gtk.Button(label="🎙️ Hold to Speak")
        self.button.connect("pressed", self.on_button_pressed)
        self.button.connect("released", self.on_button_released)
        vbox.pack_start(self.button, False, False, 0)

        # Scrollable, read-only transcript view.
        scrolled_window = Gtk.ScrolledWindow()
        scrolled_window.set_hexpand(True)
        scrolled_window.set_vexpand(True)
        vbox.pack_start(scrolled_window, True, True, 0)

        self.text_view = Gtk.TextView()
        self.text_view.set_editable(False)
        self.text_view.set_wrap_mode(Gtk.WrapMode.WORD)
        scrolled_window.add(self.text_view)
        self.text_buffer = self.text_view.get_buffer()

        self.copy_button = Gtk.Button(label="📋 Copy Text")
        self.copy_button.connect("clicked", self.on_copy_button_clicked)
        vbox.pack_start(self.copy_button, False, False, 0)

        # Recording state shared between the main thread and the workers.
        self.recording = False
        self.audio = []  # chunks appended by the audio callback
        self.start_time = 0.0  # time.monotonic() value when recording began
        self.record_thread = None

    def on_button_pressed(self, widget):
        """Clear the transcript and start recording on a worker thread."""
        self.recording = True
        self.audio = []
        # Monotonic clock: the duration limit must not be affected by
        # wall-clock adjustments.
        self.start_time = time.monotonic()

        # Clear the text buffer
        start_iter = self.text_buffer.get_start_iter()
        end_iter = self.text_buffer.get_end_iter()
        self.text_buffer.delete(start_iter, end_iter)

        # Show the initial countdown immediately.
        self.update_status(f"Recording... {DURATION_LIMIT:.1f}s remaining")

        self.record_thread = threading.Thread(target=self.record_audio)
        self.record_thread.start()

    def on_button_released(self, widget):
        """Stop recording and start transcription on a worker thread."""
        self.recording = False
        if self.record_thread is None:
            # "released" without a matching "pressed": nothing was recorded.
            return

        # The recorder polls self.recording every 100 ms, so this wait is short.
        self.record_thread.join()
        self.record_thread = None

        self.update_status("Transcribing...")
        threading.Thread(target=self.transcribe_and_type).start()

    def record_audio(self):
        """Capture audio into ``self.audio`` until released or the limit is hit.

        Runs on the recorder thread. The stream callback collects the audio
        chunks; the loop below only drives the countdown shown in the status
        line.
        """

        def callback(indata, frames, time_info, status):
            within_limit = time.monotonic() - self.start_time < DURATION_LIMIT
            if self.recording and within_limit:
                self.audio.append(indata.copy())
            else:
                raise sd.CallbackStop()

        poll_interval_ms = 100
        with sd.InputStream(samplerate=SAMPLE_RATE, channels=CHANNELS, callback=callback):
            while self.recording:
                # Same clock as the callback, so both stop at the same moment.
                remaining = DURATION_LIMIT - (time.monotonic() - self.start_time)
                if remaining <= 0:
                    break
                GLib.idle_add(self._show_countdown, remaining)
                sd.sleep(poll_interval_ms)

    def _show_countdown(self, remaining):
        """Show the remaining recording time; runs on the GTK main loop.

        Does nothing once recording has stopped, so a countdown update that
        was queued just before the button was released cannot overwrite the
        "Transcribing..." status.
        """
        if self.recording:
            self.update_status(f"Recording... {remaining:.1f}s remaining")
        return False

    def update_status(self, status):
        """Set the status line text.

        Returns False so that, when used as a ``GLib.idle_add`` callback, it
        runs only once.
        """
        self.status_bar.set_text(f"Status: {status}")
        return False

    def transcribe_and_type(self):
        """Transcribe the recorded audio and display the result.

        Runs on a worker thread; all widget updates are scheduled on the GTK
        main loop rather than performed here.
        """
        if not self.audio:
            # Nothing was recorded: go straight back to Ready.
            GLib.idle_add(self.update_status, "Ready")
            return

        try:
            text = self._transcribe(np.concatenate(self.audio, axis=0))
        except Exception as exc:  # thread boundary: report instead of dying silently
            logger.exception("Transcription failed")
            GLib.idle_add(self.update_status, f"Error: {exc}")
            return

        GLib.idle_add(self._show_transcript, text)

    @staticmethod
    def _transcribe(samples):
        """Write *samples* to a temporary 16-bit WAV file and return Whisper's text.

        The file lives in a temporary directory that is removed when this
        method returns, whether or not transcription succeeds.
        """
        # Clip before scaling so out-of-range samples saturate instead of
        # wrapping around in the int16 conversion.
        pcm = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)

        with tempfile.TemporaryDirectory() as tmpdir:
            wav_path = os.path.join(tmpdir, "recording.wav")
            with wave.open(wav_path, "wb") as wf:
                wf.setnchannels(CHANNELS)
                wf.setsampwidth(2)  # bytes per sample (int16)
                wf.setframerate(SAMPLE_RATE)
                wf.writeframes(pcm.tobytes())
            result = model.transcribe(wav_path, language=LANGUAGE)

        return result["text"].strip()

    def _show_transcript(self, text):
        """Append *text* to the transcript and scroll to it; runs on the GTK main loop."""
        buffer = self.text_buffer
        end_iter = buffer.get_end_iter()
        if buffer.get_char_count() > 0:
            # Separate from any text already shown.
            buffer.insert(end_iter, "\n\n")
            end_iter = buffer.get_end_iter()
        buffer.insert(end_iter, text)

        # Scroll to the end of the text view
        mark = buffer.create_mark(None, end_iter, False)
        self.text_view.scroll_to_mark(mark, 0.0, True, 0.0, 1.0)

        # NOTE: typing the text into the focused window (e.g. with
        # `xdotool type --delay 50 <text>`) is currently disabled.

        self.update_status("Ready")
        return False

    def on_copy_button_clicked(self, widget):
        """Copy the whole transcript to the system clipboard."""
        start_iter = self.text_buffer.get_start_iter()
        end_iter = self.text_buffer.get_end_iter()
        text = self.text_buffer.get_text(start_iter, end_iter, False)

        clipboard = Gtk.Clipboard.get(Gdk.SELECTION_CLIPBOARD)
        clipboard.set_text(text, -1)
        # Make the text available to other applications
        clipboard.store()


win = SpeechApp()
win.connect("destroy", Gtk.main_quit)
win.show_all()
Gtk.main()