making an audiobook using pygame

Parshaw · Nov-30-2020, 08:35 AM

Hello all,
I am trying to make a audiobook project where I am using different libraries. In here I am trying to do:

pdf -> images -> text -> gTTS -> audiofile -> pygame mixer -> playing audio.

This is my general structure of project but last two processes are happening but not giving me audio. It didn't show me error also. Here is the code,

# Import the required module for text to speech conversion
from gtts import gTTS
import pygame
from PIL import Image
import pytesseract
import os
import glob
import PySimpleGUI as sg
import tkinter as tk
import fitz


# In this function we get first and last page, which we want the software to read
def get_text(value):
    string = value
    string = string.strip()
    if "-" in string:
        first_page_number = int(string.split("-")[0])
        last_page_number = int(string.split("-")[1])
    else:
        first_page_number = int(string)
        last_page_number = 0

    return first_page_number, last_page_number


def main():
    global e, first_page_number, last_page_number
    ##### Create directory for Text to speech software
    current_directory = os.getcwd()
    final_directory = os.path.join(current_directory, r'Text_to_speech_software')
    if not os.path.exists(final_directory):
        os.makedirs(final_directory)
    print(current_directory)
    print(final_directory)

    #### GUI Part #####

    # All the stuff inside your window.
    layout = [[sg.Text('Choose PDF File to read'), sg.Input(), sg.FileBrowse()],
              [sg.Text('Enter PDF Page number or range separated by - '), sg.InputText()],
              [sg.Button('Ok'), sg.Button('Cancel')]
              ]

    # Create the GUI Window Prompt
    window = sg.Window('Input', layout)
    valid = False
    # Event Loop to process "events" and get the "values" of the inputs
    while True:
        event, values = window.read()
        # Here we read the path of the pdf file
        pdf_to_read = values[0]

        if event in (None, 'Cancel'):  # if user closes window or clicks cancel
            print("Exitting")
            window.close()
            exit()

        if event == "Ok":

            if values[0] == "":
                sg.Popup("Enter value", "Enter PDF file to be transcribed ")
            if values[1] == "":
                sg.Popup("Enter value", "Enter page number(s) to be transcribed")

            if values[0] != "" and values[1] != "":
                for char in values[1]:
                    if char.isdigit() == False:
                        sg.Popup("Invalid value", "Enter valid number or numbers separated by -")
                        break
                    else:
                        valid = True
                        break
        # Break while loop if valid first and last page numbers received
        if valid == True:
            print('You entered ', values[1])
            break

    window.close()
    first_page_number, last_page_number = get_text(values[1])

    # In this bunch of code, we get permission to delete the folder if it already exists, where we intend to save our PDF images and audio
    image_directory = glob.glob(final_directory)
    for file in os.listdir(final_directory):
        filepath = os.path.join(final_directory, file)
        print(filepath)
        os.chmod(filepath, 0o777)
        os.remove(filepath)

    # Here we read desired PDF pages and store them as images in a folder
    doc = fitz.open(pdf_to_read)
    k = 1
    # If user wants to read a single page
    if last_page_number == 0:
        page = doc.loadPage(first_page_number - 1)  # number of page
        zoom_x = 2.0
        zoom_y = 2.0
        mat = fitz.Matrix(zoom_x, zoom_y)
        pix = page.getPixmap(matrix=mat)
        output = os.path.join(final_directory, r"image_to_read.png")
        pix.writePNG(output)

    # If user wants to read range of pages
    else:
        for i in range(first_page_number - 1, last_page_number):
            page = doc.loadPage(i)  # number of page
            zoom_x = 2.0
            zoom_y = 2.0
            mat = fitz.Matrix(zoom_x, zoom_y)
            pix = page.getPixmap(matrix=mat)
            output = os.path.join(final_directory, r"image_" + str(k) + "_to_read.png")
            pix.writePNG(output)
            k += 1

    print("Done")

    # Initialize the Pytesseract OCR software
    pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

    mytext = []

    # Here we load the image(s) created in Text_to_speech folder and read the text in image via pytesseract Optical Character Recognition (OCR) software
    # thus reading text in images and giving us a string
    for file in os.listdir(final_directory):
        data = pytesseract.image_to_string(Image.open(os.path.join(final_directory, file)), lang="eng")
        data = data.replace("|",
                            "I")  # For some reason the image to text translation would put | instead of the letter I. So we replace | with I
        data = data.split('\n')
        mytext.append(data)

    # Language in which you want to convert
    language = 'en'

    print(mytext)

    # Here we make sure that the text is read correctly and we read it line by line. Because sometimes, text would end abruptly

    newtext = ""
    for text in mytext:
        for line in text:
            line = line.strip()
            # If line is small, ignore it
            if len(line.split(" ")) < 10 and len(line.split(" ")) > 0:
                newtext = newtext + " " + str(line) + "\n"

            elif len(line.split(" ")) < 2:
                pass
            else:
                if line[-1] != ".":
                    newtext = newtext + " " + str(line)
                else:
                    newtext = newtext + " " + line + "\n"

    print(newtext)

    # Passing the text and language to the engine,
    # here we have marked slow=False. Which tells
    # the module that the converted audio should
    # have a high speed
    myobj = gTTS(text=newtext, lang=language, slow=False)

    # Saving the converted audio in a mp3 file named pdf_audio.mp3
    myobj.save(os.path.join(final_directory, "pdf_audio.mp3"))

    # Here we load and play the audio file
    pygame.mixer.init()
    pygame.mixer.music.load(os.path.join(final_directory, "pdf_audio.mp3"))
    pygame.mixer.music.play(-1)

    ########## GUI END ########


if __name__ == '__main__':
    main()

Kindly, help me.

Possibly Related Threads…
Thread		Author	Replies	Views	Last Post
	Help with: Audiobook Library/Copier Project	eleven43	0	1,434	Sep-14-2023, 04:17 PM Last Post: eleven43
	Pygame*import pygame ImportError: No module named pygame	CASPERHANISCH	1	11,284	Jun-05-2017, 09:50 PM Last Post: nilamo

making an audiobook using pygame

User Panel Messages

Announcements