From cb1568be8a6bf33509437099a927b0507203a777 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Emilio=20Mart=C3=ADnez?= <emiliomartinez98@gmail.com>
Date: Tue, 26 Dec 2023 18:21:49 -0300
Subject: [PATCH] WIP: add generation script. Always breaks with "OSError:
 invalid pixel size"

---
 text_generation/generate_random_text.py | 42 +++++++++++++++++++++++++
 text_generation/text_utils.py           | 42 +++++++++++++++++++++++++
 2 files changed, 84 insertions(+)
 create mode 100644 text_generation/generate_random_text.py
 create mode 100644 text_generation/text_utils.py

diff --git a/text_generation/generate_random_text.py b/text_generation/generate_random_text.py
new file mode 100644
index 0000000..5eab22b
--- /dev/null
+++ b/text_generation/generate_random_text.py
@@ -0,0 +1,42 @@
+import os
+from datetime import date
+import random
+import string
+from text_utils import generate_random_txt_img
+
+NUM_IMAGES = 10          # images generated per run
+NUM_CHARACTERS = 25000   # random characters generated for each image
+IMG_SHAPE = (1600, 900)  # (width, height) in pixels
+TEXT_SIZE = 22           # font size used for every line
+
+today = date.today()
+
+# Output folder named after today's date: month abbreviation, day and year
+save_path = today.strftime("%b-%d-%Y")
+
+# Always write into a fresh folder: if today's folder already exists, append the first
+# free numeric suffix (2, 3, ...). The chosen folder is created before any image is saved.
+base_path = save_path
+suffix = 2
+while os.path.exists(save_path):
+    save_path = base_path + str(suffix)
+    suffix += 1
+os.mkdir(save_path)
+
+images_name = "generated_text"
+alphabet = string.ascii_letters + string.digits
+
+for i in range(NUM_IMAGES):
+    text = ''.join(random.choices(alphabet, k=NUM_CHARACTERS))
+
+    # Black text on white 70% of the time, white on black otherwise. The background is
+    # derived from the text colour: two independent draws would give text and background
+    # the same colour (an unreadable image) in 42% of the cases.
+    text_color = random.choices(["black", "white"], weights=(70, 30), k=1)[0]
+    background_color = "white" if text_color == "black" else "black"
+
+    image_path = os.path.join(save_path, images_name + str(i) + ".png")
+    generate_random_txt_img(
+        text, IMG_SHAPE, TEXT_SIZE, text_color, background_color, image_path
+    )
+
diff --git a/text_generation/text_utils.py b/text_generation/text_utils.py
new file mode 100644
index 0000000..850cf92
--- /dev/null
+++ b/text_generation/text_utils.py
@@ -0,0 +1,42 @@
+from PIL import Image, ImageDraw, ImageFont
+import random
+import string
+from matplotlib import font_manager
+
+def generate_random_txt_img(text, img_shape, text_size, text_color, background_color, save_path):
+    """Render `text` on a plain image, one random system font per line, and save it.
+
+    `img_shape` is (width, height) in pixels; the image is written to `save_path`.
+    """
+    imagen = Image.new("RGB", img_shape, background_color)
+    dibujo = ImageDraw.Draw(imagen)
+
+    # Characters per line (~text_size px each); lines are capped by what fits vertically
+    # at 1.5 * text_size spacing and by what the text can actually fill.
+    N_horizontal = img_shape[0] // text_size
+    line_step = 1.5 * text_size
+    N_lines = min(int(img_shape[1] // line_step), -(-len(text) // max(N_horizontal, 1)))
+
+    # Keep .ttf system fonts, skipping families filtered out as non-readable
+    excluded = ("lohit", "kacst", "Navilu", "telu", "lyx", "malayalam", "tlwg", "samyak",
+                "droid", "kalapi", "openoffice", "orya")
+    ttf_fonts = [font for font in font_manager.findSystemFonts()
+                 if ".ttf" in font and not any(name in font for name in excluded)]
+
+    for line_idx in range(N_lines):
+        # Fonts that cannot be set to this size make truetype() raise "OSError: invalid
+        # pixel size"; drop such a font and pick another instead of aborting.
+        fuente = None
+        while fuente is None:
+            random_font = random.choice(ttf_fonts)  # IndexError once no font is left
+            try:
+                fuente = ImageFont.truetype(font=random_font, size=text_size)
+            except OSError:
+                ttf_fonts.remove(random_font)
+        texto_linea = text[line_idx * N_horizontal : (line_idx + 1) * N_horizontal]
+        # Centre horizontally; getlength() replaces getsize(), removed in Pillow 10
+        x = (imagen.width - int(fuente.getlength(texto_linea))) // 2
+        dibujo.text((x, int(line_idx * line_step)), texto_linea, font=fuente, fill=text_color)
+
+    imagen.save(save_path)
+