From 2acaf27cd7f4f32bfe8bf7335690618e2417e744 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 9 Apr 2025 21:13:54 -0400 Subject: vt: move unicode processing to a separate file This will make it easier to maintain. Also make it depend on CONFIG_CONSOLE_TRANSLATIONS. Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250410011839.64418-3-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux/consolemap.h') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index c35db4896c37..caf079bcb8c9 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -28,6 +28,7 @@ int conv_uni_to_pc(struct vc_data *conp, long ucs); u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); +bool ucs_is_double_width(uint32_t cp); #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -57,6 +58,11 @@ static inline int conv_uni_to_8bit(u32 uni) } static inline void console_map_init(void) { } + +static inline bool ucs_is_double_width(uint32_t cp) +{ + return false; +} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ -- cgit From e88391f730e46d208b7fb37b02611d24137af1ef Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 9 Apr 2025 21:13:55 -0400 Subject: vt: properly support zero-width Unicode code points Zero-width Unicode code points are causing misalignment in vertically aligned content, disrupting the visual layout. Let's handle zero-width code points more intelligently. Double-width code points are stored in the screen grid followed by a white space code point to create the expected screen layout. When a double-width code point is followed by a zero-width code point in the console incoming bytestream (e.g., an emoji with a presentation selector) then we may replace the white space padding by that zero-width code point instead of dropping it. This maximize screen content information while preserving proper layout. If a zero-width code point is preceded by a single-width code point then the above trick is not possible and such zero-width code point must be dropped. VS16 (Variation Selector 16, U+FE0F) is special as it doubles the width of the preceding single-width code point. We handle that case by giving VS16 a width of 1 when that happens. Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250410011839.64418-4-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux/consolemap.h') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index caf079bcb8c9..7d778752dcef 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -29,6 +29,11 @@ u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); bool ucs_is_double_width(uint32_t cp); +static inline bool ucs_is_zero_width(uint32_t cp) +{ + /* coming soon */ + return false; +} #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -63,6 +68,11 @@ static inline bool ucs_is_double_width(uint32_t cp) { return false; } + +static inline bool ucs_is_zero_width(uint32_t cp) +{ + return false; +} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ -- cgit From 3a1ab63aa05b4736a7d30ae0a769385662f13def Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 9 Apr 2025 21:13:57 -0400 Subject: vt: update ucs_width.c using gen_ucs_width.py This replaces ucs_width.c with the code generated by gen_ucs_width.py providing comprehensive tables for double-width and zero-width Unicode code points. Also make ucs_is_zero_width() effective. Note: scripts/checkpatch.pl complains about "... exceeds 100 columns". Please ignore. Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250410011839.64418-6-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux/consolemap.h') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index 7d778752dcef..b3a911866662 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -29,11 +29,7 @@ u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); bool ucs_is_double_width(uint32_t cp); -static inline bool ucs_is_zero_width(uint32_t cp) -{ - /* coming soon */ - return false; -} +bool ucs_is_zero_width(uint32_t cp); #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) -- cgit From 54af55b990eda5a6a0140a3cded8094b42c0c3b7 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 9 Apr 2025 21:13:59 -0400 Subject: vt: create ucs_recompose.c using gen_ucs_recompose.py This provides ucs_recompose() to recompose two Unicode characters into a single character if possible. This is needed for the VT to properly display decomposed UTF8 sequences. Note: scripts/checkpatch.pl complains about "... exceeds 100 columns". Please ignore. Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250410011839.64418-8-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux/consolemap.h') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index b3a911866662..4d3a34c288e5 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -30,6 +30,7 @@ int conv_uni_to_8bit(u32 uni); void console_map_init(void); bool ucs_is_double_width(uint32_t cp); bool ucs_is_zero_width(uint32_t cp); +uint32_t ucs_recompose(uint32_t base, uint32_t combining); #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -69,6 +70,11 @@ static inline bool ucs_is_zero_width(uint32_t cp) { return false; } + +static inline uint32_t ucs_recompose(uint32_t base, uint32_t combining) +{ + return 0; +} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ -- cgit From 6cccf837ac8d72e13c651f60d93545f9fb4e84ed Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 26 Apr 2025 11:21:19 +0200 Subject: Revert "vt: create ucs_recompose.c using gen_ucs_recompose.py" This reverts commit 54af55b990eda5a6a0140a3cded8094b42c0c3b7. A new version of the series was submitted, so it's easier to revert the old one and add the new one due to the changes invovled. Cc: Nicolas Pitre Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux/consolemap.h') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index 4d3a34c288e5..b3a911866662 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -30,7 +30,6 @@ int conv_uni_to_8bit(u32 uni); void console_map_init(void); bool ucs_is_double_width(uint32_t cp); bool ucs_is_zero_width(uint32_t cp); -uint32_t ucs_recompose(uint32_t base, uint32_t combining); #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -70,11 +69,6 @@ static inline bool ucs_is_zero_width(uint32_t cp) { return false; } - -static inline uint32_t ucs_recompose(uint32_t base, uint32_t combining) -{ - return 0; -} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ -- cgit From 67a4bb27461b0e3b39b27db688b5981b6eb62175 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 26 Apr 2025 11:21:21 +0200 Subject: Revert "vt: update ucs_width.c using gen_ucs_width.py" This reverts commit 3a1ab63aa05b4736a7d30ae0a769385662f13def. A new version of the series was submitted, so it's easier to revert the old one and add the new one due to the changes invovled. Cc: Nicolas Pitre Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux/consolemap.h') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index b3a911866662..7d778752dcef 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -29,7 +29,11 @@ u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); bool ucs_is_double_width(uint32_t cp); -bool ucs_is_zero_width(uint32_t cp); +static inline bool ucs_is_zero_width(uint32_t cp) +{ + /* coming soon */ + return false; +} #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) -- cgit From d3e92076c1af713e65edac109499c25c37f38c16 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 26 Apr 2025 11:21:24 +0200 Subject: Revert "vt: properly support zero-width Unicode code points" This reverts commit e88391f730e46d208b7fb37b02611d24137af1ef. A new version of the series was submitted, so it's easier to revert the old one and add the new one due to the changes invovled. Cc: Nicolas Pitre Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux/consolemap.h') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index 7d778752dcef..caf079bcb8c9 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -29,11 +29,6 @@ u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); bool ucs_is_double_width(uint32_t cp); -static inline bool ucs_is_zero_width(uint32_t cp) -{ - /* coming soon */ - return false; -} #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -68,11 +63,6 @@ static inline bool ucs_is_double_width(uint32_t cp) { return false; } - -static inline bool ucs_is_zero_width(uint32_t cp) -{ - return false; -} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ -- cgit From e42e607aefc4132d508a0e5724b5d0975d0a53e8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 26 Apr 2025 11:21:25 +0200 Subject: Revert "vt: move unicode processing to a separate file" This reverts commit 2acaf27cd7f4f32bfe8bf7335690618e2417e744. A new version of the series was submitted, so it's easier to revert the old one and add the new one due to the changes invovled. Cc: Nicolas Pitre Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux/consolemap.h') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index caf079bcb8c9..c35db4896c37 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -28,7 +28,6 @@ int conv_uni_to_pc(struct vc_data *conp, long ucs); u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); -bool ucs_is_double_width(uint32_t cp); #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -58,11 +57,6 @@ static inline int conv_uni_to_8bit(u32 uni) } static inline void console_map_init(void) { } - -static inline bool ucs_is_double_width(uint32_t cp) -{ - return false; -} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ -- cgit From 07bc3f442f47b4d158468c2e0146475bdf009091 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 17 Apr 2025 14:45:04 -0400 Subject: vt: move unicode processing to a separate file This will make it easier to maintain. Also make it depend on CONFIG_CONSOLE_TRANSLATIONS. Signed-off-by: Nicolas Pitre Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250417184849.475581-3-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux/consolemap.h') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index c35db4896c37..caf079bcb8c9 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -28,6 +28,7 @@ int conv_uni_to_pc(struct vc_data *conp, long ucs); u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); +bool ucs_is_double_width(uint32_t cp); #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -57,6 +58,11 @@ static inline int conv_uni_to_8bit(u32 uni) } static inline void console_map_init(void) { } + +static inline bool ucs_is_double_width(uint32_t cp) +{ + return false; +} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ -- cgit From 95b05de0a56699392e67590d000df76fedec609a Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 17 Apr 2025 14:45:05 -0400 Subject: vt: properly support zero-width Unicode code points Zero-width Unicode code points are causing misalignment in vertically aligned content, disrupting the visual layout. Let's handle zero-width code points more intelligently. Double-width code points are stored in the screen grid followed by a white space code point to create the expected screen layout. When a double-width code point is followed by a zero-width code point in the console incoming bytestream (e.g., an emoji with a presentation selector) then we may replace the white space padding by that zero-width code point instead of dropping it. This maximize screen content information while preserving proper layout. If a zero-width code point is preceded by a single-width code point then the above trick is not possible and such zero-width code point must be dropped. VS16 (Variation Selector 16, U+FE0F) is special as it typically doubles the width of the preceding single-width code point. We handle that case by giving VS16 a width of 1 instead of 0 when that happens. Signed-off-by: Nicolas Pitre Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250417184849.475581-4-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux/consolemap.h') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index caf079bcb8c9..7d778752dcef 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -29,6 +29,11 @@ u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); bool ucs_is_double_width(uint32_t cp); +static inline bool ucs_is_zero_width(uint32_t cp) +{ + /* coming soon */ + return false; +} #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -63,6 +68,11 @@ static inline bool ucs_is_double_width(uint32_t cp) { return false; } + +static inline bool ucs_is_zero_width(uint32_t cp) +{ + return false; +} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ -- cgit From 54cda9201c673fd1c5de189d961670999232e49d Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 17 Apr 2025 14:45:08 -0400 Subject: vt: use new tables in ucs.c This removes the table from ucs.c and substitutes the generated tables from ucs_width_table.h providing comprehensive ranges for double-width and zero-width Unicode code points. Also implements ucs_is_zero_width() to query the new zero-width table. Signed-off-by: Nicolas Pitre Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250417184849.475581-7-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux/consolemap.h') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index 7d778752dcef..b3a911866662 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -29,11 +29,7 @@ u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); bool ucs_is_double_width(uint32_t cp); -static inline bool ucs_is_zero_width(uint32_t cp) -{ - /* coming soon */ - return false; -} +bool ucs_is_zero_width(uint32_t cp); #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) -- cgit From b5c574995d842d241d810f3a6a3ebb03c52d57fa Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 17 Apr 2025 14:45:11 -0400 Subject: vt: support Unicode recomposition Try replacing any decomposed Unicode sequence by the corresponding recomposed code point. Code point to glyph correspondance works best after recomposition, and this apply mostly to single-width code points therefore we can't preserve them in their decomposed form anyway. Signed-off-by: Nicolas Pitre Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250417184849.475581-10-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux/consolemap.h') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index b3a911866662..8167494229db 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -30,6 +30,7 @@ int conv_uni_to_8bit(u32 uni); void console_map_init(void); bool ucs_is_double_width(uint32_t cp); bool ucs_is_zero_width(uint32_t cp); +u32 ucs_recompose(u32 base, u32 mark); #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -69,6 +70,11 @@ static inline bool ucs_is_zero_width(uint32_t cp) { return false; } + +static inline u32 ucs_recompose(u32 base, u32 mark) +{ + return 0; +} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ -- cgit From fe26933cf1e151ce0a5d858c263e8dacb2f12cee Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 7 May 2025 10:13:21 -0400 Subject: vt: add ucs_get_fallback() This is the code querying the newly introduced tables. Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250507141535.40655-7-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux/consolemap.h') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index 8167494229db..6180b803795c 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -31,6 +31,7 @@ void console_map_init(void); bool ucs_is_double_width(uint32_t cp); bool ucs_is_zero_width(uint32_t cp); u32 ucs_recompose(u32 base, u32 mark); +u32 ucs_get_fallback(u32 cp); #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -75,6 +76,11 @@ static inline u32 ucs_recompose(u32 base, u32 mark) { return 0; } + +static inline u32 ucs_get_fallback(u32 cp) +{ + return 0; +} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ -- cgit