www.digitalmars.com         C & C++   DMDScript  

digitalmars.D.learn - How-to: input/output "Japanese Characters"?

reply "Tyro[a.c.edwards]" <nospam home.com> writes:
I'm on a Japanese system attempting to input/output Japanese Characters
and cannot seem to accomplish it. How would I read
"憲法記念日 理念と現実、広がる格差" from stdin/file and
output the same 
to stdout/file?

Thanks,
Andrew
May 02 2009
parent reply Georg Wrede <georg.wrede iki.fi> writes:
Tyro[a.c.edwards] wrote:
 I'm on a Japanese system attempting to input/output Japanese Characters
 and cannot seem to accomplish it. How would I read
 "憲法記念日 理念と現実、広がる格差" from stdin/file and
output the same 
 to stdout/file?
import std.stdio; void main() { auto lin = readln(); writeln(lin); } Works with your Japanese strings. It just works(tm).
May 02 2009
next sibling parent "Tyro[a.c.edwards]" <nospam home.com> writes:
On 5/3/2009 7:41 AM, Georg Wrede wrote:
 Tyro[a.c.edwards] wrote:
 I'm on a Japanese system attempting to input/output Japanese Characters
 and cannot seem to accomplish it. How would I read
 "憲法記念日 理念と現実、広がる格差" from stdin/file and
output the
 same to stdout/file?
import std.stdio; void main() { auto lin = readln(); writeln(lin); } Works with your Japanese strings. It just works(tm).
Interesting... For whatever reason I thought I had to cast/convert with to!dstring in order to get the correct output, which kept resulting in: std.utf.UtfException: 4invalid UTF-8 sequence. Thank you very much.
May 02 2009
prev sibling parent reply "Carlos Smith" <carlos-smith sympatico.ca> writes:
"Georg Wrede" <georg.wrede iki.fi> a écrit dans le message de news:

 import std.stdio;

 /// Reads one line from standard input and echoes it to standard output.
 void main()
 {
     // readln() hands back the line together with its terminator, so the
     // text is emitted with write(); writeln() would add a second newline
     // and the output would no longer match the input.
     auto lin = readln();
     write(lin);
 }

 Works with your Japanese strings. It just works(tm).
I am trying this on a North American WinXP station. I am using DMD V1. You will find a small program at the end of this message who does not work for me. The console use Lucida COnsole font. The CP is 65001. cmd.exe can read/write utf strings, but not my program. I want to be able to read an utf string from stdin. That string will contains characters > 256, ie glyphs you can only see with an unicode font. I have read some documentation on MSDN that confirm it can't work. So why does it work for you ? Can anyone, explain to me what am i doing wrong ? Thanks a lot... --------- program ---------- // // readln.d // testing readln() // // This file saved with BOM: FF FE // Edit with Lucida Console font // import std.stdio; import std.c.windows.windows; import std.c.locale; import std.utf; extern( Windows ) { HANDLE GetStdHandle( DWORD nStdHandle ); BOOL WriteConsoleW( HANDLE hConsoleOutput, in VOID* lpBuffer, DWORD nNumberOfCharsToWrite, DWORD* lpNumberOfCharsWritten, void* lpReserved = null ); BOOL WriteConsoleA( HANDLE hConsoleOutput, in VOID* lpBuffer, DWORD nNumberOfCharsToWrite, DWORD* lpNumberOfCharsWritten, void* lpReserved = null ); BOOL ReadConsoleW( HANDLE hConsoleInput, LPVOID lpBuffer, uint nNumberOfCharsToRead, uint* lpNumberOfCharsRead, void* pInputControl = null ); BOOL ReadConsoleA( HANDLE hConsoleInput, LPVOID lpBuffer, uint nNumberOfCharsToRead, uint* lpNumberOfCharsRead, void* pInputControl = null ); version (UNICODE) alias ReadConsoleA ReadConsole; else alias ReadConsoleW ReadConsole; } UINT CP, OCP; void DisplayCP() { writef("Console CP: %s\n", GetConsoleCP()); writef("Console Output CP: %s\n", GetConsoleOutputCP()); } void SaveCP() { CP = GetConsoleCP(); OCP = GetConsoleOutputCP(); } void SetUnicode() { SetConsoleCP( 65001 ); SetConsoleOutputCP( 65001 ); } void ResetCP() { SetConsoleCP( CP ); SetConsoleOutputCP( OCP ); } int main( ) { HANDLE cout; DWORD nw; cout = GetStdHandle(STD_OUTPUT_HANDLE); DisplayCP(); SaveCP(); SetUnicode(); DisplayCP(); // 
writef() do print Unicode string constant // next 5 lines contains utf chars. wchar[] utfstr = "←↑→↔↨∂∆∏∑−√∞∟∩∫≈═┼┴⌠⌐≥≤≡≠ѕєіїњљ\n"; writef("1: %s","←↑→↔↨∂∆∏∑−√∞∟∩∫≈═┼┴⌠⌐≥≤≡≠ѕєіїњљ\n"); writef("2: %s",utfstr); // so do WriteConsoleW() wchar[] utfstr3 = "3: ←↑→↔↨∂∆∏∑−√∞∟∩∫≈═┼┴⌠⌐≥≤≡≠ѕєіїњљ\n"; WriteConsoleW(cout, utfstr3.ptr, utfstr3.length, &nw); // but not readln()`ed strings writef("Type a line of Unicode characters:\n"); auto line = readln(); writef("line len: %s\n", line.length); writef("%s\n", "Next line is what You typed:"); writef("%s\n",line); writef("%s\n", "Your line again (with WriteConsoleA):"); WriteConsoleA(cout, line.ptr, line.length, &nw); writef("%s\n", "Your line again (with WriteConsoleW):"); WriteConsoleW(cout, line.ptr, line.length, &nw); writef("Done. Resetting code page.\n"); ResetCP(); DisplayCP(); return 0; } --------- eof ---------
May 03 2009
parent reply Georg Wrede <georg.wrede iki.fi> writes:
Carlos Smith wrote:
 "Georg Wrede" <georg.wrede iki.fi> a écrit dans le message de news:
 
 import std.stdio;

 /// Reads one line from standard input and echoes it to standard output.
 void main()
 {
     // readln() hands back the line together with its terminator, so the
     // text is emitted with write(); writeln() would add a second newline
     // and the output would no longer match the input.
     auto lin = readln();
     write(lin);
 }

 Works with your Japanese strings. It just works(tm).
I am trying this on a North American WinXP station. I am using DMD V1.
Sorry, somebody else has to help, I'm not using Windows. :/
 You will find a small program at the end of this
 message that does not work for me.
 
 The console uses the Lucida Console font. The code page (CP) is 65001.
 cmd.exe can read/write UTF strings, but my
 program cannot.
 
 I want to be able to read a UTF string from stdin.
 That string will contain characters > 256, i.e. glyphs
 you can only see with a Unicode font.
 
 I have read some documentation on MSDN that confirms
 it can't work. So why does it work for you?
 
 Can anyone explain to me what I am doing wrong?
 
 Thanks a lot...
 
 --------- program ----------
 //
 //  readln.d
 //  testing readln()
 //
 //  This file saved with BOM: FF FE
 //  Edit with Lucida Console font
 //
 
 import std.stdio;
 import std.c.windows.windows;
 import std.c.locale;
 import std.utf;
 
 // Hand-written bindings for the Win32 console functions used by this
 // program. All of them use the Windows calling convention.
 extern( Windows )
 {
  // Looks up a standard device handle (main() passes STD_OUTPUT_HANDLE).
  HANDLE GetStdHandle( DWORD nStdHandle );

  // Wide-character (UTF-16) console write. The length argument counts
  // characters, not bytes.
  BOOL WriteConsoleW(
    HANDLE hConsoleOutput,
    in VOID* lpBuffer,
    DWORD nNumberOfCharsToWrite,
    DWORD* lpNumberOfCharsWritten,
    void* lpReserved = null );

  // Byte-oriented console write; the buffer is interpreted using the
  // console output code page.
  BOOL WriteConsoleA(
    HANDLE hConsoleOutput,
    in VOID* lpBuffer,
    DWORD nNumberOfCharsToWrite,
    DWORD* lpNumberOfCharsWritten,
    void* lpReserved = null );

  // Wide-character console read.
  BOOL ReadConsoleW(
    HANDLE hConsoleInput,
    LPVOID lpBuffer,
    uint nNumberOfCharsToRead,
    uint* lpNumberOfCharsRead,
    void* pInputControl = null );

  // Byte-oriented console read.
  BOOL ReadConsoleA(
    HANDLE hConsoleInput,
    LPVOID lpBuffer,
    uint nNumberOfCharsToRead,
    uint* lpNumberOfCharsRead,
    void* pInputControl = null );

  // Same mapping as the Win32 headers: a UNICODE build selects the wide
  // (W) entry point, every other build the byte-oriented (A) one.
  version (UNICODE)
    alias ReadConsoleW ReadConsole;
  else
    alias ReadConsoleA ReadConsole;
 }
 
 
 UINT CP, OCP;
 
 /// Prints the console's current input and output code pages.
 void DisplayCP()
 {
  // Query both values first, then report them.
  auto inputCodePage = GetConsoleCP();
  auto outputCodePage = GetConsoleOutputCP();

  writef("Console CP: %s\n", inputCodePage);
  writef("Console Output CP: %s\n", outputCodePage);
 }
 
 /// Stores the console's current input and output code pages in the
 /// module-level variables CP and OCP.
 void SaveCP()
 {
  auto currentInput = GetConsoleCP();
  auto currentOutput = GetConsoleOutputCP();

  CP = currentInput;
  OCP = currentOutput;
 }
 
 /// Switches both the console input and output code pages to 65001.
 void SetUnicode()
 {
  // 65001 is the Windows code page identifier for UTF-8.
  const UINT utf8CodePage = 65001;

  SetConsoleCP( utf8CodePage );
  SetConsoleOutputCP( utf8CodePage );
 }
 
 /// Applies the code pages held in the module-level variables CP and OCP
 /// to the console input and output respectively.
 void ResetCP()
 {
  auto savedInput = CP;
  auto savedOutput = OCP;

  SetConsoleCP( savedInput );
  SetConsoleOutputCP( savedOutput );
 }
 
 /// Console Unicode experiment.
 ///
 /// Switches the console to code page 65001, prints Unicode string
 /// constants through writef() and WriteConsoleW(), then reads one line
 /// from stdin and echoes it back through writef(), WriteConsoleA() and
 /// WriteConsoleW(). The code pages recorded by SaveCP() are put back
 /// before returning.
 ///
 /// Returns: 0.
 int main( )
 {
  DWORD nw;  // receives the count reported by the WriteConsole* calls
  HANDLE cout = GetStdHandle(STD_OUTPUT_HANDLE);

  DisplayCP();
  SaveCP();
  SetUnicode();
  DisplayCP();

  // writef() does print Unicode string constants
  // (the string literals below contain non-ASCII characters)
  wchar[] utfstr = "←↑→↔↨∂∆∏∑−√∞∟∩∫≈═┼┴⌠⌐≥≤≡≠ѕєіїњљ\n";
  writef("1: %s","←↑→↔↨∂∆∏∑−√∞∟∩∫≈═┼┴⌠⌐≥≤≡≠ѕєіїњљ\n");
  writef("2: %s",utfstr);
  // so does WriteConsoleW()
  // The literal must stay on a single source line: a line break inside
  // the quotes would become part of the string.
  wchar[] utfstr3 = "3: ←↑→↔↨∂∆∏∑−√∞∟∩∫≈═┼┴⌠⌐≥≤≡≠ѕєіїњљ\n";
  WriteConsoleW(cout, utfstr3.ptr, utfstr3.length, &nw);

  // but not readln()'ed strings
  writef("Type a line of Unicode characters:\n");
  auto line = readln();
  writef("line len: %s\n", line.length);
  writef("%s\n", "Next line is what You typed:");
  writef("%s\n",line);
  writef("%s\n", "Your line again (with WriteConsoleA):");
  WriteConsoleA(cout, line.ptr, line.length, &nw);
  writef("%s\n", "Your line again (with WriteConsoleW):");
  // line holds UTF-8 code units, while WriteConsoleW expects UTF-16 and a
  // length in wchars. Passing line.ptr/line.length directly would read
  // past the end of the buffer, so transcode first.
  try
  {
   wchar[] wideLine = toUTF16(line);
   WriteConsoleW(cout, wideLine.ptr, wideLine.length, &nw);
  }
  catch (Exception e)
  {
   // toUTF16 rejects malformed input; report it and fall through so
   // the code pages are still restored below.
   writef("%s\n", "(input is not valid UTF-8; skipping WriteConsoleW)");
  }


  writef("Done. Resetting code page.\n");
  ResetCP();
  DisplayCP();
  return 0;
 }
 --------- eof ---------
 
May 03 2009
parent "Carlos Smith" <carlos-smith sympatico.ca> writes:
"Georg Wrede" <georg.wrede iki.fi>
 Sorry, somebody else has to help, I'm not using Windows. :/
LOL! You actually helped a lot. You are not using Windows! It works on Linux but not on Windows. Now, it may work on Windows too, but stream.d must be modified to take care of the differences between the following cases: reading/writing to the console (true stdin/stdout), reading/writing to redirected stdin/stdout, and probably some other (undocumented) quirks... After some reading on MSDN, I have done some testing and it works for me now. Have a very nice Linux Day... :-) Thanks.
May 03 2009