www.digitalmars.com         C & C++   DMDScript  

digitalmars.D.bugs - [Issue 885] New: Recursion into folders with FindFirstFileA/FindNextFileA behaves incorrectly

reply d-bugmail puremagic.com writes:
http://d.puremagic.com/issues/show_bug.cgi?id=885

           Summary: Recursion into folders with FindFirstFileA/FindNextFileA
                    behaves incorrectly
           Product: D
           Version: 1.001
          Platform: PC
        OS/Version: Windows
            Status: NEW
          Severity: major
          Priority: P2
         Component: www.digitalmars.com
        AssignedTo: bugzilla digitalmars.com
        ReportedBy: fdp mcs.be


The accompanying program was used to find files in a (quite huge) directory
structure using a regex expression.  When I noticed that some files that were
there were not found, I added a line to print the directories that get
'visited'.  That's where I noticed that some directories were not recursed
into.  Furthermore this behaviour is not constant; the directories that are
skipped are not always the same ones.  Thinking of a memory problem, I also
experimented by adding fullCollect and genCollect calls at various places and
this had indeed an influence on which directories were skipped.

Here is the code:


import std.c.windows.windows;
import std.path;
import std.stdio;
import std.regexp;
import std.utf;

void visit (char[] p_strFolder, RegExp p_patt)
{
   writefln (">>> ", p_strFolder); // added for debugging purposes
   WIN32_FIND_DATA fd;
   char[] strSpec = join (p_strFolder, "*");
   HANDLE h = FindFirstFileA (cast(char*)strSpec, &fd);
   bool bOK = h != INVALID_HANDLE_VALUE;
   while (bOK)
   {
      wchar[] wbuf;

      size_t clength = std.string.strlen(fd.cFileName.ptr);
         // Convert cFileName[] to unicode
      size_t wlength =
MultiByteToWideChar(0,0,fd.cFileName.ptr,clength,null,0);
      if (wlength > wbuf.length)
      {
         wbuf.length = wlength;
      }
      size_t n =
MultiByteToWideChar(0,0,fd.cFileName.ptr,clength,cast(wchar*)wbuf,wlength);
      assert(n == wlength);
         // toUTF8() returns a new buffer
      char[] strName = std.utf.toUTF8(wbuf[0 .. wlength]);

      if ((fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0)
      {
         if (0 != std.string.cmp (strName, ".")  &&
             0 != std.string.cmp (strName, "..") &&
             0 != std.string.cmp (strName, ".svn"))
         {
            visit (join (p_strFolder, strName), p_patt);
         }
      }
      else
      {
         char[] strFullName = join (p_strFolder, strName);
         if (-1 != p_patt.find(std.string.tolower (strFullName)))
         {
           // removed code for debugging purposes
         }
      }
      bOK = cast(bool)FindNextFileA (h, &fd);
   }
   FindClose (h);
}

void main (char [][] p_args)
{
   if (p_args.length == 1)
   {
      visit (curdir, RegExp ("."));
   }
   else
   {
      visit (p_args[1], RegExp (std.string.tolower (p_args[1])));
   }
}

PS: I just tested on 1.002 also and the behaviour is the same.


-- 
Jan 24 2007
next sibling parent kris <foo bar.com> writes:
d-bugmail puremagic.com wrote:
 http://d.puremagic.com/issues/show_bug.cgi?id=885
 
            Summary: Recursion into folders with FindFirstFileA/FindNextFileA
                     behaves incorrectly
            Product: D
            Version: 1.001
           Platform: PC
         OS/Version: Windows
             Status: NEW
           Severity: major
           Priority: P2
          Component: www.digitalmars.com
         AssignedTo: bugzilla digitalmars.com
         ReportedBy: fdp mcs.be
 
 
 The accompanying program was used to find files in a (quite huge) directory
 structure using a regex expression.  When I noticed that some files that were
 there were not found, I added a line to print the directories that get
 'visited'.  That's where I noticed that some directories were not recursed
 into.  Furthermore this behaviour is not constant; the directories that are
 skipt are not always the same ones.  Thinking of a memory problem, I also
 experimented by adding fullCollect and genCollect calls at various places and
 this had indeed an influence on which directories were skipped.
 
 Here is the code:
 
 
 import std.c.windows.windows;
 import std.path;
 import std.stdio;
 import std.regexp;
 import std.utf;
 
 void visit (char[] p_strFolder, RegExp p_patt)
 {
    writefln (">>> ", p_strFolder); // added for debugging purposes
    WIN32_FIND_DATA fd;
    char[] strSpec = join (p_strFolder, "*");
    HANDLE h = FindFirstFileA (cast(char*)strSpec, &fd);
    bool bOK = h != INVALID_HANDLE_VALUE;
    while (bOK)
    {
       wchar[] wbuf;
 
       size_t clength = std.string.strlen(fd.cFileName.ptr);
          // Convert cFileName[] to unicode
       size_t wlength =
 MultiByteToWideChar(0,0,fd.cFileName.ptr,clength,null,0);
       if (wlength > wbuf.length)
       {
          wbuf.length = wlength;
       }
       size_t n =
 MultiByteToWideChar(0,0,fd.cFileName.ptr,clength,cast(wchar*)wbuf,wlength);
       assert(n == wlength);
          // toUTF8() returns a new buffer
       char[] strName = std.utf.toUTF8(wbuf[0 .. wlength]);
 
       if ((fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0)
       {
          if (0 != std.string.cmp (strName, ".")  &&
              0 != std.string.cmp (strName, "..") &&
              0 != std.string.cmp (strName, ".svn"))
          {
             visit (join (p_strFolder, strName), p_patt);
          }
       }
       else
       {
          char[] strFullName = join (p_strFolder, strName);
          if (-1 != p_patt.find(std.string.tolower (strFullName)))
          {
            // removed code for debugging purposes
          }
       }
       bOK = cast(bool)FindNextFileA (h, &fd);
    }
    FindClose (h);
 }
 
 void main (char [][] p_args)
 {
    if (p_args.length == 1)
    {
       visit (curdir, RegExp ("."));
    }
    else
    {
       visit (p_args[1], RegExp (std.string.tolower (p_args[1])));
    }
 }
 
 PS: I just tested on 1.002 also and the behaviour is the same.
 
 

You're certain FindFirst() et al can be recursed like this? IIRC, they maintain some global state?
Jan 25 2007
prev sibling next sibling parent d-bugmail puremagic.com writes:
http://d.puremagic.com/issues/show_bug.cgi?id=885





------- Comment #1 from bugzilla digitalmars.com  2007-01-27 23:01 -------
Could you try this again with DMD 1.004?


-- 
Jan 27 2007
prev sibling next sibling parent d-bugmail puremagic.com writes:
http://d.puremagic.com/issues/show_bug.cgi?id=885





------- Comment #2 from fdp mcs.be  2007-01-29 00:37 -------
(In reply to comment #0)
 The accompanying program was used to find files in a (quite huge) directory
 structure using a regex expression.  When I noticed that some files that were
 there were not found, I added a line to print the directories that get
 'visited'.  That's where I noticed that some directories were not recursed
 into.  Furthermore this behaviour is not constant; the directories that are
 skipt are not always the same ones.  Thinking of a memory problem, I also
 experimented by adding fullCollect and genCollect calls at various places and
 this had indeed an influence on which directories were skipped.
 
 Here is the code:
 
 
 import std.c.windows.windows;
 import std.path;
 import std.stdio;
 import std.regexp;
 import std.utf;
 
 void visit (char[] p_strFolder, RegExp p_patt)
 {
    writefln (">>> ", p_strFolder); // added for debugging purposes
    WIN32_FIND_DATA fd;
    char[] strSpec = join (p_strFolder, "*");
    HANDLE h = FindFirstFileA (cast(char*)strSpec, &fd);
    bool bOK = h != INVALID_HANDLE_VALUE;
    while (bOK)
    {
       wchar[] wbuf;
 
       size_t clength = std.string.strlen(fd.cFileName.ptr);
          // Convert cFileName[] to unicode
       size_t wlength =
 MultiByteToWideChar(0,0,fd.cFileName.ptr,clength,null,0);
       if (wlength > wbuf.length)
       {
          wbuf.length = wlength;
       }
       size_t n =
 MultiByteToWideChar(0,0,fd.cFileName.ptr,clength,cast(wchar*)wbuf,wlength);
       assert(n == wlength);
          // toUTF8() returns a new buffer
       char[] strName = std.utf.toUTF8(wbuf[0 .. wlength]);
 
       if ((fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0)
       {
          if (0 != std.string.cmp (strName, ".")  &&
              0 != std.string.cmp (strName, "..") &&
              0 != std.string.cmp (strName, ".svn"))
          {
             visit (join (p_strFolder, strName), p_patt);
          }
       }
       else
       {
          char[] strFullName = join (p_strFolder, strName);
          if (-1 != p_patt.find(std.string.tolower (strFullName)))
          {
            // removed code for debugging purposes
          }
       }
       bOK = cast(bool)FindNextFileA (h, &fd);
    }
    FindClose (h);
 }
 
 void main (char [][] p_args)
 {
    if (p_args.length == 1)
    {
       visit (curdir, RegExp ("."));
    }
    else
    {
       visit (p_args[1], RegExp (std.string.tolower (p_args[1])));
    }
 }
 
 PS: I just tested on 1.002 also and the behaviour is the same.
 

(In reply to comment #1)
 Could you try this again with DMD 1.004?
 

I regret to say that the problem persists in 1.004. --
Jan 28 2007
prev sibling parent d-bugmail puremagic.com writes:
http://d.puremagic.com/issues/show_bug.cgi?id=885


fdp mcs.be changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|NEW                         |RESOLVED
         Resolution|                            |INVALID




------- Comment #3 from fdp mcs.be  2007-02-26 07:41 -------
My mistake, sorry; had to use toStringz instead of just cast(char*) in the
line:
HANDLE h = FindFirstFileA (cast(char*)strSpec, &fd);

sorry for the trouble


-- 
Feb 26 2007