Can't read unicode filenames

Windows specific questions.
Post Reply
jimdunn
Posts: 13
Joined: Jan 28, 2016 15:07

Can't read unicode filenames

Post by jimdunn »

Hi, could someone tell me the secret to "reading unicode filenames" using FindFirstFileW ???

Here's some code that fails when it hits a file name containing unicode:

Code: Select all

' FreeBasic Application
' dups.bas by Jim Dunn, 2022-03-27, v0.1

' =============================================================================
#Include "jCommon.inc"

' =============================================================================
Function WinMain( _
    ByVal hInstance     As HINSTANCE, _
    ByVal hPrevInstance As HINSTANCE, _
    ByVal szCmdLine     As ZString Ptr, _
    ByVal nCmdShow      As Long _
    ) As Long

    Dim AS CWSTR wszFolder, wszFileName(Any)
    Dim As String sBuf1, sBuf2
    Dim As ULONGINT uliSizes(Any), bufSiz, i
    Dim As Long x, y, z, lineCount
    Dim As Integer fp1, fp2, jFlag

    if Len(Command) < 1 then
        jPrint("")
        jPrint("Usage: dups.exe [folder to scan for duplicates]")
        goto exitfunction
    end if

    wszFolder = Command
    lineCount = jListFiles(wszFolder,wszFileName(),uliSizes())

    for x = 0 to UBound(wszFileName) - 1
        for y = x+1 to UBound(wszFileName)
            if uliSizes(x) > 0 and uliSizes(x) = uliSizes(y) then
                sBuf1 = ""
                fp1 = FreeFile
                If Open(wszFileName(x) For Binary Access Read As #fp1) <> 0 Then
                    jPrint("Error reading [" & wszFileName(x) & "]")
                    goto exitfunction
                End If
                sBuf2 = ""
                fp2 = FreeFile
                If Open(wszFileName(y) For Binary Access Read As #fp2) <> 0 Then
                    jPrint("Error reading [" & wszFileName(y) & "]")
                    goto exitfunction
                End If
                bufSiz = 4194304
                if bufSiz > LOF(fp1) then
                    bufSiz = LOF(fp1)
                end if
                i = 0
                While i < LOF(fp1)
                    sBuf1 = String(LOF(fp1),0):
                    if Get(#fp1,bufSiz,sBuf1) <> 0 then
                        sBuf1 = ""
                    end if
                    sBuf2 = String(LOF(fp2),0)
                    if Get(#fp2,bufSiz,sBuf2) <> 0 then
                        sBuf2 = ""
                    end if
                    if sBuf1 <> sBuf2 then
                        i = 0 ' setting to 0 meaning NOT A DUPLICATE
                        exit while
                    end if
                    i = i + bufSiz
                Wend
                Close #fp1
                Close #fp2
                if i > 0 then ' >0 = duplicate
                    if Len(wszFileName(x)) > Len(wszFileName(y)) then
                        jPrint("del " & chr(34) & wszFileName(x) & chr(34))
                        uliSizes(x) = 0 ' filesize = 0 skips compare on next loop
                    else
                        jPrint("del " & chr(34) & wszFileName(y) & chr(34))
                        uliSizes(y) = 0 ' filesize = 0 skips compare on next loop
                    end if
                end if
            end if
        next y
    next x

    ' =========================================================================
exitfunction:
    WinMain = 0
End Function

' =============================================================================
End WinMain(GetModuleHandle(Null), Null, Command(), SW_NORMAL)
' =============================================================================
And the source to jListFiles()

Code: Select all

Function jListFiles OverLoad(ByVal wszPath as CWSTR, wszArray() AS CWSTR, uliArray() AS ULONGINT) as LONG
    Dim hSearch as HANDLE
    Dim WFD AS WIN32_FIND_DATAW

    Dim wszCurPath AS CWSTR
    Dim wszFullPath AS CWSTR
    Dim AS LONG lineCount = 0

    if right(wszPath,1) = "*" then
        wszCurPath = wszPath
    else
        if right(wszPath,1) <> "\" then
            wszPath += "\"
        end if
        wszCurPath = wszPath + "*.*"
    end if

    ' Find the files
    hSearch = FindFirstFileW(wszCurPath, @WFD)
    IF hSearch <> INVALID_HANDLE_VALUE THEN
        DO
            IF (WFD.dwFileAttributes AND FILE_ATTRIBUTE_DIRECTORY) = FILE_ATTRIBUTE_DIRECTORY THEN
                ' found a folder
            ELSE
                lineCount += 1
                wszFullPath = wszPath & WFD.cFileName ' Store the full path
                ReDim Preserve wszArray(UBound(wszArray) + 1) AS CWSTR ' zero-based array
                wszArray(UBound(wszArray)) = wszFullPath ' file name
                ReDim Preserve uliArray(UBound(uliArray) + 1) AS ULONGINT ' zero-based array
                uliArray(UBound(uliArray)) = (WFD.nFileSizeHigh * (&hFFFFFFFF + 1)) + WFD.nFileSizeLow ' file size
            END IF
        LOOP WHILE FindNextFileW(hSearch, @WFD)
        FindClose(hSearch)
    END IF
    Return lineCount
End Function
srvaldez
Posts: 3373
Joined: Sep 25, 2005 21:54

Re: Can't read unicode filenames

Post by srvaldez »

hello jimdunn
have you tried José Roca's framework ?
here's a post by him viewtopic.php?p=282744#p282744
marcov
Posts: 3455
Joined: Jun 16, 2005 9:45
Location: Netherlands
Contact:

Re: Can't read unicode filenames

Post by marcov »

The question is if open() is overloaded for widestrings. Check assembler to see if it calls a wide version
Josep Roca
Posts: 564
Joined: Sep 27, 2016 18:20
Location: Valencia, Spain

Re: Can't read unicode filenames

Post by Josep Roca »

FreeBasic's OPEN statement does not support Unicode filenames.
You can use my CFileStream Class: https://github.com/JoseRoca/WinFBX/blob ... ss.md#Read
PaulSquires
Posts: 999
Joined: Jul 14, 2005 23:41

Re: Can't read unicode filenames

Post by PaulSquires »

I highly recommend using Jose's WinFBX code libraries when using unicode on Windows platforms. It simplifies everything because you never have to guess whether the native FB commands work well with Windows unicode. Jose's versions always work. All my file related code has long been converted to use Jose's file classes. You can also use Jose's CWSTR class to work well as a dynamic unicode string class. That class integrates perfectly with all of his other routines that manipulate strings. (As an aside, I wrote the entire WinFBE Editor using Jose's WinFBX as a core development library along with a lot of Win32API)
dodicat
Posts: 7976
Joined: Jan 10, 2006 20:30
Location: Scotland

Re: Can't read unicode filenames

Post by dodicat »

You can do this via crt.bi

Code: Select all

#include "crt.bi"

Function filelength(filename As wstring Ptr)As Long
      Dim As wstring * 4 k="r"
      Var fp=_wfopen(filename,@k)
      If fp = 0 Then Print "Error opening file":Sleep:End
      fseek(fp, 0, SEEK_END)
      Var length=ftell(fp)
      fclose(fp)
      Return(length)
End Function

Sub savefilestring(content As String ,filename As wstring Ptr)
      Dim As wstring * 4 k="wb"
      Var fp= _wfopen(filename,@k)
      If fp = 0 Then Print "Error opening file":Sleep:End
      fwrite(@content[0], 1, Len(content), fp)
      fclose(fp)
End Sub

Sub loadfilestring( content As String,filename As wString Ptr)
      Var l=Filelength(filename)
      content=String(l,0)
      Dim As wstring * 4 k="rb"
      Var fp= _wfopen(filename,@k)
      If fp = 0 Then Print "Error loading file ";filename:Sleep:End
      fread(@content[0], 1,l, fp)
      fclose(fp)
End Sub

Function exists(filename As wString Ptr) As boolean
      Dim As wstring * 4 k="r"
      Var fp= _wfopen(filename,@k)
      If fp=0 Then
            Return false
      Else
            fclose(fp)
            Return true
      End If
      
End Function

Dim As wstring * 30 filename=Wchr(&h0414, &h043e, &h0431, &h0440, &h043E)+".txt"
Print "Filename ";filename

savefilestring("abcde",filename)
Dim As String g

loadfilestring(g,filename)
Print "'";g;"'"
_wremove(@filename)
Print "Does file exist? ";exists(filename)
Sleep
 
jimdunn
Posts: 13
Joined: Jan 28, 2016 15:07

Re: Can't read unicode filenames

Post by jimdunn »

Wow, thank you all for the replies, I will try it!!!
marcov
Posts: 3455
Joined: Jun 16, 2005 9:45
Location: Netherlands
Contact:

Re: Can't read unicode filenames

Post by marcov »

Personally I would try the manifest way, to switch the whole application to utf8. That solves nearly all calls in one big swoop.

https://docs.microsoft.com/en-us/window ... -code-page
Post Reply