Last entry, I hope. It produces the right results for files of all standard text encodings (ANSI, plus UTF-8 and UTF-16 LE or BE, each with and without a BOM), and it properly rejects UTF-32 LE or BE because PB can't open them anyway.
It uses the Windows API function IsTextUnicode that Roland suggested, with two different constant arguments (to distinguish UTF16 LE from the less likely UTF16 BE):
// External declaration of IsTextUnicode from advapi32.dll.
//   lpv       - blob holding the bytes to examine (passed by reference)
//   iSize     - number of bytes in lpv
//   lpiResult - in/out: the caller stores the test flag(s) to run here before
//               the call and inspects the value again afterwards
Function long IsTextUnicode (ref blob lpv, long iSize, ref long lpiResult) Library "advapi32.dll"
// Test flags placed in the lpiResult argument of IsTextUnicode.
// gf_read_text_file treats a positive result for the first flag as UTF-16 LE
// and a positive result for the second flag as UTF-16 BE.
constant long IS_TEXT_UNICODE_STATISTICS = 2
constant long IS_TEXT_UNICODE_REVERSE_STATISTICS = 32
function boolean gf_read_text_file(REF string as_result, string as_filename, string as_description)
// Reads the whole file as_filename into as_result, choosing the encoding
// used to convert the raw bytes to a string.
//
// Arguments:
//   as_result      (by reference) receives the decoded file contents on success
//   as_filename    path of the file to read
//   as_description text naming the kind of file; used only in error messages
//
// Returns TRUE on success. Returns FALSE, after showing a message through
// gMsg, when the file cannot be opened or read, is empty, or starts with a
// UTF-32 byte order mark.
//
// Encoding selection:
//   1. Start from what FileEncoding() reports for the file.
//   2. If that is ANSI and the byte count is even, ask IsTextUnicode whether
//      the bytes look like UTF-16 LE, then UTF-16 BE.
//   3. If still ANSI, decode as UTF-8; when that yields fewer characters than
//      bytes, multi-byte sequences were present, so treat the file as UTF-8.
int li_file
long ll_bytes, ll_result
blob lblbBytes
byte lbBytes[]
Encoding lEncoding
String lsTemp

li_file = FileOpen(as_filename, StreamMode!, Read!, Shared!)
if li_file = -1 then
	gMsg.Show("Cannot open " + as_description + " " + as_filename + ":~n~n" + &
		gnv_environment.uf_last_error_message(), Exclamation!)
	return FALSE
end if

// Read the raw bytes; the handle is closed before the result is examined so
// that it is released on every path.
ll_bytes = FileReadEx(li_file, lblbBytes)
FileClose(li_file)
if ll_bytes = -100 then
	gMsg.Show("The " + as_description + " " + as_filename + " is empty.", Exclamation!)
	return FALSE
elseif ll_bytes = -1 then
	gMsg.Show("Cannot read " + as_description + " " + as_filename + ":~n~n" + &
		gnv_environment.uf_last_error_message(), Exclamation!)
	return FALSE
end if

// Reject UTF-32: BOM 00 00 FE FF (big endian) or FF FE 00 00 (little endian).
// The check needs four bytes. PowerScript evaluates every operand of AND/OR,
// and reading past the upper bound of a variable-size array is a runtime
// error, so files shorter than four bytes must skip this test entirely.
if ll_bytes >= 4 then
	lbBytes = GetByteArray(BlobMid(lblbBytes, 1, 4))
	if (lbBytes[1] = 0 and lbBytes[2] = 0 and lbBytes[3] = 254 and lbBytes[4] = 255) or &
		(lbBytes[1] = 255 and lbBytes[2] = 254 and lbBytes[3] = 0 and lbBytes[4] = 0) &
	then
		gMsg.Show("The " + as_description + " " + as_filename + " is in an encoding that " + &
			"the program cannot read: UTF32.", Exclamation!)
		return FALSE
	end if
end if

lEncoding = FileEncoding(as_filename)

// FileEncoding reported ANSI: look for UTF-16 without a BOM.
// UTF-16 text always has an even number of bytes, so odd sizes are skipped.
if lEncoding = EncodingAnsi! then
	if Mod(ll_bytes, 2) = 0 then
		// ll_result carries the requested test in and the outcome out.
		ll_result = gnv_environment.IS_TEXT_UNICODE_STATISTICS
		gnv_environment.IsTextUnicode(lblbBytes, ll_bytes, ll_result)
		if ll_result > 0 then
			lEncoding = EncodingUTF16LE!
		else
			ll_result = gnv_environment.IS_TEXT_UNICODE_REVERSE_STATISTICS
			gnv_environment.IsTextUnicode(lblbBytes, ll_bytes, ll_result)
			if ll_result > 0 then
				lEncoding = EncodingUTF16BE!
			end if
		end if
	end if
end if

// Still ANSI: look for UTF-8 without a BOM. A UTF-8 decode that is shorter
// than the byte count means at least one multi-byte sequence was combined.
if lEncoding = EncodingAnsi! then
	lsTemp = String(lblbBytes, EncodingUTF8!)
	if Len(lsTemp) < ll_bytes then
		lEncoding = EncodingUTF8!
	end if
end if

as_result = String(lblbBytes, lEncoding)
return TRUE