#include "ruby/ruby.h"
#include "ruby/encoding.h"
#include <winbase.h>
#include <wchar.h>
#include <shlwapi.h>
#ifndef INVALID_FILE_ATTRIBUTES
# define INVALID_FILE_ATTRIBUTES ((DWORD)-1)
#endif
/* Hash consulted and filled by code_page(): maps an encoding name string to
 * the Fixnum code page resolved for it.  Created in rb_w32_init_file(). */
static VALUE rb_code_page;
/* True when the wide character `c` is a directory separator ('\\' or '/').
 * NOTE: `c` is evaluated twice; do not pass expressions with side effects. */
#define IS_DIR_SEPARATOR_P(c) ((c) == L'\\' || (c) == L'/')
/* True when the wide string `c` starts with two directory separators
 * (the prefix of a UNC path).  The caller must ensure c[0] and c[1] exist. */
#define IS_DIR_UNC_P(c) (IS_DIR_SEPARATOR_P((c)[0]) && IS_DIR_SEPARATOR_P((c)[1]))
/* Value code_page() returns (and caches) when it cannot map an encoding to a
 * code page; callers then convert through UTF-8 instead. */
#define INVALID_CODE_PAGE 51932
/* Element count of the on-stack buffer handed to GetFullPathNameW(). */
#define PATH_BUFFER_SIZE (MAX_PATH * 2)
/* True when `level` is 4 or more, or when `level` is positive and `obj` is
 * tainted. */
#define insecure_obj_p(obj, level) ((level) >= 4 || ((level) > 0 && OBJ_TAINTED(obj)))
/* Overwrite, in place, every occurrence of `find` in the NUL-terminated wide
 * string `s` with `replace`. */
static inline void
replace_wchar(wchar_t *s, int find, int replace)
{
    wchar_t *cursor;

    for (cursor = s; *cursor != 0; cursor++) {
        if (*cursor == find) {
            *cursor = replace;
        }
    }
}
/*
 * Convert the NUL-terminated bytes of the Ruby string `str` from `code_page`
 * to a newly xmalloc'ed wide string.
 *
 *   str       - source string; when nil nothing is done and no output
 *               parameter is touched.
 *   wstr      - receives the allocated buffer (caller must xfree it).
 *   wstr_pos  - optional; when non-NULL receives the same pointer as *wstr.
 *   wstr_len  - receives the number of wide characters, excluding the
 *               terminator.
 *
 * When MultiByteToWideChar() reports failure (returns 0) the result is an
 * empty string of length 0, instead of an uninitialized buffer paired with a
 * length that wrapped around to a huge value.
 */
static inline void
convert_mb_to_wchar(VALUE str, wchar_t **wstr, wchar_t **wstr_pos, size_t *wstr_len, UINT code_page)
{
    int count;
    size_t len;

    if (NIL_P(str))
        return;

    /* With a source length of -1 the returned count includes the terminator. */
    count = MultiByteToWideChar(code_page, 0, RSTRING_PTR(str), -1, NULL, 0);

    /* One spare element beyond the reported count, as before; on failure
     * still allocate room for an empty string. */
    len = (size_t)(count > 0 ? count : 1) + 1;
    *wstr = (wchar_t *)xmalloc(len * sizeof(wchar_t));
    if (wstr_pos)
        *wstr_pos = *wstr;

    if (count <= 0 ||
        MultiByteToWideChar(code_page, 0, RSTRING_PTR(str), -1, *wstr, (int)len) == 0) {
        (*wstr)[0] = L'\0';
        *wstr_len = 0;
        return;
    }

    /* Drop the terminator and the spare element from the reported length. */
    *wstr_len = len - 2;
}
/*
 * Convert the NUL-terminated wide string `wstr` to a newly xmalloc'ed
 * multibyte string in `code_page`.
 *
 *   str      - receives the allocated buffer (caller must xfree it).
 *   str_len  - receives the byte length, excluding the terminator.
 *
 * When WideCharToMultiByte() reports failure (returns 0) the result is an
 * empty string of length 0, instead of a zero-sized allocation paired with a
 * length that wrapped around to a huge value.
 */
static inline void
convert_wchar_to_mb(const wchar_t *wstr, char **str, size_t *str_len, UINT code_page)
{
    /* With a source length of -1 the returned count includes the terminator. */
    int count = WideCharToMultiByte(code_page, 0, wstr, -1, NULL, 0, NULL, NULL);
    size_t len = (count > 0) ? (size_t)count : 1;

    *str = (char *)xmalloc(len * sizeof(char));

    if (count <= 0 ||
        WideCharToMultiByte(code_page, 0, wstr, -1, *str, (int)len, NULL, NULL) == 0) {
        (*str)[0] = '\0';
        *str_len = 0;
        return;
    }

    *str_len = len - 1;
}
/*
 * Build the user's home directory from the environment.
 *
 * Sources are tried in this order: HOME, then HOMEDRIVE followed by HOMEPATH,
 * then USERPROFILE.  If HOMEDRIVE is set but HOMEPATH is not, no further
 * source is tried and NULL is returned.
 *
 * Returns a newly xmalloc'ed wide string with every '\\' replaced by '/', or
 * NULL when no source is available.  The caller must xfree the result.
 */
static wchar_t *
home_dir(void)
{
    enum { FROM_NOTHING, FROM_HOME, FROM_HOMEDRIVE_HOMEPATH, FROM_USERPROFILE } source = FROM_NOTHING;
    wchar_t *home;
    size_t needed = 0;
    size_t n;

    /* First pass: find which source exists and how many characters it needs. */
    n = GetEnvironmentVariableW(L"HOME", NULL, 0);
    if (n != 0) {
        needed = n;
        source = FROM_HOME;
    }
    else {
        n = GetEnvironmentVariableW(L"HOMEDRIVE", NULL, 0);
        if (n != 0) {
            size_t drive_len = n;

            n = GetEnvironmentVariableW(L"HOMEPATH", NULL, 0);
            if (n != 0) {
                needed = drive_len + n;
                source = FROM_HOMEDRIVE_HOMEPATH;
            }
        }
        else {
            n = GetEnvironmentVariableW(L"USERPROFILE", NULL, 0);
            if (n != 0) {
                needed = n;
                source = FROM_USERPROFILE;
            }
        }
    }

    if (source == FROM_NOTHING)
        return NULL;

    /* Second pass: fetch the value(s) into the allocated buffer. */
    home = (wchar_t *)xmalloc(needed * sizeof(wchar_t));
    switch (source) {
      case FROM_HOME:
        GetEnvironmentVariableW(L"HOME", home, needed);
        break;
      case FROM_HOMEDRIVE_HOMEPATH:
        /* HOMEPATH is appended right after the characters HOMEDRIVE wrote. */
        n = GetEnvironmentVariableW(L"HOMEDRIVE", home, needed);
        GetEnvironmentVariableW(L"HOMEPATH", home + n, needed - n);
        break;
      case FROM_USERPROFILE:
        GetEnvironmentVariableW(L"USERPROFILE", home, needed);
        break;
      default:
        break;
    }

    replace_wchar(home, L'\\', L'/');
    return home;
}
/*
 * Strip a trailing ":$DATA" (compared case-insensitively) from `wfullpath`,
 * whose length in wide characters is `size`.
 *
 *   "name::$DATA"        -> "name"          (the whole "::$DATA" is removed)
 *   "name:stream:$DATA"  -> "name:stream"   (only ":$DATA" is removed, and
 *                                            only when another ':' occurs in
 *                                            the last path component)
 *
 * The string is truncated in place.  Returns the resulting length, which is
 * `size` unchanged when nothing was removed.
 */
static inline size_t
remove_invalid_alternative_data(wchar_t *wfullpath, size_t size)
{
    static const wchar_t prime[] = L":$DATA";
    enum { prime_len = (sizeof(prime) / sizeof(wchar_t)) -1 };
    wchar_t *cursor;

    if (size <= prime_len)
        return size;
    if (_wcsnicmp(wfullpath + size - prime_len, prime, prime_len) != 0)
        return size;

    /* Character immediately before the ":$DATA" suffix. */
    cursor = wfullpath + size - (prime_len + 1);

    if (*cursor == ':') {
        size -= prime_len + 1;
        wfullpath[size] = L'\0';
        return size;
    }

    /* Walk back through the last component looking for a stream-name ':'. */
    for (; !IS_DIR_SEPARATOR_P(*cursor) && cursor != wfullpath; cursor--) {
        if (*cursor == L':') {
            size -= prime_len;
            wfullpath[size] = L'\0';
            break;
        }
    }
    return size;
}
/* Return CP_ACP when AreFileApisANSI() reports true, CP_OEMCP otherwise. */
static inline UINT
system_code_page(void)
{
    if (AreFileApisANSI())
        return CP_ACP;
    return CP_OEMCP;
}
/*
 * Map a Ruby encoding to a Windows code page number.
 *
 *   - NULL encoding: the result of system_code_page().
 *   - Otherwise the rb_code_page hash is consulted first, keyed by the
 *     encoding name.
 *   - US-ASCII and ASCII-8BIT map to 1252.
 *   - Otherwise the first entry of Encoding#names that starts with "CP" and
 *     is followed by a non-zero number supplies the code page.
 *   - If none matches, INVALID_CODE_PAGE is returned.
 *
 * Every result computed after a cache miss is stored in rb_code_page.
 */
static UINT
code_page(rb_encoding *enc)
{
    struct RString fake_str;
    VALUE key, cached, enc_obj;
    VALUE aliases = Qundef;
    char *enc_name;
    ID id_names;
    long idx;

    if (!enc)
        return system_code_page();

    enc_name = (char *)rb_enc_name(enc);

    /* Wrap enc_name in an on-stack string header so it can serve as the
     * lookup key for the cache. */
    fake_str.basic.flags = T_STRING|RSTRING_NOEMBED;
    fake_str.basic.klass = rb_cString;
    fake_str.as.heap.len = strlen(enc_name);
    fake_str.as.heap.ptr = enc_name;
    fake_str.as.heap.aux.capa = fake_str.as.heap.len;
    key = (VALUE)&fake_str;
    ENCODING_CODERANGE_SET(key, rb_usascii_encindex(), ENC_CODERANGE_7BIT);

    cached = rb_hash_lookup(rb_code_page, key);
    if (!NIL_P(cached))
        return (UINT)FIX2INT(cached);

    /* Cache miss: from here on a real String is used as the hash key. */
    key = rb_usascii_str_new2(enc_name);

    enc_obj = rb_enc_from_encoding(enc);
    if (!NIL_P(enc_obj)) {
        CONST_ID(id_names, "names");
        aliases = rb_funcall(enc_obj, id_names, 0);
    }

    if (enc == rb_usascii_encoding() || enc == rb_ascii8bit_encoding()) {
        UINT ascii_cp = 1252;

        rb_hash_aset(rb_code_page, key, INT2FIX(ascii_cp));
        return ascii_cp;
    }

    if (aliases != Qundef) {
        for (idx = 0; idx < RARRAY_LEN(aliases); idx++) {
            VALUE alias = RARRAY_PTR(aliases)[idx];
            const char *alias_str = RSTRING_PTR(alias);
            int number;

            if (strncmp("CP", alias_str, 2) != 0)
                continue;
            number = atoi(alias_str + 2);
            if (number == 0)
                continue;

            rb_hash_aset(rb_code_page, key, INT2FIX(number));
            return (UINT)number;
        }
    }

    rb_hash_aset(rb_code_page, key, INT2FIX(INVALID_CODE_PAGE));
    return INVALID_CODE_PAGE;
}
/*
 * Return a UTF-8 transcoded copy of `str`, treating its bytes as being in
 * `encoding`: a new string is built from the bytes of `str` tagged with
 * `encoding`, then passed through rb_str_encode() with UTF-8 as the target.
 */
static inline VALUE
fix_string_encoding(VALUE str, rb_encoding *encoding)
{
    VALUE reinterpreted = rb_enc_str_new(RSTRING_PTR(str), RSTRING_LEN(str), encoding);
    VALUE utf8 = rb_enc_from_encoding(rb_utf8_encoding());

    return rb_str_encode(reinterpreted, utf8, 0, Qnil);
}
/*
 * Replace the last component of the path in *wfullpath with the file name
 * that FindFirstFileW() reports for it.
 *
 *   wfullpath - in/out: pointer to the NUL-terminated path buffer.  When the
 *               result does not fit in the space the current string occupies,
 *               a new buffer is xmalloc'ed and stored here.
 *   size      - length of the path in wide characters (no terminator).
 *   heap      - non-zero when *wfullpath was heap-allocated, so it is xfree'd
 *               when it gets replaced by a new buffer.
 *
 * The path is returned unchanged when it is a bare drive root ("X:\"), when
 * it contains '*' or '?', when its last component is too long to be an 8.3
 * name, or when FindFirstFileW() fails.
 *
 * Returns the length of the resulting path in wide characters.
 */
static inline size_t
replace_to_long_name(wchar_t **wfullpath, size_t size, int heap)
{
    WIN32_FIND_DATAW find_data;
    HANDLE find_handle;

    /* Limits of an 8.3 name: 8 name chars + '.' + 3 extension chars. */
    size_t const max_short_name_size = 8 + 1 + 3;
    size_t const max_extension_size = 3;
    size_t path_len = 1, extension_len = 0;
    wchar_t *pos = *wfullpath;

    /* A drive root has no component to expand. */
    if (size == 3 && pos[1] == L':' && pos[2] == L'\\' && pos[3] == L'\0') {
        return size;
    }

    /* FindFirstFileW() would treat these as wildcards; leave the path alone. */
    if (wcspbrk(pos, L"*?")) {
        return size;
    }

    /* Scan the last component backwards; bail out as soon as it is longer
     * than an 8.3 name (or its extension is longer than 3 characters). */
    pos = *wfullpath + size - 1;
    while (!IS_DIR_SEPARATOR_P(*pos) && pos != *wfullpath) {
        if (!extension_len && *pos == L'.') {
            extension_len = path_len - 1;
        }
        if (path_len > max_short_name_size || extension_len > max_extension_size) {
            return size;
        }
        path_len++;
        pos--;
    }

    find_handle = FindFirstFileW(*wfullpath, &find_data);
    if (find_handle != INVALID_HANDLE_VALUE) {
        size_t const old_len = wcslen(*wfullpath);
        size_t trail_pos = old_len;
        size_t file_len = wcslen(find_data.cFileName);

        FindClose(find_handle);

        /* Locate the separator in front of the last component. */
        while (trail_pos > 0) {
            if (IS_DIR_SEPARATOR_P((*wfullpath)[trail_pos]))
                break;
            trail_pos--;
        }
        size = trail_pos + 1 + file_len;

        /*
         * *wfullpath is a pointer, so sizeof cannot report the capacity of
         * the buffer behind it.  What is known is that the buffer holds the
         * current string plus its terminator (old_len + 1 elements), so a
         * new buffer is needed only when the result is longer than that.
         */
        if (size > old_len) {
            wchar_t *buf = (wchar_t *)xmalloc((size + 1) * sizeof(wchar_t));
            wcsncpy(buf, *wfullpath, trail_pos + 1);
            if (heap)
                xfree(*wfullpath);
            *wfullpath = buf;
        }
        /* file_len + 1 so that the terminator is copied as well. */
        wcsncpy(*wfullpath + trail_pos + 1, find_data.cFileName, file_len + 1);
    }
    return size;
}
/*
 * Extract the user name that follows a leading '~' in a wide path and return
 * it as a Ruby string in `path_encoding`.
 *
 *   wpath          - pointer to the wide path; the name starts at
 *                    *wpath + offset and ends at the next directory separator
 *                    or the end of the string.  The path is modified: a NUL
 *                    is written at the end of the name.
 *   cp             - code page used to convert the name to multibyte.
 *   path_cp        - when equal to INVALID_CODE_PAGE, the converted bytes are
 *                    treated as UTF-8 and transcoded to `path_encoding`.
 *                    Otherwise they are tagged with `path_encoding` directly.
 */
static inline VALUE
get_user_from_path(wchar_t **wpath, int offset, UINT cp, UINT path_cp, rb_encoding *path_encoding)
{
    wchar_t *name_start = *wpath + offset;
    wchar_t *name_end;
    char *user;
    size_t user_len;
    VALUE result;

    for (name_end = name_start;
         *name_end != '\0' && !IS_DIR_SEPARATOR_P(*name_end);
         name_end++)
        ;
    *name_end = '\0';

    convert_wchar_to_mb(name_start, &user, &user_len, cp);

    if (path_cp != INVALID_CODE_PAGE) {
        result = rb_enc_str_new(user, user_len, path_encoding);
    }
    else {
        VALUE utf8_name = rb_enc_str_new(user, user_len, rb_utf8_encoding());

        result = rb_str_encode(utf8_name, rb_enc_from_encoding(path_encoding), 0, Qnil);
        /* Empty the intermediate string now that it is no longer needed. */
        rb_str_resize(utf8_name, 0);
    }

    if (user)
        xfree(user);
    return result;
}
/*
 * Expand `fname` to an absolute path, optionally relative to `dname`, and
 * store it in `result`.
 *
 *   fname     - path to expand (may be nil).
 *   dname     - base directory, or nil.
 *   abs_mode  - when 0, a leading "~" in the path or directory is expanded
 *               to the home directory, and "~user" raises ArgumentError.
 *               When non-zero, "~" gets no special treatment.
 *   long_name - when non-zero, the last component is passed through
 *               replace_to_long_name().
 *   result    - Ruby string that receives the expanded path; it is resized
 *               as needed and is also the return value.
 *
 * The result uses '/' as separator, has no trailing separator (other than
 * directly after "X:"), no trailing '.', and no trailing ":$DATA".  It is
 * tagged with the path's encoding, or the filesystem encoding when "~" in
 * the path was expanded.  It is tainted when the path is tainted, when the
 * used directory is tainted, when the home directory was consulted, or when
 * the combined path is relative.
 *
 * Raises ArgumentError when the home directory cannot be determined, is not
 * absolute, or when "~user" is requested.
 */
VALUE
rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_name, VALUE result)
{
    size_t size = 0, wpath_len = 0, wdir_len = 0, whome_len = 0;
    size_t wfullpath_len = 0;
    size_t buffer_len = 0;
    wchar_t *wfullpath = NULL, *wpath = NULL, *wpath_pos = NULL;
    wchar_t *wdir = NULL, *wdir_pos = NULL;
    wchar_t *whome = NULL, *buffer = NULL, *buffer_pos = NULL;
    UINT path_cp, cp;
    VALUE path = fname, dir = dname;
    wchar_t wfullpath_buffer[PATH_BUFFER_SIZE];
    wchar_t path_drive = L'\0', dir_drive = L'\0';
    int ignore_dir = 0;
    rb_encoding *path_encoding;
    int tainted = 0;

    /* The result starts out as tainted as the path itself. */
    tainted = OBJ_TAINTED(path);

    /* Determine the encoding (path and dir must be compatible). */
    if (NIL_P(dir)) {
        path_encoding = rb_enc_get(path);
    }
    else {
        path_encoding = rb_enc_check(path, dir);
    }

    cp = path_cp = code_page(path_encoding);

    /* No usable code page: go through UTF-8 instead. */
    if (path_cp == INVALID_CODE_PAGE) {
        cp = CP_UTF8;
        if (!NIL_P(path)) {
            path = fix_string_encoding(path, path_encoding);
        }
    }

    /* Convert the path to a wide string. */
    convert_mb_to_wchar(path, &wpath, &wpath_pos, &wpath_len, cp);

    /* Classify the path: "~", "~/...", "X:...", or "~user". */
    if (abs_mode == 0 && wpath_len > 0 && wpath_pos[0] == L'~' &&
        (wpath_len == 1 || IS_DIR_SEPARATOR_P(wpath_pos[1]))) {
        /* The home directory comes from the environment. */
        tainted = 1;

        whome = home_dir();
        if (whome == NULL) {
            xfree(wpath);
            rb_raise(rb_eArgError, "couldn't find HOME environment -- expanding `~'");
        }
        whome_len = wcslen(whome);

        if (PathIsRelativeW(whome) && !(whome_len >= 2 && IS_DIR_UNC_P(whome))) {
            xfree(wpath);
            xfree(whome);
            rb_raise(rb_eArgError, "non-absolute home");
        }

        /* The home directory is in the filesystem's encoding. */
        path_encoding = rb_filesystem_encoding();
        cp = path_cp = system_code_page();

        /* An expanded "~" makes the path absolute: dir is not used. */
        ignore_dir = 1;

        /* Skip the '~' and the separator following it, if any. */
        wpath_pos++;
        wpath_len--;

        if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
            wpath_pos++;
            wpath_len--;
        }
    }
    else if (wpath_len >= 2 && wpath_pos[1] == L':') {
        if (wpath_len >= 3 && IS_DIR_SEPARATOR_P(wpath_pos[2])) {
            /* "X:\..." is fully qualified: dir is not used. */
            ignore_dir = 1;
        }
        else {
            /* "X:rel" is relative to a directory on drive X. */
            path_drive = wpath_pos[0];
        }
    }
    else if (abs_mode == 0 && wpath_len >= 2 && wpath_pos[0] == L'~') {
        /* "~user" is not supported. */
        result = get_user_from_path(&wpath_pos, 1, cp, path_cp, path_encoding);

        if (wpath)
            xfree(wpath);

        rb_raise(rb_eArgError, "can't find user %s", StringValuePtr(result));
    }

    /* Process dir only when it can contribute to the result. */
    if (!ignore_dir && !NIL_P(dir)) {
        if (path_cp == INVALID_CODE_PAGE) {
            dir = fix_string_encoding(dir, path_encoding);
        }

        /* Convert dir to a wide string. */
        convert_mb_to_wchar(dir, &wdir, &wdir_pos, &wdir_len, cp);

        if (abs_mode == 0 && wdir_len > 0 && wdir_pos[0] == L'~' &&
            (wdir_len == 1 || IS_DIR_SEPARATOR_P(wdir_pos[1]))) {
            /* The home directory comes from the environment. */
            tainted = 1;

            whome = home_dir();
            if (whome == NULL) {
                xfree(wpath);
                xfree(wdir);
                rb_raise(rb_eArgError, "couldn't find HOME environment -- expanding `~'");
            }
            whome_len = wcslen(whome);

            if (PathIsRelativeW(whome) && !(whome_len >= 2 && IS_DIR_UNC_P(whome))) {
                xfree(wpath);
                xfree(wdir);
                xfree(whome);
                rb_raise(rb_eArgError, "non-absolute home");
            }

            /* Skip the '~' and the separator following it, if any. */
            wdir_pos++;
            wdir_len--;

            if (wdir_len && IS_DIR_SEPARATOR_P(wdir_pos[0])) {
                wdir_pos++;
                wdir_len--;
            }
        }
        else if (wdir_len >= 2 && wdir[1] == L':') {
            dir_drive = wdir[0];
            if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
                /* Path is rooted: keep only the "X:" part of dir. */
                wdir_len = 2;
            }
        }
        else if (wdir_len >= 2 && IS_DIR_UNC_P(wdir)) {
            /* UNC dir with a rooted path: keep only "\\server\share". */
            if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
                size_t separators = 0;
                size_t pos = 2;

                while (pos < wdir_len && separators < 2) {
                    if (IS_DIR_SEPARATOR_P(wdir[pos])) {
                        separators++;
                    }
                    pos++;
                }
                if (separators == 2)
                    wdir_len = pos - 1;
            }
        }
        else if (abs_mode == 0 && wdir_len >= 2 && wdir_pos[0] == L'~') {
            /* "~user" is not supported. */
            result = get_user_from_path(&wdir_pos, 1, cp, path_cp, path_encoding);
            if (wpath)
                xfree(wpath);

            if (wdir)
                xfree(wdir);

            rb_raise(rb_eArgError, "can't find user %s", StringValuePtr(result));
        }
    }

    /* Decide whether dir is still relevant given the drive letters. */
    if (!ignore_dir && path_drive && dir_drive) {
        if (towupper(path_drive) == towupper(dir_drive)) {
            /* Same drive: drop the "X:" from path and resolve against dir. */
            wpath_pos += 2;
            wpath_len -= 2;
        }
        else {
            /* Different drives: dir cannot be used. */
            ignore_dir = 1;
            wdir_len = 0;
        }
    }

    if (!ignore_dir && wpath_len >= 2 && IS_DIR_UNC_P(wpath)) {
        /* Path starts with two separators (UNC): dir is not used. */
        ignore_dir = 1;
        wdir_len = 0;
    }
    else if (!ignore_dir && wpath_len >= 1 && IS_DIR_SEPARATOR_P(wpath[0]) &&
             !dir_drive && !(wdir_len >= 2 && IS_DIR_UNC_P(wdir))) {
        /* Rooted path and dir supplies neither drive nor UNC root. */
        ignore_dir = 1;
        wdir_len = 0;
    }

    /* Room for home, dir and path, each followed by a separator. */
    buffer_len = wpath_len + 1 + wdir_len + 1 + whome_len + 1;

    buffer = buffer_pos = (wchar_t *)xmalloc((buffer_len + 1) * sizeof(wchar_t));

    /* Home directory first ... */
    if (whome_len) {
        wcsncpy(buffer_pos, whome, whome_len);
        buffer_pos += whome_len;
    }

    /* ... followed by a separator unless it already ends in one (or ':'). */
    if (whome_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) {
        buffer_pos[0] = L'\\';
        buffer_pos++;
    }

    /* Then the part of dir that is in use ... */
    if (wdir_len) {
        /* A tainted dir that contributes to the result taints it. */
        if (!tainted && OBJ_TAINTED(dir))
            tainted = 1;

        wcsncpy(buffer_pos, wdir_pos, wdir_len);
        buffer_pos += wdir_len;
    }

    /* ... followed by a separator unless it already ends in one (or ':'). */
    if (wdir_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) {
        buffer_pos[0] = L'\\';
        buffer_pos++;
    }

    /* Finally the path itself, or "." when it is empty. */
    if (wpath_len) {
        wcsncpy(buffer_pos, wpath_pos, wpath_len);
        buffer_pos += wpath_len;
    }

    if (wpath_len == 0) {
        buffer_pos[0] = L'.';
        buffer_pos++;
    }

    buffer_pos[0] = L'\0';

    /* A relative combined path is resolved against external state. */
    if (!tainted && PathIsRelativeW(buffer) && !(buffer_len >= 2 && IS_DIR_UNC_P(buffer)))
        tainted = 1;

    /* Resolve to a full path; fall back to the heap when the stack buffer
     * is too small. */
    size = GetFullPathNameW(buffer, PATH_BUFFER_SIZE, wfullpath_buffer, NULL);
    if (size > PATH_BUFFER_SIZE) {
        wfullpath = (wchar_t *)xmalloc(size * sizeof(wchar_t));
        size = GetFullPathNameW(buffer, size, wfullpath, NULL);
    }
    else {
        wfullpath = wfullpath_buffer;
    }

    /* Remove a trailing separator, except directly after ':' . */
    if (IS_DIR_SEPARATOR_P(wfullpath[size - 1]) &&
        wfullpath[size - 2] != L':' &&
        !(size == 2 && IS_DIR_UNC_P(wfullpath))) {
        size -= 1;
        wfullpath[size] = L'\0';
    }

    /* Remove a trailing dot. */
    if (wfullpath[size - 1] == L'.') {
        size -= 1;
        wfullpath[size] = L'\0';
    }

    /* Remove a trailing ":$DATA". */
    size = remove_invalid_alternative_data(wfullpath, size);

    /* Replace the last component with the name the filesystem reports. */
    if (long_name)
        size = replace_to_long_name(&wfullpath, size, (wfullpath != wfullpath_buffer));

    /* The result always uses forward slashes. */
    replace_wchar(wfullpath, L'\\', L'/');

    /*
     * Convert to multibyte.  The wide-character length and the multibyte
     * byte count are different quantities (they differ for any character
     * that needs more than one byte), so they are tracked separately: the
     * wide length is the source length of both calls, the byte count is
     * only the destination size.
     */
    wfullpath_len = size;
    size = WideCharToMultiByte(cp, 0, wfullpath, wfullpath_len, NULL, 0, NULL, NULL);
    if (size > (size_t)RSTRING_LEN(result)) {
        rb_str_modify(result);
        rb_str_resize(result, size);
    }

    WideCharToMultiByte(cp, 0, wfullpath, wfullpath_len, RSTRING_PTR(result), size, NULL, NULL);
    rb_str_set_len(result, size);

    /* Went through UTF-8: transcode back to the path's own encoding. */
    if (path_cp == INVALID_CODE_PAGE) {
        VALUE tmp;
        size_t len;

        rb_enc_associate(result, rb_utf8_encoding());
        ENC_CODERANGE_CLEAR(result);
        tmp = rb_str_encode(result, rb_enc_from_encoding(path_encoding), 0, Qnil);
        len = RSTRING_LEN(tmp);
        rb_str_modify(result);
        rb_str_resize(result, len);
        memcpy(RSTRING_PTR(result), RSTRING_PTR(tmp), len);
        rb_str_resize(tmp, 0);
    }
    rb_enc_associate(result, path_encoding);
    ENC_CODERANGE_CLEAR(result);

    if (tainted)
        OBJ_TAINT(result);

    /* Release every temporary buffer. */
    if (buffer)
        xfree(buffer);

    if (wpath)
        xfree(wpath);

    if (wdir)
        xfree(wdir);

    if (whome)
        xfree(whome);

    if (wfullpath && wfullpath != wfullpath_buffer)
        xfree(wfullpath);

    return result;
}
/*
 * Return 1 when `path` names something that is not a directory and can be
 * opened for reading, 0 otherwise.  The handle opened for the check is
 * closed again before returning.
 */
int
rb_file_load_ok(const char *path)
{
    HANDLE h;
    DWORD attr = GetFileAttributes(path);

    if (attr == INVALID_FILE_ATTRIBUTES)
        return 0;
    if (attr & FILE_ATTRIBUTE_DIRECTORY)
        return 0;

    h = CreateFile(path, GENERIC_READ,
                   FILE_SHARE_READ | FILE_SHARE_WRITE,
                   NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
    if (h == INVALID_HANDLE_VALUE)
        return 0;

    CloseHandle(h);
    return 1;
}
void
rb_w32_init_file(void)
{
rb_code_page = rb_hash_new();
rb_gc_register_mark_object(rb_code_page);
}