Support loading files with Unicode BOMs. Fixes #2369
parent
d0e9c82fea
commit
13f4432bbd
|
@ -17,6 +17,7 @@ import sys
|
||||||
import time
|
import time
|
||||||
from sys import platform as _platform
|
from sys import platform as _platform
|
||||||
import config
|
import config
|
||||||
|
import codecs
|
||||||
|
|
||||||
import simplejson as json
|
import simplejson as json
|
||||||
from flask import render_template, Response, session, request as req, url_for
|
from flask import render_template, Response, session, request as req, url_for
|
||||||
|
@ -932,6 +933,68 @@ class Filemanager(object):
|
||||||
else:
|
else:
|
||||||
return newPath, newName
|
return newPath, newName
|
||||||
|
|
||||||
|
@staticmethod
def check_file_for_bom_and_binary(filename, enc="utf-8"):
    """
    Inspect the beginning of a file for a Unicode BOM and binary content.

    Only the first 1024 bytes of the file are read.

    Args:
        filename: Path of the file to inspect
        enc: Encoding to report when no BOM is found

    Returns:
        A 5-tuple of:
          status: False when the file could not be read, True otherwise
          err_msg: Error message when status is False, otherwise None
          is_binary: True when the sampled bytes look like binary data
          is_startswith_bom: True when the file starts with a
              UTF-8/16/32 BOM
          enc: Encoding to open the file with ('utf-8-sig', 'utf-16' or
              'utf-32' when a BOM was found, otherwise the value passed in)
    """
    status = True
    err_msg = None
    is_startswith_bom = False
    # Must be bound before the try block: if the read fails, the return
    # statement below would otherwise raise UnboundLocalError.
    is_binary = False

    # Byte values that are accepted as "text"; anything else marks the
    # sample as binary.
    text_chars = bytearray([7, 8, 9, 10, 12, 13, 27]) \
        + bytearray(range(0x20, 0x7f)) \
        + bytearray(range(0x80, 0x100))

    def is_binary_string(bytes_data):
        """Checks if string data is binary"""
        return bool(
            bytes_data.translate(None, text_chars)
        )

    # read the file
    try:
        with open(filename, 'rb') as f:
            file_data = f.read(1024)

        # Check for BOM in file data. There is deliberately no break:
        # the UTF-32 LE BOM begins with the UTF-16 LE BOM, so the later
        # (UTF-32) match has to override the earlier one.
        for encoding, boms in \
                ('utf-8-sig', (codecs.BOM_UTF8,)), \
                ('utf-16', (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE)), \
                ('utf-32', (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE)):
            if any(file_data.startswith(bom) for bom in boms):
                is_startswith_bom = True
                enc = encoding

        # A BOM already identifies the content as text. UTF-16/UTF-32
        # text is full of NUL bytes, so running the byte heuristic on it
        # would wrongly classify it as binary.
        if not is_startswith_bom:
            is_binary = is_binary_string(file_data)

    except IOError as ex:
        status = False
        # we don't want to expose real path of file
        # so only show error message. str(ex) embeds the file name, so
        # it is used only when no OS-level reason is available.
        err_msg = u"Error: {0}".format(ex.strerror or str(ex))

    except Exception as ex:
        status = False
        err_msg = u"Error: {0}".format(str(ex))

    return status, err_msg, is_binary, is_startswith_bom, enc
|
||||||
|
|
||||||
def addfolder(self, path, name):
|
def addfolder(self, path, name):
|
||||||
"""
|
"""
|
||||||
Functionality to create new folder
|
Functionality to create new folder
|
||||||
|
|
|
@ -12,6 +12,7 @@ import simplejson as json
|
||||||
import os
|
import os
|
||||||
import pickle
|
import pickle
|
||||||
import random
|
import random
|
||||||
|
import codecs
|
||||||
|
|
||||||
from flask import Response, url_for, render_template, session, request
|
from flask import Response, url_for, render_template, session, request
|
||||||
from flask_babel import gettext
|
from flask_babel import gettext
|
||||||
|
@ -1220,7 +1221,10 @@ def load_file():
|
||||||
|
|
||||||
file_path = unquote(file_data['file_name'])
|
file_path = unquote(file_data['file_name'])
|
||||||
if hasattr(str, 'decode'):
|
if hasattr(str, 'decode'):
|
||||||
file_path = unquote(file_data['file_name']).encode('utf-8').decode('utf-8')
|
file_path = unquote(
|
||||||
|
file_data['file_name']
|
||||||
|
).encode('utf-8').decode('utf-8')
|
||||||
|
|
||||||
# retrieve storage directory path
|
# retrieve storage directory path
|
||||||
storage_manager_path = get_storage_directory()
|
storage_manager_path = get_storage_directory()
|
||||||
if storage_manager_path:
|
if storage_manager_path:
|
||||||
|
@ -1230,45 +1234,27 @@ def load_file():
|
||||||
file_path.lstrip('/').lstrip('\\')
|
file_path.lstrip('/').lstrip('\\')
|
||||||
)
|
)
|
||||||
|
|
||||||
file_data = None
|
status, err_msg, is_binary, \
|
||||||
|
is_startswith_bom, enc = Filemanager.check_file_for_bom_and_binary(
|
||||||
|
file_path
|
||||||
|
)
|
||||||
|
|
||||||
# check if file type is text or binary
|
if not status:
|
||||||
textchars = bytearray(
|
return internal_server_error(
|
||||||
[7, 8, 9, 10, 12, 13, 27]) + bytearray(
|
errormsg=gettext(err_msg)
|
||||||
range(0x20, 0x7f)) + bytearray(range(0x80, 0x100))
|
)
|
||||||
|
|
||||||
is_binary_string = lambda bytes: bool(
|
if is_binary:
|
||||||
bytes.translate(None, textchars)
|
return internal_server_error(
|
||||||
)
|
errormsg=gettext("File type not supported")
|
||||||
|
)
|
||||||
|
|
||||||
|
with codecs.open(file_path, 'r', encoding=enc) as fileObj:
|
||||||
|
data = fileObj.read()
|
||||||
|
|
||||||
# read file
|
|
||||||
try:
|
|
||||||
with open(file_path, 'rb') as fileObj:
|
|
||||||
is_binary = is_binary_string(fileObj.read(1024))
|
|
||||||
if not is_binary:
|
|
||||||
fileObj.seek(0)
|
|
||||||
if hasattr(str, 'decode'):
|
|
||||||
file_data = fileObj.read().decode('utf-8')
|
|
||||||
else:
|
|
||||||
file_data = fileObj.read()
|
|
||||||
else:
|
|
||||||
return internal_server_error(
|
|
||||||
errormsg=gettext("File type not supported")
|
|
||||||
)
|
|
||||||
except IOError as e:
|
|
||||||
# we don't want to expose real path of file
|
|
||||||
# so only show error message.
|
|
||||||
if e.strerror == 'Permission denied':
|
|
||||||
err_msg = "Error: {0}".format(e.strerror)
|
|
||||||
else:
|
|
||||||
err_msg = "Error: {0}".format(e.strerror)
|
|
||||||
return internal_server_error(errormsg=err_msg)
|
|
||||||
except Exception as e:
|
|
||||||
err_msg = "Error: {0}".format(e.strerror)
|
|
||||||
return internal_server_error(errormsg=err_msg)
|
|
||||||
return make_json_response(
|
return make_json_response(
|
||||||
data={
|
data={
|
||||||
'status': True, 'result': file_data,
|
'status': True, 'result': data,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue