Support loading files with Unicode BOMs. Fixes #2369
parent
d0e9c82fea
commit
13f4432bbd
|
@ -17,6 +17,7 @@ import sys
|
|||
import time
|
||||
from sys import platform as _platform
|
||||
import config
|
||||
import codecs
|
||||
|
||||
import simplejson as json
|
||||
from flask import render_template, Response, session, request as req, url_for
|
||||
|
@ -932,6 +933,68 @@ class Filemanager(object):
|
|||
else:
|
||||
return newPath, newName
|
||||
|
||||
@staticmethod
def check_file_for_bom_and_binary(filename, enc="utf-8"):
    """
    Inspect the first 1024 bytes of a file to decide whether it looks
    binary and whether it starts with a Unicode byte order mark (BOM).

    Args:
        filename: Path of the file to inspect
        enc: Encoding to report when no BOM is found

    Returns:
        A 5-tuple ``(status, err_msg, is_binary, is_startswith_bom, enc)``:
        status is False when the file could not be read, err_msg is the
        error text (None on success), is_binary is the binary-file flag,
        is_startswith_bom is the BOM flag and enc is the encoding that
        should be used to open the file.
    """
    status = True
    err_msg = None
    is_startswith_bom = False
    # Must be bound before the try block: on a read error the return
    # statement below would otherwise raise UnboundLocalError.
    is_binary = False

    # Byte values that are considered to be part of a text file
    text_chars = bytearray([7, 8, 9, 10, 12, 13, 27]) \
        + bytearray(range(0x20, 0x7f)) \
        + bytearray(range(0x80, 0x100))

    def is_binary_string(bytes_data):
        """Checks if string data is binary"""
        return bool(bytes_data.translate(None, text_chars))

    # UTF-32 is tested before UTF-16 because the UTF-32 LE BOM begins
    # with the UTF-16 LE BOM; the first match wins.
    bom_encodings = (
        ('utf-8-sig', (codecs.BOM_UTF8,)),
        ('utf-32', (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE)),
        ('utf-16', (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE)),
    )

    try:
        with open(filename, 'rb') as f:
            file_data = f.read(1024)

        # Check for BOM in file data
        for encoding, boms in bom_encodings:
            if file_data.startswith(boms):
                is_startswith_bom = True
                enc = encoding
                break

        if is_startswith_bom:
            # UTF-16/UTF-32 text is full of NUL bytes, so the raw byte
            # heuristic would always report it as binary. Decode the
            # sample first (incrementally, because the 1024 byte sample
            # may end in the middle of a character) and run the
            # heuristic on its UTF-8 form instead.
            try:
                decoder = codecs.getincrementaldecoder(enc)()
                sample = decoder.decode(file_data).encode('utf-8')
            except UnicodeError:
                # A BOM followed by undecodable data is not text
                is_binary = True
            else:
                is_binary = is_binary_string(sample)
        else:
            is_binary = is_binary_string(file_data)

    except IOError as ex:
        status = False
        # we don't want to expose real path of file
        # so only show error message (str(ex) would include the path).
        err_msg = u"Error: {0}".format(
            ex.strerror or u"Unable to read the file"
        )

    except Exception as ex:
        status = False
        err_msg = u"Error: {0}".format(str(ex))

    return status, err_msg, is_binary, is_startswith_bom, enc
|
||||
|
||||
def addfolder(self, path, name):
|
||||
"""
|
||||
Functionality to create new folder
|
||||
|
|
|
@ -12,6 +12,7 @@ import simplejson as json
|
|||
import os
|
||||
import pickle
|
||||
import random
|
||||
import codecs
|
||||
|
||||
from flask import Response, url_for, render_template, session, request
|
||||
from flask_babel import gettext
|
||||
|
@ -1220,7 +1221,10 @@ def load_file():
|
|||
|
||||
file_path = unquote(file_data['file_name'])
|
||||
if hasattr(str, 'decode'):
|
||||
file_path = unquote(file_data['file_name']).encode('utf-8').decode('utf-8')
|
||||
file_path = unquote(
|
||||
file_data['file_name']
|
||||
).encode('utf-8').decode('utf-8')
|
||||
|
||||
# retrieve storage directory path
|
||||
storage_manager_path = get_storage_directory()
|
||||
if storage_manager_path:
|
||||
|
@ -1230,45 +1234,27 @@ def load_file():
|
|||
file_path.lstrip('/').lstrip('\\')
|
||||
)
|
||||
|
||||
file_data = None
|
||||
status, err_msg, is_binary, \
|
||||
is_startswith_bom, enc = Filemanager.check_file_for_bom_and_binary(
|
||||
file_path
|
||||
)
|
||||
|
||||
# check if file type is text or binary
|
||||
textchars = bytearray(
|
||||
[7, 8, 9, 10, 12, 13, 27]) + bytearray(
|
||||
range(0x20, 0x7f)) + bytearray(range(0x80, 0x100))
|
||||
if not status:
|
||||
return internal_server_error(
|
||||
errormsg=gettext(err_msg)
|
||||
)
|
||||
|
||||
is_binary_string = lambda bytes: bool(
|
||||
bytes.translate(None, textchars)
|
||||
)
|
||||
if is_binary:
|
||||
return internal_server_error(
|
||||
errormsg=gettext("File type not supported")
|
||||
)
|
||||
|
||||
with codecs.open(file_path, 'r', encoding=enc) as fileObj:
|
||||
data = fileObj.read()
|
||||
|
||||
# read file
|
||||
try:
|
||||
with open(file_path, 'rb') as fileObj:
|
||||
is_binary = is_binary_string(fileObj.read(1024))
|
||||
if not is_binary:
|
||||
fileObj.seek(0)
|
||||
if hasattr(str, 'decode'):
|
||||
file_data = fileObj.read().decode('utf-8')
|
||||
else:
|
||||
file_data = fileObj.read()
|
||||
else:
|
||||
return internal_server_error(
|
||||
errormsg=gettext("File type not supported")
|
||||
)
|
||||
except IOError as e:
|
||||
# we don't want to expose real path of file
|
||||
# so only show error message.
|
||||
if e.strerror == 'Permission denied':
|
||||
err_msg = "Error: {0}".format(e.strerror)
|
||||
else:
|
||||
err_msg = "Error: {0}".format(e.strerror)
|
||||
return internal_server_error(errormsg=err_msg)
|
||||
except Exception as e:
|
||||
err_msg = "Error: {0}".format(e.strerror)
|
||||
return internal_server_error(errormsg=err_msg)
|
||||
return make_json_response(
|
||||
data={
|
||||
'status': True, 'result': file_data,
|
||||
'status': True, 'result': data,
|
||||
}
|
||||
)
|
||||
|
||||
|
|
Loading…
Reference in New Issue