Support loading files with Unicode BOMs. Fixes #2369

pull/5/head
Murtuza Zabuawala 2017-05-09 12:06:49 +01:00 committed by Dave Page
parent d0e9c82fea
commit 13f4432bbd
2 changed files with 84 additions and 35 deletions

View File

@ -17,6 +17,7 @@ import sys
import time import time
from sys import platform as _platform from sys import platform as _platform
import config import config
import codecs
import simplejson as json import simplejson as json
from flask import render_template, Response, session, request as req, url_for from flask import render_template, Response, session, request as req, url_for
@ -932,6 +933,68 @@ class Filemanager(object):
else: else:
return newPath, newName return newPath, newName
@staticmethod
def check_file_for_bom_and_binary(filename, enc="utf-8"):
"""
This utility function will check if file is Binary file
and/or if it startswith BOM character
Args:
filename: File
enc: Encoding for the file
Returns:
Status(Error?), Error message, Binary file flag,
BOM character flag and Encoding to open file
"""
status = True
err_msg = None
is_startswith_bom = False
# check if file type is text or binary
text_chars = bytearray([7, 8, 9, 10, 12, 13, 27]) \
+ bytearray(range(0x20, 0x7f)) \
+ bytearray(range(0x80, 0x100))
def is_binary_string(bytes_data):
"""Checks if string data is binary"""
return bool(
bytes_data.translate(None, text_chars)
)
# read the file
try:
with open(filename, 'rb') as f:
file_data = f.read(1024)
# Check for BOM in file data
for encoding, boms in \
('utf-8-sig', (codecs.BOM_UTF8,)), \
('utf-16', (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE)), \
('utf-32', (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE)):
if any(file_data.startswith(bom) for bom in boms):
is_startswith_bom = True
enc = encoding
# Check if string is binary
is_binary = is_binary_string(file_data)
except IOError as ex:
status = False
# we don't want to expose real path of file
# so only show error message.
if ex.strerror == 'Permission denied':
err_msg = u"Error: {0}".format(ex.strerror)
else:
err_msg = u"Error: {0}".format(str(ex))
except Exception as ex:
status = False
err_msg = u"Error: {0}".format(str(ex))
return status, err_msg, is_binary, is_startswith_bom, enc
def addfolder(self, path, name): def addfolder(self, path, name):
""" """
Functionality to create new folder Functionality to create new folder

View File

@ -12,6 +12,7 @@ import simplejson as json
import os import os
import pickle import pickle
import random import random
import codecs
from flask import Response, url_for, render_template, session, request from flask import Response, url_for, render_template, session, request
from flask_babel import gettext from flask_babel import gettext
@ -1220,7 +1221,10 @@ def load_file():
file_path = unquote(file_data['file_name']) file_path = unquote(file_data['file_name'])
if hasattr(str, 'decode'): if hasattr(str, 'decode'):
file_path = unquote(file_data['file_name']).encode('utf-8').decode('utf-8') file_path = unquote(
file_data['file_name']
).encode('utf-8').decode('utf-8')
# retrieve storage directory path # retrieve storage directory path
storage_manager_path = get_storage_directory() storage_manager_path = get_storage_directory()
if storage_manager_path: if storage_manager_path:
@ -1230,45 +1234,27 @@ def load_file():
file_path.lstrip('/').lstrip('\\') file_path.lstrip('/').lstrip('\\')
) )
file_data = None status, err_msg, is_binary, \
is_startswith_bom, enc = Filemanager.check_file_for_bom_and_binary(
file_path
)
# check if file type is text or binary if not status:
textchars = bytearray( return internal_server_error(
[7, 8, 9, 10, 12, 13, 27]) + bytearray( errormsg=gettext(err_msg)
range(0x20, 0x7f)) + bytearray(range(0x80, 0x100)) )
is_binary_string = lambda bytes: bool( if is_binary:
bytes.translate(None, textchars) return internal_server_error(
) errormsg=gettext("File type not supported")
)
with codecs.open(file_path, 'r', encoding=enc) as fileObj:
data = fileObj.read()
# read file
try:
with open(file_path, 'rb') as fileObj:
is_binary = is_binary_string(fileObj.read(1024))
if not is_binary:
fileObj.seek(0)
if hasattr(str, 'decode'):
file_data = fileObj.read().decode('utf-8')
else:
file_data = fileObj.read()
else:
return internal_server_error(
errormsg=gettext("File type not supported")
)
except IOError as e:
# we don't want to expose real path of file
# so only show error message.
if e.strerror == 'Permission denied':
err_msg = "Error: {0}".format(e.strerror)
else:
err_msg = "Error: {0}".format(e.strerror)
return internal_server_error(errormsg=err_msg)
except Exception as e:
err_msg = "Error: {0}".format(e.strerror)
return internal_server_error(errormsg=err_msg)
return make_json_response( return make_json_response(
data={ data={
'status': True, 'result': file_data, 'status': True, 'result': data,
} }
) )