From da7f088508b538651db7a1aee48b3efea593ecd5 Mon Sep 17 00:00:00 2001
From: Murtuza Zabuawala <murtuza.zabuawala@enterprisedb.com>
Date: Fri, 2 Mar 2018 13:36:50 +0000
Subject: [PATCH] Handle opening of non-UTF8 compatible files. Fixes #3129

---
 web/pgadmin/tools/sqleditor/__init__.py       | 13 ++---
 .../sqleditor/utils/query_tool_fs_utils.py    | 53 +++++++++++++++++++
 .../utils/tests/test_file_other_encoding.sql  |  2 +
 .../utils/tests/test_file_utf8_encoding.sql   |  2 +
 .../utils/tests/test_query_tool_fs_utils.py   | 45 ++++++++++++++++
 5 files changed, 105 insertions(+), 10 deletions(-)
 create mode 100644 web/pgadmin/tools/sqleditor/utils/query_tool_fs_utils.py
 create mode 100644 web/pgadmin/tools/sqleditor/utils/tests/test_file_other_encoding.sql
 create mode 100644 web/pgadmin/tools/sqleditor/utils/tests/test_file_utf8_encoding.sql
 create mode 100644 web/pgadmin/tools/sqleditor/utils/tests/test_query_tool_fs_utils.py

diff --git a/web/pgadmin/tools/sqleditor/__init__.py b/web/pgadmin/tools/sqleditor/__init__.py
index 8c17c97ae..0f3c90991 100644
--- a/web/pgadmin/tools/sqleditor/__init__.py
+++ b/web/pgadmin/tools/sqleditor/__init__.py
@@ -38,6 +38,8 @@ from pgadmin.utils.exception import ConnectionLost
 from pgadmin.utils.sqlautocomplete.autocomplete import SQLAutoComplete
 from pgadmin.tools.sqleditor.utils.query_tool_preferences import \
     RegisterQueryToolPreferences
+from pgadmin.tools.sqleditor.utils.query_tool_fs_utils import \
+    read_file_generator
 
 MODULE_NAME = 'sqleditor'
 
@@ -1360,16 +1362,7 @@ def load_file():
             errormsg=gettext("File type not supported")
         )
 
-    def gen():
-        with codecs.open(file_path, 'r', encoding=enc) as fileObj:
-            while True:
-                # 4MB chunk (4 * 1024 * 1024 Bytes)
-                data = fileObj.read(4194304)
-                if not data:
-                    break
-                yield data
-
-    return Response(gen(), mimetype='text/plain')
+    return Response(read_file_generator(file_path, enc), mimetype='text/plain')
 
 
 @blueprint.route('/save_file/', methods=["PUT", "POST"], endpoint='save_file')
diff --git a/web/pgadmin/tools/sqleditor/utils/query_tool_fs_utils.py b/web/pgadmin/tools/sqleditor/utils/query_tool_fs_utils.py
new file mode 100644
index 000000000..ad1df0f89
--- /dev/null
+++ b/web/pgadmin/tools/sqleditor/utils/query_tool_fs_utils.py
@@ -0,0 +1,53 @@
+##########################################################################
+#
+# pgAdmin 4 - PostgreSQL Tools
+#
+# Copyright (C) 2013 - 2018, The pgAdmin Development Team
+# This software is released under the PostgreSQL Licence
+#
+##########################################################################
+
+import codecs
+
+
+def read_file_generator(file, enc):
+    """
+    Lazily read the file selected by the user, decoding it to text.
+
+    The file is first scanned with the requested encoding; it is streamed
+    only once a decoder that accepts the whole file is known, so a decoding
+    failure part-way through can never emit the leading chunks twice.
+
+    Args:
+        file: Path of the file to read
+        enc: Name of the encoding the file is expected to be in
+
+    Returns:
+        Generator yielding the decoded content in chunks of about 4MB
+    """
+    # 4MB chunk (4 * 1024 * 1024 Bytes)
+    chunk_size = 4194304
+    encoding, errors = enc, 'strict'
+    try:
+        with codecs.open(file, 'r', encoding=enc) as fileObj:
+            while fileObj.read(chunk_size):
+                pass
+    except UnicodeDecodeError:
+        # The latin-1 encoding maps every possible byte value to the first
+        # 256 Unicode code points, so decoding can never fail regardless of
+        # the configured error handler, and it copes with most of the
+        # Windows encodings. It is the closest equivalent Python 3 offers
+        # to the permissive Python 2 text handling model.
+        # Ref: https://goo.gl/vDhggS
+        encoding = 'latin-1'
+    except Exception:
+        # As a last resort we will use the provided encoding and then
+        # ignore the decoding errors
+        errors = 'ignore'
+
+    with codecs.open(file, 'r', encoding=encoding, errors=errors) as fileObj:
+        while True:
+            data = fileObj.read(chunk_size)
+            if not data:
+                break
+            yield data
diff --git a/web/pgadmin/tools/sqleditor/utils/tests/test_file_other_encoding.sql b/web/pgadmin/tools/sqleditor/utils/tests/test_file_other_encoding.sql
new file mode 100644
index 000000000..6e3bf3f1c
--- /dev/null
+++ b/web/pgadmin/tools/sqleditor/utils/tests/test_file_other_encoding.sql
@@ -0,0 +1,2 @@
+/*Copyright � 2017/*
+SELECT 1;
\ No newline at end of file
diff --git a/web/pgadmin/tools/sqleditor/utils/tests/test_file_utf8_encoding.sql b/web/pgadmin/tools/sqleditor/utils/tests/test_file_utf8_encoding.sql
new file mode 100644
index 000000000..735874149
--- /dev/null
+++ b/web/pgadmin/tools/sqleditor/utils/tests/test_file_utf8_encoding.sql
@@ -0,0 +1,2 @@
+/*Copyright © 2017/*
+SELECT 1;
\ No newline at end of file
diff --git a/web/pgadmin/tools/sqleditor/utils/tests/test_query_tool_fs_utils.py b/web/pgadmin/tools/sqleditor/utils/tests/test_query_tool_fs_utils.py
new file mode 100644
index 000000000..7cab06782
--- /dev/null
+++ b/web/pgadmin/tools/sqleditor/utils/tests/test_query_tool_fs_utils.py
@@ -0,0 +1,45 @@
+##########################################################################
+#
+# pgAdmin 4 - PostgreSQL Tools
+#
+# Copyright (C) 2013 - 2018, The pgAdmin Development Team
+# This software is released under the PostgreSQL Licence
+#
+##########################################################################
+import os
+from pgadmin.utils.route import BaseTestGenerator
+from pgadmin.tools.sqleditor.utils.query_tool_fs_utils import \
+    read_file_generator
+
+
+class TestReadFileGeneratorForEncoding(BaseTestGenerator):
+    """
+    Check that read_file_generator reads files of any encoding as intended
+    """
+
+    scenarios = [
+        (
+            'When user is trying to load the file with utf-8 encoding',
+            dict(
+                file='test_file_utf8_encoding.sql',
+                encoding='utf-8'
+            )
+        ),
+        (
+            'When user is trying to load the file with other encoding and'
+            ' trying to use utf-8 encoding to read it',
+            dict(
+                file='test_file_other_encoding.sql',
+                encoding='utf-8'
+            )
+        ),
+    ]
+
+    def setUp(self):
+        self.dir_path = os.path.dirname(os.path.realpath(__file__))
+        self.complete_path = os.path.join(self.dir_path, self.file)
+
+    def runTest(self):
+        result = read_file_generator(self.complete_path, self.encoding)
+        # Drain the generator so the file gets closed, then check the content
+        self.assertIn('SELECT 1', ''.join(result))