Mercurial > kallithea
changeset 8062:9203621cae03
vcs: always return bytes from node.content
We would rather make the unicode conversions explicit.
Note: In Py3, bytes.startswith only accepts bytes prefixes, not str - replace the str-based startswith checks with a bytes regexp.
author | Mads Kiilerich <mads@kiilerich.com> |
---|---|
date | Sat, 28 Dec 2019 01:08:48 +0100 |
parents | 7c43e15fb8bc |
children | 9bc709aa0614 |
files | kallithea/controllers/admin/gists.py kallithea/controllers/compare.py kallithea/controllers/feed.py kallithea/controllers/files.py kallithea/controllers/pullrequests.py kallithea/controllers/summary.py kallithea/lib/annotate.py kallithea/lib/diffs.py kallithea/lib/helpers.py kallithea/lib/indexers/daemon.py kallithea/lib/vcs/backends/git/inmemory.py kallithea/lib/vcs/backends/hg/inmemory.py kallithea/lib/vcs/nodes.py kallithea/lib/vcs/utils/annotate.py kallithea/templates/admin/gists/edit.html kallithea/templates/files/files_edit.html kallithea/tests/vcs/test_git.py kallithea/tests/vcs/test_hg.py |
diffstat | 18 files changed, 43 insertions(+), 51 deletions(-) [+] |
line wrap: on
line diff
--- a/kallithea/controllers/admin/gists.py Fri Dec 27 00:26:14 2019 +0100 +++ b/kallithea/controllers/admin/gists.py Sat Dec 28 01:08:48 2019 +0100 @@ -182,7 +182,7 @@ log.error(traceback.format_exc()) raise HTTPNotFound() if format == 'raw': - content = '\n\n'.join([f.content for f in c.files if (f_path is None or safe_unicode(f.path) == f_path)]) + content = '\n\n'.join([safe_unicode(f.content) for f in c.files if (f_path is None or safe_unicode(f.path) == f_path)]) response.content_type = 'text/plain' return content return render('admin/gists/show.html')
--- a/kallithea/controllers/compare.py Fri Dec 27 00:26:14 2019 +0100 +++ b/kallithea/controllers/compare.py Sat Dec 28 01:08:48 2019 +0100 @@ -272,7 +272,7 @@ ignore_whitespace=ignore_whitespace, context=line_context) - diff_processor = diffs.DiffProcessor(raw_diff or '', diff_limit=diff_limit) + diff_processor = diffs.DiffProcessor(raw_diff, diff_limit=diff_limit) c.limited_diff = diff_processor.limited_diff c.file_diff_data = [] c.lines_added = 0
--- a/kallithea/controllers/feed.py Fri Dec 27 00:26:14 2019 +0100 +++ b/kallithea/controllers/feed.py Sat Dec 28 01:08:48 2019 +0100 @@ -94,7 +94,7 @@ desc_msg.extend(changes) if str2bool(CONFIG.get('rss_include_diff', False)): desc_msg.append('\n\n') - desc_msg.append(raw_diff) + desc_msg.append(safe_unicode(raw_diff)) desc_msg.append('</pre>') return [safe_unicode(chunk) for chunk in desc_msg]
--- a/kallithea/controllers/files.py Fri Dec 27 00:26:14 2019 +0100 +++ b/kallithea/controllers/files.py Sat Dec 28 01:08:48 2019 +0100 @@ -46,7 +46,7 @@ from kallithea.lib.base import BaseRepoController, jsonify, render from kallithea.lib.exceptions import NonRelativePathError from kallithea.lib.utils import action_logger -from kallithea.lib.utils2 import convert_line_endings, detect_mode, safe_int, safe_str, str2bool +from kallithea.lib.utils2 import convert_line_endings, detect_mode, safe_int, safe_str, safe_unicode, str2bool from kallithea.lib.vcs.backends.base import EmptyChangeset from kallithea.lib.vcs.conf import settings from kallithea.lib.vcs.exceptions import ( @@ -365,8 +365,7 @@ c.f_path = f_path if r_post: - - old_content = c.file.content + old_content = safe_unicode(c.file.content) sl = old_content.splitlines(1) first_line = sl[0] if sl else '' # modes: 0 - Unix, 1 - Mac, 2 - DOS
--- a/kallithea/controllers/pullrequests.py Fri Dec 27 00:26:14 2019 +0100 +++ b/kallithea/controllers/pullrequests.py Sat Dec 28 01:08:48 2019 +0100 @@ -591,7 +591,7 @@ ignore_whitespace=ignore_whitespace, context=line_context) except ChangesetDoesNotExistError: raw_diff = _("The diff can't be shown - the PR revisions could not be found.") - diff_processor = diffs.DiffProcessor(raw_diff or '', diff_limit=diff_limit) + diff_processor = diffs.DiffProcessor(raw_diff, diff_limit=diff_limit) c.limited_diff = diff_processor.limited_diff c.file_diff_data = [] c.lines_added = 0
--- a/kallithea/controllers/summary.py Fri Dec 27 00:26:14 2019 +0100 +++ b/kallithea/controllers/summary.py Sat Dec 28 01:08:48 2019 +0100 @@ -46,7 +46,7 @@ from kallithea.lib.compat import json from kallithea.lib.markup_renderer import MarkupRenderer from kallithea.lib.page import Page -from kallithea.lib.utils2 import safe_int +from kallithea.lib.utils2 import safe_int, safe_unicode from kallithea.lib.vcs.backends.base import EmptyChangeset from kallithea.lib.vcs.exceptions import ChangesetError, EmptyRepositoryError, NodeDoesNotExistError from kallithea.lib.vcs.nodes import FileNode @@ -84,7 +84,7 @@ readme_file = f log.debug('Found README file `%s` rendering...', readme_file) - readme_data = renderer.render(readme.content, + readme_data = renderer.render(safe_unicode(readme.content), filename=f) break except NodeDoesNotExistError:
--- a/kallithea/lib/annotate.py Fri Dec 27 00:26:14 2019 +0100 +++ b/kallithea/lib/annotate.py Sat Dec 28 01:08:48 2019 +0100 @@ -30,6 +30,7 @@ from kallithea.lib.vcs.exceptions import VCSError from kallithea.lib.vcs.nodes import FileNode +from kallithea.lib.vcs.utils import safe_unicode def annotate_highlight(filenode, annotate_from_changeset_func=None, @@ -53,7 +54,7 @@ headers=headers, annotate_from_changeset_func=annotate_from_changeset_func, **options) lexer = get_custom_lexer(filenode.extension) or filenode.lexer - highlighted = highlight(filenode.content, lexer, formatter) + highlighted = highlight(safe_unicode(filenode.content), lexer, formatter) return highlighted
--- a/kallithea/lib/diffs.py Fri Dec 27 00:26:14 2019 +0100 +++ b/kallithea/lib/diffs.py Sat Dec 28 01:08:48 2019 +0100 @@ -289,8 +289,8 @@ based on that parameter cut off will be triggered, set to None to show full diff """ - if not isinstance(diff, basestring): - raise Exception('Diff must be a basestring got %s instead' % type(diff)) + if not isinstance(diff, bytes): + raise Exception('Diff must be bytes - got %s' % type(diff)) self._diff = diff self.adds = 0 @@ -516,6 +516,9 @@ """, re.VERBOSE | re.MULTILINE) +_header_next_check = re.compile(br'''(?!@)(?!literal )(?!delta )''') + + def _get_header(vcs, diff_chunk): """ Parses a Git diff for a single file (header and chunks) and returns a tuple with: @@ -537,7 +540,7 @@ raise Exception('diff not recognized as valid %s diff' % vcs) meta_info = match.groupdict() rest = diff_chunk[match.end():] - if rest and not rest.startswith('@') and not rest.startswith('literal ') and not rest.startswith('delta '): + if rest and _header_next_check.match(rest): raise Exception('cannot parse %s diff header: %r followed by %r' % (vcs, diff_chunk[:match.end()], rest[:1000])) diff_lines = (_escaper(m.group(0)) for m in re.finditer(r'.*\n|.+$', rest)) # don't split on \r as str.splitlines do return meta_info, diff_lines
--- a/kallithea/lib/helpers.py Fri Dec 27 00:26:14 2019 +0100 +++ b/kallithea/lib/helpers.py Sat Dec 28 01:08:48 2019 +0100 @@ -330,7 +330,7 @@ """ lexer = get_custom_lexer(filenode.extension) or filenode.lexer return literal(markup_whitespace( - code_highlight(filenode.content, lexer, CodeHtmlFormatter(**kwargs)))) + code_highlight(safe_unicode(filenode.content), lexer, CodeHtmlFormatter(**kwargs)))) def pygmentize_annotation(repo_name, filenode, **kwargs):
--- a/kallithea/lib/indexers/daemon.py Fri Dec 27 00:26:14 2019 +0100 +++ b/kallithea/lib/indexers/daemon.py Sat Dec 28 01:08:48 2019 +0100 @@ -182,12 +182,13 @@ indexed = indexed_w_content = 0 if self.is_indexable_node(node): - u_content = node.content - if not isinstance(u_content, unicode): + bytes_content = node.content + if b'\0' in bytes_content: log.warning(' >> %s - no text content', path) u_content = u'' else: log.debug(' >> %s', path) + u_content = safe_unicode(bytes_content) indexed_w_content += 1 else:
--- a/kallithea/lib/vcs/backends/git/inmemory.py Fri Dec 27 00:26:14 2019 +0100 +++ b/kallithea/lib/vcs/backends/git/inmemory.py Sat Dec 28 01:08:48 2019 +0100 @@ -68,11 +68,7 @@ # for dirnames (in reverse order) [this only applies for nodes from added] new_trees = [] - if not node.is_binary: - content = node.content.encode(ENCODING) - else: - content = node.content - blob = objects.Blob.from_string(content) + blob = objects.Blob.from_string(node.content) node_path = safe_bytes(node.name) if dirnames:
--- a/kallithea/lib/vcs/backends/hg/inmemory.py Fri Dec 27 00:26:14 2019 +0100 +++ b/kallithea/lib/vcs/backends/hg/inmemory.py Sat Dec 28 01:08:48 2019 +0100 @@ -52,8 +52,7 @@ for node in self.added: if node.path == path: return memfilectx(_repo, memctx, path=node.path, - data=(node.content.encode('utf-8') - if not node.is_binary else node.content), + data=node.content, islink=False, isexec=node.is_executable, copysource=False) @@ -62,8 +61,7 @@ for node in self.changed: if node.path == path: return memfilectx(_repo, memctx, path=node.path, - data=(node.content.encode('utf-8') - if not node.is_binary else node.content), + data=node.content, islink=False, isexec=node.is_executable, copysource=False)
--- a/kallithea/lib/vcs/nodes.py Fri Dec 27 00:26:14 2019 +0100 +++ b/kallithea/lib/vcs/nodes.py Sat Dec 28 01:08:48 2019 +0100 @@ -16,7 +16,7 @@ from kallithea.lib.vcs.backends.base import EmptyChangeset from kallithea.lib.vcs.exceptions import NodeError, RemovedFileNodeError -from kallithea.lib.vcs.utils import safe_str, safe_unicode +from kallithea.lib.vcs.utils import safe_bytes, safe_str, safe_unicode from kallithea.lib.vcs.utils.lazy import LazyProperty @@ -263,6 +263,10 @@ raise NodeError("Cannot use both content and changeset") super(FileNode, self).__init__(path, kind=NodeKind.FILE) self.changeset = changeset + if not isinstance(content, bytes) and content is not None: + # File content is one thing that inherently must be bytes ... but + # VCS module tries to be "user friendly" and support unicode ... + content = safe_bytes(content) self._content = content self._mode = mode or 0o100644 @@ -278,25 +282,17 @@ mode = self._mode return mode - def _get_content(self): + @property + def content(self): + """ + Returns lazily byte content of the FileNode. + """ if self.changeset: content = self.changeset.get_file_content(self.path) else: content = self._content return content - @property - def content(self): - """ - Returns lazily content of the FileNode. If possible, would try to - decode content from UTF-8. - """ - content = self._get_content() - - if bool(content and '\0' in content): - return content - return safe_unicode(content) - @LazyProperty def size(self): if self.changeset: @@ -366,7 +362,7 @@ """ from pygments import lexers try: - lexer = lexers.guess_lexer_for_filename(self.name, self.content, stripnl=False) + lexer = lexers.guess_lexer_for_filename(self.name, safe_unicode(self.content), stripnl=False) except lexers.ClassNotFound: lexer = lexers.TextLexer(stripnl=False) # returns first alias @@ -414,8 +410,7 @@ """ Returns True if file has binary content. 
""" - _bin = '\0' in self._get_content() - return _bin + return b'\0' in self.content def is_browser_compatible_image(self): return self.mimetype in [
--- a/kallithea/lib/vcs/utils/annotate.py Fri Dec 27 00:26:14 2019 +0100 +++ b/kallithea/lib/vcs/utils/annotate.py Sat Dec 28 01:08:48 2019 +0100 @@ -3,6 +3,7 @@ from kallithea.lib.vcs.exceptions import VCSError from kallithea.lib.vcs.nodes import FileNode +from kallithea.lib.vcs.utils import safe_unicode def annotate_highlight(filenode, annotate_from_changeset_func=None, @@ -24,9 +25,7 @@ formatter = AnnotateHtmlFormatter(filenode=filenode, order=order, headers=headers, annotate_from_changeset_func=annotate_from_changeset_func, **options) - lexer = filenode.lexer - highlighted = highlight(filenode.content, lexer, formatter) - return highlighted + return highlight(safe_unicode(filenode.content), filenode.lexer, formatter) class AnnotateHtmlFormatter(HtmlFormatter):
--- a/kallithea/templates/admin/gists/edit.html Fri Dec 27 00:26:14 2019 +0100 +++ b/kallithea/templates/admin/gists/edit.html Sat Dec 28 01:08:48 2019 +0100 @@ -73,7 +73,7 @@ </div> <div class="panel-body no-padding"> <div id="editor_container"> - <textarea id="editor_${h.FID('f',file.path)}" name="contents" style="display:none">${file.content}</textarea> + <textarea id="editor_${h.FID('f',file.path)}" name="contents" style="display:none">${safe_unicode(file.content)}</textarea> </div> </div> </div>
--- a/kallithea/templates/files/files_edit.html Fri Dec 27 00:26:14 2019 +0100 +++ b/kallithea/templates/files/files_edit.html Sat Dec 28 01:08:48 2019 +0100 @@ -59,7 +59,7 @@ </span> </div> <div class="panel-body no-padding"> - <textarea id="editor" name="content" style="display:none">${h.escape(c.file.content)|n}</textarea> + <textarea id="editor" name="content" style="display:none">${h.escape(safe_unicode(c.file.content))|n}</textarea> </div> </div> <div>
--- a/kallithea/tests/vcs/test_git.py Fri Dec 27 00:26:14 2019 +0100 +++ b/kallithea/tests/vcs/test_git.py Sat Dec 28 01:08:48 2019 +0100 @@ -596,11 +596,11 @@ for cs in self.repo: assert isinstance(cs.author, unicode) - def test_repo_files_content_is_unicode(self): + def test_repo_files_content_is_bytes(self): changeset = self.repo.get_changeset() for node in changeset.get_node('/'): if node.is_file(): - assert isinstance(node.content, unicode) + assert isinstance(node.content, bytes) def test_wrong_path(self): # There is 'setup.py' in the root dir but not there:
--- a/kallithea/tests/vcs/test_hg.py Fri Dec 27 00:26:14 2019 +0100 +++ b/kallithea/tests/vcs/test_hg.py Sat Dec 28 01:08:48 2019 +0100 @@ -544,11 +544,11 @@ for cm in self.repo: assert isinstance(cm.author, unicode) - def test_repo_files_content_is_unicode(self): + def test_repo_files_content_is_bytes(self): test_changeset = self.repo.get_changeset(100) for node in test_changeset.get_node('/'): if node.is_file(): - assert isinstance(node.content, unicode) + assert isinstance(node.content, bytes) def test_wrong_path(self): # There is 'setup.py' in the root dir but not there: