I’m working on writing a simple highlighter and I need to capture all the text, including the quotes, for the first quoted word on each line. How can I adjust this to do so? Currently this gets me every group of words within quotes; however, I need just the first one.
Here are two regexes I’ve found that capture words within quotes:
("[^"]*")
(".*?[^\\]")
I’m just trying to make a simple JSON syntax highlighter in PySide.
import os
import sys

from PySide2 import QtCore, QtGui, QtWidgets


class SourceEditor(QtWidgets.QPlainTextEdit):
    """Plain-text editor using a 10pt fixed-pitch Courier font with syntax highlighting."""

    def __init__(self, parent=None):
        super(SourceEditor, self).__init__(parent)

        font = QtGui.QFont()
        font.setFamily('Courier')
        font.setFixedPitch(True)
        font.setPointSize(10)
        self.setFont(font)

        # Attach the highlighter to this editor's document.
        self.highlighter = Highlighter(self.document())


class Highlighter(QtGui.QSyntaxHighlighter):
    """Highlight ``//`` comments, ``/* ... */`` comments and double-quoted strings.

    Single-line rules are stored in ``highlightingRules`` as
    ``(QRegExp, QTextCharFormat)`` pairs and applied in order, so a later
    rule overrides an earlier one where their matches overlap. Multi-line
    comments are applied last and tracked across blocks via the block state
    (0 = outside a comment, 1 = inside an unterminated ``/* ... */``).
    """

    def __init__(self, parent=None):
        super(Highlighter, self).__init__(parent)
        self.highlightingRules = []

        singleLineCommentFormat = QtGui.QTextCharFormat()
        singleLineCommentFormat.setFontItalic(True)
        singleLineCommentFormat.setForeground(QtGui.QColor(115, 115, 115))
        # "//" up to the end of the line.
        self.highlightingRules.append(
            (QtCore.QRegExp(r"//[^\n]*"), singleLineCommentFormat))

        self.multiLineCommentFormat = QtGui.QTextCharFormat()
        self.multiLineCommentFormat.setFontItalic(True)
        self.multiLineCommentFormat.setForeground(QtGui.QColor(115, 115, 115))

        quotationFormat = QtGui.QTextCharFormat()
        quotationFormat.setForeground(QtGui.QColor(230, 145, 100))
        # A double-quoted run, quotes included.
        self.highlightingRules.append(
            (QtCore.QRegExp('"[^"]*"'), quotationFormat))

        self.commentStartExpression = QtCore.QRegExp(r"/\*")
        self.commentEndExpression = QtCore.QRegExp(r"\*/")

    def highlightBlock(self, text):
        """Apply all formats to one block (line) of text.

        Args:
            text: The content of the block being highlighted.
        """
        # Single-line rules: format every non-overlapping match in the block.
        for expression, char_format in self.highlightingRules:
            index = expression.indexIn(text)
            while index >= 0:
                length = expression.matchedLength()
                self.setFormat(index, length, char_format)
                index = expression.indexIn(text, index + length)

        self.setCurrentBlockState(0)

        # If the previous block ended inside a comment, this block starts
        # inside it (offset 0); otherwise look for the first "/*".
        startIndex = 0
        if self.previousBlockState() != 1:
            startIndex = self.commentStartExpression.indexIn(text)

        while startIndex >= 0:
            endIndex = self.commentEndExpression.indexIn(text, startIndex)
            if endIndex == -1:
                # No terminator in this block: the comment continues.
                self.setCurrentBlockState(1)
                commentLength = len(text) - startIndex
            else:
                commentLength = (endIndex - startIndex
                                 + self.commentEndExpression.matchedLength())
            self.setFormat(startIndex, commentLength,
                           self.multiLineCommentFormat)
            startIndex = self.commentStartExpression.indexIn(
                text, startIndex + commentLength)


if __name__ == '__main__':
    app = QtWidgets.QApplication(sys.argv)
    window = SourceEditor()
    window.setPlainText('''
[
    {
        "group": "Simple",
        "name": "Simple",
        "category name": "Apps",
        "icon": "Simple.svg",
        "paths": [
            {
                "path": "notepad.exe"
            }
        ]
    },
    // some comment here
    {
        "group": "Simple",
        "name": "Simple",
        "category name": "Simple",
        "icon": "Simple.svg"
        "paths": [
            {
                "path": "notepad",
                "args": "notepad.py"
            },
            {
                "path": "run.exe",
            }
        ]
    }
]
''')
    window.resize(640, 512)
    window.show()
    sys.exit(app.exec_())
Similar question… how do I capture numbers without the trailing comma?
(\d+),
[ { "description": null, "entity": { "id": 343, "name": "07010", "type": "Shot" }, "id": 1673, "project": { "id": 9, "name": "test10", } } ]
Advertisement
Answer
Use a capture group and return it:
^[ \t]*("[^"]*")
See regex proof.
EXPLANATION
NODE | EXPLANATION |
---|---|
^ |
the beginning of the string |
[ \t]* |
any character of: ‘ ’, ‘\t’ (tab) (0 or more times (matching the most amount possible)) |
( |
group and capture to 1: |
" |
‘"’ |
[^"]* |
any character except: ‘"’ (0 or more times (matching the most amount possible)) |
" |
‘"’ |
) |
end of 1 |