Skip to content
Advertisement

Regex capture first text group within quotes per line

I’m working on writing a simple highlighter and I need to capture all the text, including the quotes, of the first quoted word on each line. How can I adjust this to do so? Currently this matches every group of words within quotes; however, I need just the first one.

Here are two regexes I’ve found that capture words within quotes: ("[^"]*") and (".*?[^\\]")

enter image description here

I’m just trying to make a simple JSON syntax highlighter in PySide.

import os
import sys
from PySide2 import QtCore, QtGui, QtWidgets

class SourceEditor(QtWidgets.QPlainTextEdit):
    """Plain-text editor that uses a 10pt fixed-pitch Courier font.

    A ``Highlighter`` is attached to the editor's document on construction
    and stored on the instance as ``self.highlighter``.
    """

    def __init__(self, parent=None):
        """Create the editor, set its font and attach the highlighter.

        Args:
            parent: Optional parent widget, forwarded to ``QPlainTextEdit``.
        """
        super(SourceEditor, self).__init__(parent)

        # Configure the editor font; the three setters are independent.
        editor_font = QtGui.QFont()
        editor_font.setPointSize(10)
        editor_font.setFixedPitch(True)
        editor_font.setFamily('Courier')
        self.setFont(editor_font)

        # The highlighter operates on this editor's underlying document.
        document = self.document()
        self.highlighter = Highlighter(document)


class Highlighter(QtGui.QSyntaxHighlighter):
    """Highlight quoted strings, ``//`` comments and ``/* ... */`` comments.

    Single-line constructs are described by ``(QRegExp, QTextCharFormat)``
    pairs stored in ``self.highlightingRules``. Multi-line ``/* ... */``
    comments are tracked across text blocks through the block state:
    state 1 means "this block ends inside an unterminated comment",
    state 0 means it does not.
    """

    def __init__(self, parent=None):
        """Build the formats and patterns used by ``highlightBlock``.

        Args:
            parent: Object passed to ``QSyntaxHighlighter`` (the editor's
                document in this file).
        """
        super(Highlighter, self).__init__(parent)

        # (pattern, format) pairs, applied to every block in list order.
        self.highlightingRules = []

        singleLineCommentFormat = QtGui.QTextCharFormat()
        singleLineCommentFormat.setFontItalic(True)
        singleLineCommentFormat.setForeground(QtGui.QColor(115, 115, 115))
        # "//" up to the end of the line. The class must exclude the newline
        # escape (\n); excluding the letter "n" would cut the comment short.
        self.highlightingRules.append(
            (QtCore.QRegExp(r"//[^\n]*"), singleLineCommentFormat))

        self.multiLineCommentFormat = QtGui.QTextCharFormat()
        self.multiLineCommentFormat.setFontItalic(True)
        self.multiLineCommentFormat.setForeground(QtGui.QColor(115, 115, 115))

        quotationFormat = QtGui.QTextCharFormat()
        quotationFormat.setForeground(QtGui.QColor(230, 145, 100))
        # A double-quoted run of non-quote characters, quotes included.
        self.highlightingRules.append(
            (QtCore.QRegExp(r'"[^"]*"'), quotationFormat))

        # Raw strings keep the backslash for the regex engine instead of
        # relying on Python passing through an invalid escape sequence.
        self.commentStartExpression = QtCore.QRegExp(r"/\*")
        self.commentEndExpression = QtCore.QRegExp(r"\*/")

    def highlightBlock(self, text):
        """Apply all formats to one block (line) of text.

        Args:
            text: The text of the block being highlighted.
        """
        # Pass 1: single-line rules; every match of every rule is formatted.
        for pattern, fmt in self.highlightingRules:
            expression = QtCore.QRegExp(pattern)
            index = expression.indexIn(text)
            while index >= 0:
                length = expression.matchedLength()
                if length == 0:
                    # A zero-length match would never advance the search.
                    break
                self.setFormat(index, length, fmt)
                index = expression.indexIn(text, index + length)

        # Pass 2: multi-line comments. Start from "not inside a comment".
        self.setCurrentBlockState(0)

        # If the previous block ended inside a comment, this block starts
        # inside one (index 0); otherwise look for the first "/*".
        startIndex = 0
        if self.previousBlockState() != 1:
            startIndex = self.commentStartExpression.indexIn(text)

        while startIndex >= 0:
            endIndex = self.commentEndExpression.indexIn(text, startIndex)

            if endIndex == -1:
                # No terminator in this block: the comment runs to the end
                # of the block and continues into the next one.
                self.setCurrentBlockState(1)
                commentLength = len(text) - startIndex
            else:
                commentLength = (endIndex - startIndex
                                 + self.commentEndExpression.matchedLength())

            self.setFormat(startIndex, commentLength, self.multiLineCommentFormat)
            startIndex = self.commentStartExpression.indexIn(
                text, startIndex + commentLength)


# Demo entry point: show a SourceEditor pre-filled with sample JSON-like text.
if __name__ == '__main__':
    app = QtWidgets.QApplication(sys.argv)
    window = SourceEditor()
    # No ``style`` module is imported or defined in this script, so no custom
    # style is applied here; the editor is shown with the default Qt style.
    window.setPlainText('''
        [
            {
                "group": "Simple",
                "name": "Simple",
                "category name": "Apps",
                "icon": "Simple.svg",
                "paths": [
                    {   
                        "path": "notepad.exe"
                    }
                ]
            },
            // some comment here
            {
                "group": "Simple",
                "name": "Simple",
                "category name": "Simple",
                "icon": "Simple.svg"
                "paths": [
                    {   
                        "path": "notepad",
                        "args": "notepad.py" 
                    },
                    {   
                        "path": "run.exe",
                    }
                ]
            }
        ]
        ''')
    window.resize(640, 512)
    window.show()
    # Run the Qt event loop and use its return value as the exit status.
    sys.exit(app.exec_())

Similar question… how do I capture numbers without the trailing comma? (\d+),

[
    {
        "description": null,
        "entity": {
            "id": 343,
            "name": "07010",
            "type": "Shot"
        },
        "id": 1673,
        "project": {
            "id": 9,
            "name": "test10",
        }
    }
]

Advertisement

Answer

Use a capture group and return it:

^[ \t]*("[^"]*")

See regex proof.

EXPLANATION

NODE EXPLANATION
^ the beginning of the string
[ \t]* any character of: ' ', '\t' (tab) (0 or more times (matching the most amount possible))
( group and capture to 1:
" '"'
[^"]* any character except: '"' (0 or more times (matching the most amount possible))
" '"'
) end of 1
User contributions licensed under: CC BY-SA
7 People found this is helpful
Advertisement