Currently, I am trying to create a pydantic model for a pandas DataFrame. I would like to check whether a column is unique with the following:
import pandas as pd
from typing import List
from pydantic import BaseModel
class CustomerRecord(BaseModel):
    
    id: int
    name: str
    address: str
class CustomerRecordDF(BaseModel):
    
    __root__: List[CustomerRecord]
df = pd.DataFrame({'id':[1,2,3], 
                   'name':['Bob','Joe','Justin'], 
                   'address': ['123 Fake St', '125 Fake St', '123 Fake St']})
df_dict = df.to_dict(orient='records')
CustomerRecordDF.parse_obj(df_dict)
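For context, to_dict(orient='records') yields one dict per row, which is the shape parse_obj expects for a __root__ list model:

df_dict
# [{'id': 1, 'name': 'Bob', 'address': '123 Fake St'},
#  {'id': 2, 'name': 'Joe', 'address': '125 Fake St'},
#  {'id': 3, 'name': 'Justin', 'address': '123 Fake St'}]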
I would now like to run a validation here and have it fail, since address is not unique. The following does what I need:
from pydantic import root_validator
class CustomerRecordDF(BaseModel):
    
    __root__: List[CustomerRecord]
    
    @root_validator(pre=True)
    def unique_values(cls, values):
        root_values = values.get('__root__')
        value_set = set()
        for value in root_values:
            if value['address'] in value_set:
                raise ValueError('Duplicate Address')
            else:
                value_set.add(value['address'])
        return values
CustomerRecordDF.parse_obj(df_dict)
>>> ValidationError: 1 validation error for CustomerRecordDF
  __root__
  Duplicate Address (type=value_error)
But I want to be able to reuse this validator for other dataframes I create, and to also apply this unique check to multiple columns, not just address. Ideally, something like the following:
from pydantic import root_validator
class CustomerRecordDF(BaseModel):
    __root__: List[CustomerRecord]
    
    _validate_unique_name = root_unique_validator('name')
    _validate_unique_address = root_unique_validator('address')
Answer
You could use an inner function and the allow_reuse argument:
def root_unique_validator(field):
    def validator(cls, values):
        # Use the field arg to validate a specific field
        ...
    return root_validator(pre=True, allow_reuse=True)(validator)
Full example:
import pandas as pd
from typing import List
from pydantic import BaseModel, root_validator
class CustomerRecord(BaseModel):
    id: int
    name: str
    address: str
def root_unique_validator(field):
    def validator(cls, values):
        root_values = values.get("__root__")
        value_set = set()
        for value in root_values:
            if value[field] in value_set:
                raise ValueError(f"Duplicate {field}")
            else:
                value_set.add(value[field])
        return values
    return root_validator(pre=True, allow_reuse=True)(validator)
class CustomerRecordDF(BaseModel):
    __root__: List[CustomerRecord]
    _validate_unique_name = root_unique_validator("name")
    _validate_unique_address = root_unique_validator("address")
df = pd.DataFrame(
    {
        "id": [1, 2, 3],
        "name": ["Bob", "Joe", "Justin"],
        "address": ["123 Fake St", "125 Fake St", "123 Fake St"],
    }
)
df_dict = df.to_dict(orient="records")
CustomerRecordDF.parse_obj(df_dict)
# Output:
# pydantic.error_wrappers.ValidationError: 1 validation error for CustomerRecordDF
# __root__
#   Duplicate address (type=value_error)
And if you use a duplicated name:
# Same setup as in the full example above
df = pd.DataFrame(
    {
        "id": [1, 2, 3],
        "name": ["Bob", "Joe", "Bob"],
        "address": ["123 Fake St", "125 Fake St", "127 Fake St"],
    }
)
df_dict = df.to_dict(orient="records")
CustomerRecordDF.parse_obj(df_dict)
# Output:
# pydantic.error_wrappers.ValidationError: 1 validation error for CustomerRecordDF
# __root__
#   Duplicate name (type=value_error)
You could also accept more than one field and use a single root validator that checks all of them; that would probably make the allow_reuse argument unnecessary.
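A minimal sketch of that multi-field variant (the *fields varargs signature is an assumption, not something from the original answer):

def root_unique_validator(*fields):
    def validator(cls, values):
        root_values = values.get("__root__")
        # Check each requested column for duplicates
        for field in fields:
            value_set = set()
            for value in root_values:
                if value[field] in value_set:
                    raise ValueError(f"Duplicate {field}")
                value_set.add(value[field])
        return values

    # allow_reuse=True is kept here because the inner function shares a
    # __qualname__ across calls, so building validators for several
    # DataFrame models from this factory would otherwise trip pydantic's
    # duplicate-validator check.
    return root_validator(pre=True, allow_reuse=True)(validator)


class CustomerRecordDF(BaseModel):
    __root__: List[CustomerRecord]

    # A single validator now covers both columns
    _validate_unique = root_unique_validator("name", "address")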