From 59c55233c29134410d2172a893ff4e7d3d374385 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Valenta?= <git@imaniti.org>
Date: Sun, 23 Apr 2023 00:27:06 +0200
Subject: [PATCH] signing party representatives, more normalization

---
 .../commands/import_old_contracts.py          | 192 +++++++++++++++++-
 ...naturerepresentative_signature_and_more.py |  23 +++
 contracts/models.py                           |   4 +-
 3 files changed, 210 insertions(+), 9 deletions(-)
 create mode 100644 contracts/migrations/0056_rename_contractee_signature_contracteesignaturerepresentative_signature_and_more.py

diff --git a/contracts/management/commands/import_old_contracts.py b/contracts/management/commands/import_old_contracts.py
index 4264bbb..7a9aa42 100644
--- a/contracts/management/commands/import_old_contracts.py
+++ b/contracts/management/commands/import_old_contracts.py
@@ -20,6 +20,8 @@ from ...models import (
     Signee,
     SigneeSignature,
     ContracteeSignature,
+    ContracteeSignatureRepresentative,
+    SigneeSignatureRepresentative,
 )
 
 
@@ -32,6 +34,7 @@ class Command(BaseCommand):
         self.normal_import_count = 0
         self.partial_import_count = 0
         self.already_imported_count = 0
+        self.normalization_count = 0
         self.issue_count = 0
         self.fatal_error_count = 0
 
@@ -245,6 +248,7 @@ class Command(BaseCommand):
             .replace("-32", "-31")
             .replace("\t", " ")
         )
+        self.normalization_count += 1
 
         yaml_source = split_contents[1]
 
@@ -265,7 +269,12 @@ class Command(BaseCommand):
         slug: str,
         contract: Contract,
         signing_party: dict
-    ) -> tuple[Contract|Signee, bool, int]:
+    ) -> tuple[
+        Contract|Signee,
+        list[ContracteeSignatureRepresentative|SigneeSignatureRepresentative],
+        bool,
+        int
+    ]:
         issue_count = 0
 
         if (
@@ -293,7 +302,83 @@ class Command(BaseCommand):
             )
         ).strip()
 
+        patterns = (  # (regex, replacement) pairs correcting known misspellings in signing party names
+            (
+                r"^(1\. Pirátská s\.r\.o|1\.Pirátská s\.r\.o\.)$",
+                "1. Pirátská s.r.o."
+            ),
+            (
+                r"České pojišťovna, a\.s\.",
+                "Česká pojišťovna, a.s."
+            ),
+            (
+                r"Datrolex, s\.r\.o\.",
+                "DATROLEX, s.r.o."
+            ),
+            (
+                r"^Jiri ",
+                "Jiří "
+            ),
+            (
+                (
+                    r"^(Křesťanská a demokratická unie – Československá strana lidová|"
+                    r"Křesťansko demokratická unie – Československá strana lidová)$"
+                ),
+                "Křesťanská a demokratická unie – Československá strana lidová"
+            ),
+            (
+                r"LN - Audit s\.r\.o\.",  # comma is required: without it both literals fuse into one string
+                "LN-AUDIT s.r.o."
+            ),
+            (
+                r"Olga Richteová",
+                "Olga Richterová"
+            ),
+            (
+                r"^(politické hnutí Změna|PolitickéHnutí Změna)$",
+                "Politické hnutí Změna"
+            ),
+            (
+                r"^Systemický institut s\.r\.o\.?$",  # optional final dot, then end-of-string anchor
+                "Systemický institut, s.r.o."
+            ),
+            (
+                r"^Václav fořtík$",
+                "Václav Fořtík"
+            ),
+            (
+                r"^Vodafone$",
+                "Vodafone Czech Republic a.s."
+            ),
+            (
+                r"^VojtěchHolík$",
+                "Vojtěch Holík"
+            ),
+            (
+                r"^Vojtech ",
+                "Vojtěch "
+            ),
+            (
+                r"^Zdenek ",
+                "Zdeněk "
+            ),
+            (
+                r" Bohmova$",
+                " Bohmová"
+            ),
+            (
+                r" (KUdláčková|Kudlláčková)$",
+                " Kudláčková"
+            )
+        )
+
+        for pattern, replacement in patterns:  # unpacking raises on a malformed entry instead of misapplying it
+            name = re.sub(pattern, replacement, name)
+
+        self.normalization_count += 1
+
         is_contractee = False
+        representatives = []
 
         if name.lower() in (
             "česká pirátská strana",
@@ -302,10 +387,12 @@ class Command(BaseCommand):
             "česká pirátská stran",
         ):
             model = Contractee
+            representative_model = ContracteeSignatureRepresentative
             instance = model()
             is_contractee = True
         else:
             model = Signee
+            representative_model = SigneeSignatureRepresentative
             instance = model(name=name, address_country="Česká republika")
 
         for signing_party_key, signing_party_value in signing_party.items():
@@ -344,7 +431,7 @@ class Command(BaseCommand):
                         instance.address_street_with_number = address["road"]
 
                     if "house_number" in address:
-                        instance.address_street_with_number += address["house_number"]
+                        instance.address_street_with_number += f" {address['house_number']}"
 
                     for address_key, address_value in address.items():
                         match address_key:
@@ -352,8 +439,79 @@ class Command(BaseCommand):
                                 instance.address_district = address_value
                             case "postcode":
                                 instance.address_zip = address_value
+
+                    self.normalization_count += 1
                 case "IČ":
+                    if not isinstance(signing_party_value, int|str):
+                        issue_count += 1
+                        contract.notes += f"Špatně zadané IČO smluvní strany: {signing_party_value}\n"
+
+                        if self.verbosity >= 2:
+                            self.stderr.write(
+                                self.style.NOTICE(
+                                    f"Contract {slug} has an invalid signing party IČO: {signing_party_value}."
+                                )
+                            )
+
+                        continue
+
                     instance.ico_number = signing_party_value
+                case "zástupce":  # representative(s): accepted as a single string or a list of strings
+                    if not isinstance(signing_party_value, str|list):
+                        issue_count += 1
+                        contract.notes += f"Špatně zadaný zástupce smluvní strany: {signing_party_value}\n"
+
+                        if self.verbosity >= 2:
+                            self.stderr.write(
+                                self.style.NOTICE(
+                                    f"Contract {slug} has an invalid signing party "
+                                    f"representative: {signing_party_value}."
+                                )
+                            )
+
+                        continue
+
+                    if isinstance(signing_party_value, str):
+                        signing_party_value = re.sub(
+                            r",$",
+                            "",
+                            signing_party_value
+                        )
+                        self.normalization_count += 1
+
+                        representatives.append(
+                            representative_model(
+                                name=signing_party_value
+                            )
+                        )
+                    else:
+                        for representative_name in signing_party_value:
+                            if not isinstance(representative_name, str):
+                                issue_count += 1
+                                contract.notes += f"Špatně zadaný jeden ze zástupců smluvní strany: {representative_name}\n"
+
+                                if self.verbosity >= 2:
+                                    self.stderr.write(
+                                        self.style.NOTICE(
+                                            f"Contract {slug} has an invalid signing party "
+                                            f"representative list item: {representative_name}."
+                                        )
+                                    )
+
+                                continue
+
+                            representative_name = re.sub(
+                                r",$",
+                                "",
+                                representative_name
+                            )
+                            self.normalization_count += 1
+
+                            representatives.append(
+                                representative_model(
+                                    name=representative_name  # the cleaned list item, not the whole list
+                                )
+                            )
 
         # Do our best to merge signing parties together.
         existing_instance = model.objects.filter(
@@ -363,11 +521,15 @@ class Command(BaseCommand):
                     (
                         models.Q(address_street_with_number=instance.address_street_with_number)
                         if instance.address_street_with_number is not None
-                        else models.Value(False)
+                        else models.Q(address_street_with_number__isnull=True)
                     ) | (
                         models.Q(date_of_birth=instance.date_of_birth)
                         if model is Signee and instance.date_of_birth is not None
-                        else models.Value(False)
+                        else (
+                            models.Q(date_of_birth__isnull=True)
+                            if model is Signee
+                            else models.Value(False)
+                        )
                     )
                 )
             )
@@ -383,7 +545,7 @@ class Command(BaseCommand):
         else:
             instance.save()
 
-        return instance, is_contractee, issue_count
+        return instance, representatives, is_contractee, issue_count
 
     def assign_contract_metadata(
         self,
@@ -450,6 +612,7 @@ class Command(BaseCommand):
                 case "použité smluvní typy":
                     if isinstance(value, str):
                         value = self.normalize_type(value)
+                        self.normalization_count += 1
 
                         try:
                             type_instance = ContractType.objects.get(name=value)
@@ -487,6 +650,7 @@ class Command(BaseCommand):
                             continue
 
                         type_name = self.normalize_type(type_name)
+                        self.normalization_count += 1
 
                         try:
                             type_instance = ContractType.objects.get(name=type_name)
@@ -588,6 +752,7 @@ class Command(BaseCommand):
                 case "místo uložení":
                     if isinstance(value, str):
                         value = self.normalize_filing_area(value)
+                        self.normalization_count += 1
 
                     try:
                         contract.paper_form_state = contract.PaperFormStates.STORED
@@ -637,12 +802,15 @@ class Command(BaseCommand):
 
                             continue
 
-                        instance, is_contractee, signing_party_issue_count = (
+                        instance, representatives, is_contractee, signing_party_issue_count = (
                             self.assign_signing_party_metadata(slug, contract, signing_party)
                         )
 
                         observed_issues_count += signing_party_issue_count
 
+                        # Store representatives in relation to the instance, hacky but good enough
+                        instance._representatives = representatives
+
                         if is_contractee:
                             contractees.append(instance)
                         else:
@@ -653,6 +821,7 @@ class Command(BaseCommand):
                 isinstance(contract.name, str)
                 and re.sub(r"/\s\s+/", "", contract.name) == ""
             ):
+                self.normalization_count += 1
                 contract.name = slug
 
             if contract.valid_start_date is None:
@@ -686,6 +855,10 @@ class Command(BaseCommand):
 
                 contract.contractee_signatures.add(signature)
 
+                for representative in contractee._representatives:
+                    representative.signature = signature
+                    representative.save()
+
             for signee in signees:
                 signee.save()
 
@@ -699,6 +872,10 @@ class Command(BaseCommand):
 
                 contract.signee_signatures.add(signature)
 
+                for representative in signee._representatives:
+                    representative.signature = signature
+                    representative.save()
+
             contract.filing_area = filing_area
             contract.types.set(types)
             contract.save()
@@ -811,7 +988,8 @@ class Command(BaseCommand):
                     f"Saved a total of {self.normal_import_count + self.partial_import_count} contracts.\n"
                     f"    {self.partial_import_count} contained a total of {self.issue_count} issues.\n"
                     f"    {self.already_imported_count} were already saved previously and skipped.\n"
-                    f"    {self.fatal_error_count} potential contracts were unparseable."
+                    f"    {self.fatal_error_count} potential contracts were unparseable.\n"
+                    f"    {self.normalization_count} data points were normalized."
                 )
             )
 
diff --git a/contracts/migrations/0056_rename_contractee_signature_contracteesignaturerepresentative_signature_and_more.py b/contracts/migrations/0056_rename_contractee_signature_contracteesignaturerepresentative_signature_and_more.py
new file mode 100644
index 0000000..c3ae4aa
--- /dev/null
+++ b/contracts/migrations/0056_rename_contractee_signature_contracteesignaturerepresentative_signature_and_more.py
@@ -0,0 +1,23 @@
+# Generated by Django 4.1.4 on 2023-04-22 22:13
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+    """Rename the signature foreign keys on both representative models to ``signature``."""
+
+    dependencies = [("contracts", "0055_alter_contractee_address_zip_and_more")]
+
+    # Pure field renames: contractee_signature / signee_signature -> signature.
+    operations = [
+        migrations.RenameField(
+            model_name="contracteesignaturerepresentative",
+            old_name="contractee_signature",
+            new_name="signature",
+        ),
+        migrations.RenameField(
+            model_name="signeesignaturerepresentative",
+            old_name="signee_signature",
+            new_name="signature",
+        ),
+    ]
diff --git a/contracts/models.py b/contracts/models.py
index fad812e..2d2b963 100644
--- a/contracts/models.py
+++ b/contracts/models.py
@@ -947,7 +947,7 @@ class SigneeSignature(models.Model):
 
 
 class ContracteeSignatureRepresentative(RepresentativeMixin, models.Model):
-    contractee_signature = models.ForeignKey(
+    signature = models.ForeignKey(
         ContracteeSignature,
         on_delete=models.CASCADE,
         related_name="representatives",
@@ -972,7 +972,7 @@ class ContracteeSignatureRepresentative(RepresentativeMixin, models.Model):
 
 
 class SigneeSignatureRepresentative(RepresentativeMixin, models.Model):
-    signee_signature = models.ForeignKey(
+    signature = models.ForeignKey(
         SigneeSignature,
         on_delete=models.CASCADE,
         related_name="representatives",
-- 
GitLab