From fa7f711ebc64611d4355bbb5945a5c4a04b35f9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexa=20Valentov=C3=A1?= <git@imaniti.org> Date: Tue, 3 Sep 2024 20:40:33 +0200 Subject: [PATCH] semi-finish import task (again) --- .../commands/import_old_contracts.py | 147 ++++++++++++++++-- 1 file changed, 135 insertions(+), 12 deletions(-) diff --git a/contracts/management/commands/import_old_contracts.py b/contracts/management/commands/import_old_contracts.py index d58d9ed..bc9ace3 100644 --- a/contracts/management/commands/import_old_contracts.py +++ b/contracts/management/commands/import_old_contracts.py @@ -410,14 +410,25 @@ class Command(BaseCommand): patterns = ( (r"\s\s+", " "), - (r"(B|b)c\.|,\s(P|p)h\.(D|d)\.|(M|m)g(r|A|a)\.|(I|i)ng\.|PeadDr\.|PeaDr\.|(P|p)h(D|d)r\.", ""), + (r"^(B|b)(C|c)(\.| )|MDDr\.|MDDR\.|^(A|a)rch\.|,\s(M|m)(B|b)(A|a)$|,\s(M|m)sc\.$|^(M|m)(U|u)(D|d)r\.|,\s(P|p)h\.(D|d)(\.|)$|^(M|m)g(r|A|a)\.|^(I|i)ng\.|^PeadDr\.|^PeaDr\.|^(P|p)h(D|d)r\.", ""), + (r"\s(e|E)t\s", ""), + (r"\s\s+", " "), + (r"^ ", ""), + (r"^(B|b)c\.|^(A|a)rch\.|,\s(M|m)(B|b)(A|a)$|,\s(M|m)sc\.$|^(M|m)(U|u)(D|d)r\.|,\s(P|p)h\.(D|d)(\.|)$|^(M|m)g(r|A|a)\.|^(I|i)ng\.|^PeadDr\.|^PeaDr\.|^(P|p)h(D|d)r\.", ""), + (r"\s(e|E)t\s", ""), + (r"\s\s+", " "), + (r"^ ", ""), + (r"^(B|b)c\.|^(A|a)rch\.|,\s(M|m)(B|b)(A|a)$|,\s(M|m)sc\.$|^(M|m)(U|u)(D|d)r\.|,\s(P|p)h\.(D|d)(\.|)$|^(M|m)g(r|A|a)\.|^(I|i)ng\.|^PeadDr\.|^PeaDr\.|^(P|p)h(D|d)r\.", ""), + (r"\s(e|E)t\s", ""), + (r"\s\s+", " "), + (r"^ ", ""), + (r"^(B|b)c\.|^(A|a)rch\.|,\s(M|m)(B|b)(A|a)$|,\s(M|m)sc\.$|^(M|m)(U|u)(D|d)r\.|,\s(P|p)h\.(D|d)(\.|)$|^(M|m)g(r|A|a)\.|^(I|i)ng\.|^PeadDr\.|^PeaDr\.|^(P|p)h(D|d)r\.", ""), (r"\s(e|E)t\s", ""), (r"\s\s+", " "), (r"^ ", ""), (r"^Alvarium, s\.r\.o\.", "Alvarium s.r.o."), (r"^PolitickĂ© hnutĂ Senátor 21$", "PolitickĂ© hnutĂ SENĂTOR 21"), (r"^Petr Springfield$", "Petr Springinsfeld"), - (r"RAINREKNAM", "RAILREKLAM"), (r"^Šárka václavĂková$", "Šárka VáclavĂková"), (r"^Ĺ tepán Drtina$", "Ĺ tÄ›pán Drtina"), (r"^(1\. Pirátská s\.r\.o|1\.Pirátská s\.r\.o\.)$", "1. Pirátská s.r.o."), @@ -443,7 +454,7 @@ class Command(BaseCommand): (r"^iveta", "Iveta"), (r"^Jan Bohm$", "Jan Böhm"), ( - r"^Kooperativa$|^Kooperativa pojišťovna, a\.s\.$", + r"^Kooperativa$|^Kooperativa pojišťovna a\.s\.$|^Kooperativa pojišťovna, a\.s\.$", "Kooperativa pojišťovna, a.s., Vienna Insurance Group" ), (r"^Dominika P\. Michailidu$", "Dominika PoĹľivilová Michailidu"), @@ -459,8 +470,45 @@ class Command(BaseCommand): (r" Bohmova$", " Bohmová"), (r"^Vratislav filĂpek$", "Vratislav FilĂpek"), (r"^W Czech development$", "W Czech Development s.r.o."), + (r" (KUdláčková|Kudlláčková)$", " Kudláčková"), + (r"^Jiřà knotek$", "Jiřà Knotek"), + (r"^JIřà RoubĂÄŤek$", "Jiřà RoubĂÄŤek"), + (r"^Koalice Vlasta\. z\.s\.$", "Koalice Vlasta, z.s."), + (r"^Mikuáš ", "Mikuláš "), + (r"^VĂtÄ›zslav Adamec, pĹ™edseda KS KarlovarskĂ˝ kraj$", "VĂtÄ›zslav Adamec"), + (r"^Strana zelenĂ˝ch$", "Strana ZelenĂ˝ch"), + (r"^SystemickĂ˝ institut s\.r\.o\.$", "SystemickĂ˝ institut, s.r.o."), + (r"^AdĂ©la hradilová$|^Adela Hradilova$", "AdĂ©la Hradilová"), + (r"^KoncervativnĂ strana$", "KonzervativnĂ strana"), + (r"^Martin kolář$", "Martin Kolář"), + (r"^MIchal NupeÄŤka$", "Michal NupeÄŤka"), + (r"^obÄŤanĂ© Prahy 10$", "ObÄŤanĂ© Prahy 10"), + (r"^Petr a Nudmila ĹehoĹ™ovi$", "Petr a Ludmila ĹehoĹ™ovi"), + (r"^Petr Apringinsfeld$", "Petr Springinsfeld"), + (r"^Petr Procházla$", "Petr Procházka"), + (r"^spolek VÄ›jĂĹ™ Kladno z.s.$", "Spolek VÄ›jĂĹ™ Kladno z.s."), + (r"^SvatmĂr MlÄŤoch$", "SvatomĂr MlÄŤoch"), + (r"^Tomáš Vymyzal$", "Tomáš Vymazal"), + (r"^Pavlovic Josef$", "Josef Pavlovic"), + (r"^MIlan OlerĂny$", "Milan OlerĂny"), + (r"^DoÄŤkal David$", "David DoÄŤkal"), + (r"^Jan žák$", "Jan Žák"), + (r"^MilušeKotišová$", "Miluše Kotišová"), + (r"RAINREKNAM", "RAILREKLAM"), + + + # Weird N-instead-of-L names (r"^Nadislav", "Ladislav"), (r"^Nukáš", "Lukáš"), + (r"^Nenka", "Lenka"), + (r"^PovodĂ Nabe, státnĂ podnik$", "PovodĂ Labe, státnĂ podnik"), + (r"^Prokop Naichter$", "Prokop Laichter"), + (r"^Tomáš Nakota$", "Tomáš Lakota"), + (r"^Tomáš NaštĹŻvka$", "Tomáš LaštĹŻvka"), + (r"^Tomáš NĂ©nárd$", "Tomáš LĂ©nárd"), + (r"^Zuzana Freitas Nopesová$", "Zuzana Freitas Lopesová"), + (r"^Zuzana Najksnerová$", "Zuzana Lajksnerová"), + (r"^Simona Nuftová$", "Simona Luftová"), (r"^Jan NiÄŤka$", "Jan LiÄŤka"), (r"^Jan NipavskĂ˝$", "Jan LipavskĂ˝"), (r"^Jan NouĹľek$", "Jan LouĹľek"), @@ -470,15 +518,90 @@ class Command(BaseCommand): (r"^Nadislav", "Ladislav"), (r"^Naureen Hollge$|^Naureen Holge$|^Naureen Höllge$", "Laureen Höllge"), (r"^OldĹ™ich NhotskĂ˝$", "OldĹ™ich LhotskĂ˝"), - (r" (KUdláčková|Kudlláčková)$", " Kudláčková"), - (r"^Jiřà knotek$", "Jiřà Knotek"), - (r"^JIřà RoubĂÄŤek$", "Jiřà RoubĂÄŤek"), - (r"^Koalice Vlasta\. z\.s\.$", "Koalice Vlasta, z.s."), - (r"^Mikuáš ", "Mikuláš "), - (r"^VĂtÄ›zslav Adamec, pĹ™edseda KS KarlovarskĂ˝ kraj$", "VĂtÄ›zslav Adamec"), - (r"^Strana zelenĂ˝ch$", "Strana ZelenĂ˝ch"), - (r"^SystemickĂ˝ institut s\.r\.o\.$", "SystemickĂ˝ institut, s.r.o."), - (r"^AdĂ©la hradilová$|^Adela Hradilova$", "AdĂ©la Hradilová"), + (r"^Václav Náska$", "Václav Láska"), + (r"^NumĂr Kantor$", "LumĂr Kantor"), + (r"^Nenka Matoušková$", "Lenka Matoušková"), + (r"^Nenka Blahovcová$", "Lenka Blahovcová"), + (r"^Nenka Dvořáková$", "Lenka Dvořáková"), + (r"^Nenka Horejsková$", "Lenka Horejsková"), + (r"^Nenka JĂĽngling$", "Lenka JĂĽngling"), + (r"^Nenka Kozlová$", "Lenka Kozlová"), + (r"^Nenka Mahdalová$", "Lenka Mahdalová"), + (r"^Nenka MartĂnková Ĺ panihelová$", "Lenka MartĂnková Ĺ panihelová"), + (r"^Nenka Nováková$", "Lenka Nováková"), + (r"^Nenka Svobodová$", "Lenka Svobodová"), + (r"^Neonard Vargaq$", "Leonard Vargaq"), + (r"^Neoš Jindra$", "Leoš Jindra"), + (r"^Neoš KĹ™eÄŤek$", "Leoš KĹ™eÄŤek"), + (r"^NiberálnÄ› ekologická strana$", "LiberálnÄ› ekologická strana"), + (r"^Nibor Adámek$", "Libor Adámek"), + (r"^Nibor DoubravskĂ˝$", "Libor DoubravskĂ˝"), + (r"^Nibor JanĂÄŤek$", "Libor JanĂÄŤek"), + (r"^Nibor Michálek$", "Libor Michálek"), + (r"^Nibor Semerák$", "Libor Semerák"), + (r"^Nibuše Juthnerova$", "Libuše Juthnerova"), + (r"^Nibuše (JĂĽthnerová|Juthnerova)$", "Libuše JĂĽthnerová"), + (r"^Nibuše Ratajová$", "Libuše Ratajová"), + (r"^Nibuše VěřĂšová$", "Libuše VěřĂšová"), + (r"^Nibuše VĂ©vodová$", "Libuše VĂ©vodová"), + (r"^Ninda Hlavatá$", "Linda Hlavatá"), + (r"^Ninda Hrubešová$", "Linda Hrubešová"), + (r"^Ninda Kašpárková$", "Linda Kašpárková"), + (r"^Ninda Neubergova$", "Linda Neubergova"), + (r"^NubomĂr DolaĹľal$", "LubomĂr DolaĹľal"), + (r"^NubomĂr Kudláček$", "LubomĂr Kudláček"), + (r"^NubomĂr NukšĂk$", "LubomĂr NukšĂk"), + (r"^NubomĂr Staniek$", "LubomĂr Staniek"), + (r"^NubomĂr Ĺ paÄŤek$", "LubomĂr Ĺ paÄŤek"), + (r"^NubomĂr Traub, Msc.$", "LubomĂr Traub, Msc."), + (r"^NubomĂ Ĺ afr$", "LubomĂr Ĺ afr"), + (r"^Nubor JenÄŤek$", "Lubor JenÄŤek"), + (r"^Nuboš Brokeš$", "Luboš Brokeš"), + (r"^Nuboš Kudláček$", "Luboš Kudláček"), + (r"^Nuboš Průša$", "Luboš Průša"), + (r"^Nuboš Sedlák$", "Luboš Sedlák"), + (r"^Nuboš SlavĂk$", "Luboš SlavĂk"), + (r"^Nucia KopĹ™ivovová Markovová$", "Lucia KopĹ™ivovová Markovová"), + (r"^Nucie Brusáková$", "Lucie Brusáková"), + (r"^Nucie CIrkva Chocholová$", "Lucie Cirkva Chocholová"), + (r"^Nucie Jandová$", "Lucie Jandová"), + (r"^Nucie MalĂková$", "Lucie MalĂková"), + (r"^Nucie Mikulová$", "Lucie Mikulová"), + (r"^Nucie Nadlická$", "Lucie Nadlická"), + (r"^Nucie NoĹľiÄŤková$", "Lucie NoĹľiÄŤková"), + (r"^Nucie OrbĂłk$", "Lucie OrbĂłk"), + (r"^Nucie Rychtarová$", "Lucie Rychtarová"), + (r"^Nucie Seguin$", "Lucie Seguin"), + (r"^Nucie SekniÄŤková$", "Lucie SekniÄŤková"), + (r"^Nucie Spáčilová$", "Lucie Spáčilová"), + (r"^Nucie Syptáková$", "Lucie Syptáková"), + (r"^Nucie Ĺ vehlĂková$", "Lucie Ĺ vehlĂková"), + (r"^Nucie TrÄŤková$", "Lucie TrÄŤková"), + (r"^Nucie Vadkerti$", "Lucie Vadkerti"), + (r"^Nudmila ĹehoĹ™ová$", "Ludmila ĹehoĹ™ová"), + (r"^Nudmila TomaščĂková$", "Ludmila TomaščĂková"), + (r"^Nukáš BartĂk$", "Lukáš BartĂk"), + (r"^Nukáš BartoĹ$", "Lukáš BartoĹ"), + (r"^Nukáš BednaĹ™Ăk$", "Lukáš BednaĹ™Ăk"), + (r"^Nukáš BlaĹľej$", "Lukáš BlaĹľej"), + (r"^Nukáš Bulava$", "Lukáš Bulava"), + (r"^Nukáš ÄŚernohorskĂ˝$", "Lukáš ÄŚernohorskĂ˝"), + (r"^Nukáš ÄŚervinka$", "Lukáš ÄŚervinka"), + (r"^Nukáš Dubec$", "Lukáš Dubec"), + (r"^Nukáš Havlena$", "Lukáš Havlena"), + (r"^Nukáš Hypša$", "Lukáš Hypša"), + (r"^Nukáš Chlebik$", "Lukáš Chlebik"), + (r"^Nukáš JadlovksĂ˝$", "Lukáš JadlovksĂ˝"), + (r"^Nukáš Kohout$", "Lukáš Kohout"), + (r"^Nukáš KolářĂk$", "Lukáš KolářĂk"), + (r"^Nukáš KostĂnek$", "Lukáš KostĂnek"), + (r"^Nukáš Mamula$", "Lukáš Mamula"), + (r"^Nukáš Mareš$", "Lukáš Mareš"), + (r"^Nukáš Nev ÄŚervinka$", "Lukáš Nev ÄŚervinka"), + (r"^Nukáš NovĂ˝$", "Lukáš NovĂ˝"), + (r"^Nukáš RyšavĂ˝$", "Lukáš RyšavĂ˝"), + (r"^Nukáš Smetana$", "Lukáš Smetana"), + (r"^Nukáš Wagenknecht$", "Lukáš Wagenknecht"), ) for pattern in patterns: -- GitLab