Skip to content
Snippets Groups Projects
Commit 6ee477c9 authored by OndraRehounek's avatar OndraRehounek
Browse files

twitter_utils

parent a5124932
No related branches found
No related tags found
2 merge requests!607Pirati.cz,!575Feature/pirati cz
Pipeline #9270 failed
......@@ -3,4 +3,4 @@
line_length = 88
multi_line_output = 3
include_trailing_comma = true
known_third_party = PyPDF2,arrow,bleach,bs4,captcha,celery,django,environ,faker,fastjsonschema,icalevnt,markdown,modelcluster,pirates,pytest,pytz,requests,sentry_sdk,taggit,wagtail,wagtailmetadata,weasyprint,yaml,zoneinfo
known_third_party = PyPDF2,arrow,bleach,bs4,captcha,celery,django,environ,faker,fastjsonschema,icalevnt,markdown,modelcluster,pirates,pytest,pytz,requests,sentry_sdk,taggit,tweepy,wagtail,wagtailmetadata,weasyprint,yaml,zoneinfo
......@@ -174,6 +174,7 @@ Přes CRON je třeba na pozadí spouštět Django `manage.py` commandy:
* `publish_scheduled_pages` - publikuje naplánované stránky (každou hodinu)
* `update_callendars` - stáhne a aktualizuje kalendáře (několikrát denně)
* `update_redmine_issues` - aktualizuje programované body MS a KS stránek napojených na Redmine (několikrát denně)
* `update_tweets` - aktualizuje tweety z účtu PiratskaStrana (každou hodinu) - vyžaduje mít v .env TWITTER_BEARER_TOKEN
### Fulltextové vyhledávání v češtině
......
import tweepy
print(public_tweets)
......@@ -46,6 +46,7 @@ INSTALLED_APPS = [
"calendar_utils",
"maps_utils",
"redmine_utils",
"twitter_utils",
"users",
"pirates",
"tuning",
......@@ -303,3 +304,5 @@ ELECTIONS2021_NEWSLETTER_SOURCE = env.str(
MAPS_UTILS_MAPPROXY_URL = env.str(
"MAPPROXY_URL", default="https://mapproxy.pir-test.eu"
)
# Bearer token for the Twitter API, needed only by the `update_tweets`
# management command. Defaults to an empty string so that the rest of the
# project still starts without it; TweetDownloadService raises a RuntimeError
# when it is given an empty token.
TWITTER_BEARER_TOKEN = env.str("TWITTER_BEARER_TOKEN", default="")
......@@ -24,3 +24,4 @@ weasyprint
pypdf2
pyyaml
fastjsonschema
tweepy
......@@ -4,12 +4,12 @@
#
# pip-compile base.in
#
--extra-index-url https://pip:${GITLAB_PYPI_TOKEN}@gitlab.com/api/v4/groups/6879633/-/packages/pypi/simple
amqp==5.1.1
# via kombu
anyascii==0.3.1
# via wagtail
appnope==0.1.3
# via ipython
arrow==1.2.2
# via
# -r base.in
......@@ -154,6 +154,10 @@ mozilla-django-oidc==2.0.0
# via pirates
numpy==1.23.1
# via opencv-python
oauthlib==3.2.0
# via
# requests-oauthlib
# tweepy
opencv-python==4.6.0.66
# via -r base.in
openpyxl==3.0.10
......@@ -222,7 +226,11 @@ requests==2.28.1
# via
# -r base.in
# mozilla-django-oidc
# requests-oauthlib
# tweepy
# wagtail
requests-oauthlib==1.3.1
# via tweepy
sentry-sdk==1.9.2
# via -r base.in
six==1.16.0
......@@ -254,6 +262,8 @@ traitlets==5.3.0
# via
# ipython
# matplotlib-inline
tweepy==4.10.0
# via -r base.in
urllib3==1.26.11
# via
# requests
......
from django.apps import AppConfig
class TwitterUtilsConfig(AppConfig):
    """Django application configuration for the ``twitter_utils`` app."""

    name = "twitter_utils"
from django.conf import settings
from django.core.management.base import BaseCommand
from ...services import TweetDownloadService
class Command(BaseCommand):
    """Management command that runs ``TweetDownloadService.perform_update``."""

    def add_arguments(self, parser):
        """Register the optional integer ``--days_back`` option (default 1)."""
        days_back_option = {
            "default": 1,
            "required": False,
            "type": int,
            "help": "Stáří tweetů ve dnech",
        }
        parser.add_argument("--days_back", **days_back_option)

    def handle(self, *args, **options):
        """Build the download service from settings and options, then run it."""
        token = settings.TWITTER_BEARER_TOKEN
        days_back = options["days_back"]

        service = TweetDownloadService(bearer_token=token, days_back=days_back)
        service.perform_update()

        self.stdout.write("\nUpdating tweets finished!")
# Generated by Django 4.0.7 on 2022-08-19 08:35
from django.db import migrations, models
class Migration(migrations.Migration):
    # Initial migration of this app: it has no dependencies and only creates
    # the ``Tweet`` model.

    initial = True

    dependencies = []

    operations = [
        migrations.CreateModel(
            name="Tweet",
            fields=[
                # Auto-created integer primary key.
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                # Author fields carry defaults, so a Tweet can be created
                # without supplying them.
                (
                    "author_img_url",
                    models.URLField(
                        default="https://pbs.twimg.com/profile_images/1556544269443387394/jSO2A2Fr_200x200.jpg"
                    ),
                ),
                ("author_name", models.CharField(default="Piráti", max_length=128)),
                (
                    "author_username",
                    models.CharField(default="PiratskaStrana", max_length=128),
                ),
                ("text", models.TextField()),
                # Stored as a string of up to 32 characters; the unique
                # constraint prevents saving the same Tweet twice.
                ("twitter_id", models.CharField(max_length=32, unique=True)),
            ],
        ),
    ]
from django.db import models
class Tweet(models.Model):
    """
    Stores Tweets fetched from the Twitter API by the ``update_tweets``
    management command.

    The Tweet ID is stored as a string, because even the BigInt limit is of the
    same order of magnitude as the IDs of the newest Tweets (2022).
    """

    author_img_url = models.URLField(
        default="https://pbs.twimg.com/profile_images/1556544269443387394/jSO2A2Fr_200x200.jpg"
    )  # TODO consider another default, maybe from static
    author_name = models.CharField(default="Piráti", max_length=128)
    author_username = models.CharField(default="PiratskaStrana", max_length=128)
    text = models.TextField()
    twitter_id = models.CharField(unique=True, max_length=32)
from datetime import timedelta
from django.utils import timezone
from tweepy import Client
from .models import Tweet
class TweetDownloadService:
    """
    Downloads recent Tweets of the "PiratskaStrana" account from the Twitter API
    and stores those that are not yet present in the database.
    """

    client: Client
    days_back: int

    def __init__(self, bearer_token, days_back=1):
        """
        :param bearer_token: Twitter API bearer token; must not be empty.
        :param days_back: how many days back from now Tweets are requested.
        :raises RuntimeError: when ``bearer_token`` is empty.
        """
        if not bearer_token:
            raise RuntimeError("Twitter bearer token not set, cannot update tweets")

        self.client = Client(bearer_token=bearer_token)
        self.days_back = days_back

    @staticmethod
    def get_latest_saved_tweet_id() -> list[str]:
        """
        Return the Twitter IDs of all Tweets already saved in the database.

        The IDs are strings, because ``Tweet.twitter_id`` is a CharField. It
        might be worth restricting this to a limited time period...
        """
        return list(Tweet.objects.values_list("twitter_id", flat=True))

    def get_tweets_response(self, user_id) -> list:
        """
        Return the list of Tweet objects for the given Twitter user.

        Always returns a list: the API response carries no ``data`` at all when
        no Tweet falls into the requested period, which is mapped to ``[]``.

        NOTE(review): only a single request with ``max_results=100`` is made,
        so anything beyond the first 100 Tweets of the period is not fetched.
        """
        tweets_response = self.client.get_users_tweets(
            user_id,
            expansions=["author_id", "entities.mentions.username"],
            max_results=100,
            start_time=timezone.now() - timedelta(days=self.days_back),
            tweet_fields=["author_id", "created_at"],
            user_fields=["name", "username"],
        )  # 49022430
        return tweets_response.data or []

    def get_user_response(self) -> dict:
        """
        Return information about the user "PiratskaStrana".

        This call is not strictly needed if the ID 49022430 were hardcoded.
        The result is read with dict-style keys (``id``, ``name``,
        ``username``, ``profile_image_url``) by ``perform_update``.
        """
        user_response = self.client.get_user(
            username="PiratskaStrana",
            user_fields=["profile_image_url"],  # id, name, username enabled by default
        )
        return user_response.data

    def perform_update(self) -> list[Tweet]:
        """
        Run the whole process of downloading Tweets from the API into the DB.

        :return: the newly created ``Tweet`` objects, as returned by
            ``bulk_create`` (an empty list when there is nothing new).
        :raises RuntimeError: when the API returns no data for the user.
        """
        user_data = self.get_user_response()
        if user_data is None:
            raise RuntimeError(
                "Twitter user PiratskaStrana not found, cannot update tweets"
            )

        downloaded_tweets = self.get_tweets_response(user_id=user_data["id"])
        # Build the set once so that every membership test below is O(1).
        existing_tweet_ids = set(self.get_latest_saved_tweet_id())

        # The API delivers numeric IDs while the DB stores them as strings,
        # hence the str() conversion for both the comparison and the save.
        tweets_to_save = [
            Tweet(
                author_img_url=user_data["profile_image_url"],
                author_name=user_data["name"],
                author_username=user_data["username"],
                text=tweet.text,
                twitter_id=str(tweet.id),
            )
            for tweet in downloaded_tweets
            if str(tweet.id) not in existing_tweet_ids
        ]

        return Tweet.objects.bulk_create(tweets_to_save)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment