pFad - Phone/Frame/Anonymizer/Declutterfier! Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

URL: http://github.com/EuroPython/programapi/commit/db9d0218caa9581a977545ece7ddee27989b2ad0

Add exclude option · EuroPython/programapi@db9d021 · GitHub
Skip to content

Commit db9d021

Browse files
committed
Add exclude option
1 parent dc4ba37 commit db9d021

5 files changed

Lines changed: 116 additions & 43 deletions

File tree

Makefile

Lines changed: 14 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,20 +1,28 @@
11
# Variables for the project
22
# =========================
3-
CONFERENCE ?= ep2024
3+
CONFERENCE ?= ep2025
44
DATA_DIR ?= ./data/public/$(CONFERENCE)/
55

66
# Variables for remote host
77
# =========================
88
VPS_USER ?= static_content_user
99
VPS_HOST ?= static.europython.eu
10-
VPS_PATH ?= /home/$(VPS_USER)/content/programapi/$(CONFERENCE)/releases
10+
VPS_PATH ?= /home/$(VPS_USER)/content/static/programme/$(CONFERENCE)/releases
1111
REMOTE_CMD=ssh $(VPS_USER)@$(VPS_HOST)
1212

1313
# Variables for deploy
14-
# ==========================
14+
# ====================
1515
TIMESTAMP ?= $(shell date +%Y%m%d%H%M%S)
1616
FORCE_DEPLOY ?= false
1717

18+
# Optional arguments
19+
# ==================
20+
EXCLUDE ?=
21+
WARN_DUPES ?= false
22+
23+
# Convert EXCLUDE space-separated list to repeated --exclude flags
24+
EXCLUDE_FLAGS = $(foreach item,$(EXCLUDE),--exclude $(item))
25+
1826
dev:
1927
uv sync --dev
2028

@@ -27,13 +35,13 @@ deps/install:
2735
install: deps/install
2836

2937
download:
30-
python -m src.download
38+
python -m src.download $(EXCLUDE_FLAGS)
3139

3240
transform:
3341
ifeq ($(WARN_DUPES), true)
34-
python -m src.transform --warn-dupes
42+
python -m src.transform $(EXCLUDE_FLAGS) --warn-dupes
3543
else
36-
python -m src.transform
44+
python -m src.transform $(EXCLUDE_FLAGS)
3745
endif
3846

3947
all: download transform

src/config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55

66

77
class Config:
8-
event = "europython-2024"
9-
event_dir_name = "ep2024"
8+
event = "europython-2025"
9+
event_dir_name = "ep2025"
1010
project_root = Path(__file__).resolve().parents[1]
1111
raw_path = Path(f"{project_root}/data/raw/{event_dir_name}")
1212
public_path = Path(f"{project_root}/data/public/{event_dir_name}")

src/download.py

Lines changed: 34 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,23 @@
11
import json
2+
from argparse import ArgumentParser
23
from typing import Any
34

45
import requests
56
from tqdm import tqdm
67

78
from src.config import Config
89

10+
parser = ArgumentParser(description="Download Pretalx data for EuroPython processing.")
11+
parser.add_argument(
12+
"-e",
13+
"--exclude",
14+
choices=["schedule", "youtube"],
15+
action="append",
16+
help="Exclude certain resources from download.",
17+
)
18+
args = parser.parse_args()
19+
exclude = set(args.exclude or [])
20+
921
headers = {
1022
"Accept": "application/json, text/javascript",
1123
"Authorization": f"Token {Config.token()}",
@@ -14,24 +26,27 @@
1426
base_url = f"https://pretalx.com/api/events/{Config.event}/"
1527
schedule_url = base_url + "schedules/latest/"
1628

29+
# Build resource list dynamically based on exclusions
1730
resources = [
18-
# Questions need to be passed to include answers in the same endpoint,
19-
# saving us later time with joining the answers.
2031
"submissions?questions=all&state=confirmed",
2132
"speakers?questions=all",
22-
"p/youtube",
2333
]
2434

35+
if "youtube" not in exclude:
36+
resources.append("p/youtube")
37+
2538
Config.raw_path.mkdir(parents=True, exist_ok=True)
2639

2740
for resource in resources:
28-
url = base_url + f"{resource}"
41+
# To get the resource name without extra parameters
42+
resource_name = resource.split("?")[0].split("/")[-1]
43+
url = base_url + resource
2944

3045
res0: list[dict[str, Any]] = []
3146
data: dict[str, Any] = {"next": url}
3247
n = 0
3348

34-
pbar = tqdm(desc=f"Downloading {resource}", unit=" page", dynamic_ncols=True)
49+
pbar = tqdm(desc=f"Downloading {resource_name}", unit=" page", dynamic_ncols=True)
3550

3651
while url := data["next"]:
3752
n += 1
@@ -46,24 +61,25 @@
4661

4762
pbar.close()
4863

49-
# To get the resource name without extra parameters
50-
filename = resource.split("?")[0].split("/")[-1]
51-
filename = f"{filename}_latest.json"
64+
# Save the data to a file
65+
filename = f"{resource_name}_latest.json"
5266
filepath = Config.raw_path / filename
5367

5468
with open(filepath, "w") as fd:
5569
json.dump(res0, fd)
5670

71+
# Download schedule unless excluded
72+
if "schedule" not in exclude:
73+
print("Downloading schedule...", end="")
74+
response = requests.get(schedule_url, headers=headers)
5775

58-
# Download schedule
59-
response = requests.get(schedule_url, headers=headers)
76+
if response.status_code != 200:
77+
raise Exception(f"Error {response.status_code}: {response.text}")
6078

61-
if response.status_code != 200:
62-
raise Exception(f"Error {response.status_code}: {response.text}")
63-
64-
data = response.json()
65-
filename = "schedule_latest.json"
66-
filepath = Config.raw_path / filename
79+
data = response.json()
80+
filename = "schedule_latest.json"
81+
filepath = Config.raw_path / filename
6782

68-
with open(filepath, "w") as fd:
69-
json.dump(data, fd)
83+
with open(filepath, "w") as fd:
84+
json.dump(data, fd)
85+
print(" done.")

src/transform.py

Lines changed: 60 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import sys
1+
from argparse import ArgumentParser
22

33
from src.config import Config
44
from src.utils.parse import Parse
@@ -7,43 +7,88 @@
77
from src.utils.utils import Utils
88

99
if __name__ == "__main__":
10-
print(f"Parsing the data from {Config.raw_path}...")
10+
parser = ArgumentParser(description="Transform data from Pretalx to EuroPython format and save it.")
11+
parser.add_argument(
12+
"-w",
13+
"--warn-dupes",
14+
action="store_true",
15+
help="Warn about duplicates in the data.",
16+
)
17+
parser.add_argument(
18+
"-e",
19+
"--exclude",
20+
choices=["schedule", "youtube"],
21+
action="append",
22+
help="Exclude certain data from transformation.",
23+
)
24+
args = parser.parse_args()
25+
exclude = set(args.exclude or [])
26+
27+
28+
print(f"Parsing submissions from {Config.raw_path}/submissions_latest.json...", end="")
1129
pretalx_submissions = Parse.publishable_submissions(
1230
Config.raw_path / "submissions_latest.json"
1331
)
32+
print(" done.")
33+
34+
print(f"\nParsing speakers from {Config.raw_path}/speakers_latest.json...", end="")
1435
pretalx_speakers = Parse.publishable_speakers(
1536
Config.raw_path / "speakers_latest.json", pretalx_submissions.keys()
1637
)
17-
pretalx_schedule = Parse.schedule(Config.raw_path / "schedule_latest.json")
38+
print(" done.")
1839

19-
# Parse the YouTube data
20-
youtube_data = Parse.youtube(Config.raw_path / "youtube_latest.json")
40+
if "youtube" not in exclude:
41+
print(f"Parsing YouTube data from {Config.raw_path}/youtube_latest.json...", end="")
42+
youtube_data = Parse.youtube(Config.raw_path / "youtube_latest.json")
43+
print(" done.")
44+
else:
45+
youtube_data = {}
2146

22-
print("Computing timing relationships...")
47+
print("\nComputing timing relationships...", end="")
2348
TimingRelationships.compute(pretalx_submissions.values())
49+
print(" done.")
2450

25-
print("Transforming the data...")
51+
print("\nTransforming submissions...", end="")
2652
ep_sessions = Transform.pretalx_submissions_to_europython_sessions(
2753
pretalx_submissions,
2854
youtube_data,
2955
)
56+
print(" done.")
57+
58+
print("\nTransforming speakers...", end="")
3059
ep_speakers = Transform.pretalx_speakers_to_europython_speakers(pretalx_speakers)
31-
ep_schedule = Transform.pretalx_schedule_to_europython_schedule(
32-
pretalx_schedule.breaks, ep_sessions, ep_speakers
33-
)
60+
print(" done.")
3461

3562
# Warn about duplicates if the flag is set
36-
if len(sys.argv) > 1 and sys.argv[1] == "--warn-dupes":
63+
if args.warn_dupes:
3764
Utils.warn_duplicates(
3865
session_attributes_to_check=["title"],
3966
speaker_attributes_to_check=["name"],
4067
sessions_to_check=ep_sessions,
4168
speakers_to_check=ep_speakers,
4269
)
4370

44-
print(f"Writing the data to {Config.public_path}...")
71+
print(f"\nWriting sessions to {Config.public_path}/sessions.json...", end="")
4572
Utils.write_to_file(Config.public_path / "sessions.json", ep_sessions)
73+
print(" done.")
74+
75+
print(f"\nWriting speakers to {Config.public_path}/speakers.json...", end="")
4676
Utils.write_to_file(Config.public_path / "speakers.json", ep_speakers)
47-
Utils.write_to_file(
48-
Config.public_path / "schedule.json", ep_schedule, direct_dump=True
49-
)
77+
print(" done.")
78+
79+
if "schedule" not in exclude:
80+
print("\nParsing schedule from {Config.raw_path}/schedule_latest.json...", end="")
81+
pretalx_schedule = Parse.schedule(Config.raw_path / "schedule_latest.json")
82+
print(" done.")
83+
84+
print(f"\nTransforming the schedule...", end="")
85+
ep_schedule = Transform.pretalx_schedule_to_europython_schedule(
86+
pretalx_schedule.breaks, ep_sessions, ep_speakers
87+
)
88+
print(" done.")
89+
90+
print(f"\nWriting schedule to {Config.public_path}/schedule.json...", end="")
91+
Utils.write_to_file(
92+
Config.public_path / "schedule.json", ep_schedule, direct_dump=True
93+
)
94+
print(" done.")

src/utils/utils.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ def warn_duplicates(
7575
Warns about duplicate attributes in the given objects
7676
"""
7777
print(
78-
f"Checking for duplicate {'s, '.join(session_attributes_to_check)}s in sessions..."
78+
f"\nChecking for duplicate {'s, '.join(session_attributes_to_check)}s in sessions...",
79+
end="",
7980
)
8081
duplicate_sessions = Utils.find_duplicate_attributes(
8182
sessions_to_check, session_attributes_to_check
@@ -84,9 +85,11 @@ def warn_duplicates(
8485
for attribute, codes in duplicate_sessions.items():
8586
if len(codes) > 1:
8687
print(f"Duplicate ``{attribute}`` in sessions: {codes}")
88+
print(" done.")
8789

8890
print(
89-
f"Checking for duplicate {'s, '.join(speaker_attributes_to_check)}s in speakers..."
91+
f"Checking for duplicate {'s, '.join(speaker_attributes_to_check)}s in speakers...",
92+
end="",
9093
)
9194
duplicate_speakers = Utils.find_duplicate_attributes(
9295
speakers_to_check, speaker_attributes_to_check
@@ -95,6 +98,7 @@ def warn_duplicates(
9598
for attribute, codes in duplicate_speakers.items():
9699
if len(codes) > 1:
97100
print(f"Duplicate ``{attribute}`` in speakers: {codes}")
101+
print(" done.")
98102

99103
@staticmethod
100104
def compute_unique_slugs_by_attribute(

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad © 2024 Your Company Name. All rights reserved.





Check this box to remove all script contents from the fetched content.



Check this box to remove all images from the fetched content.


Check this box to remove all CSS styles from the fetched content.


Check this box to keep images inefficiently compressed and original size.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy