import json
import os
import pandas as pd
from databallpy.data_parsers import Metadata
from databallpy.data_parsers.event_data_parsers import (
load_instat_event_data,
load_metrica_event_data,
load_metrica_open_event_data,
load_opta_event_data,
load_scisports_event_data,
load_sportec_event_data,
load_sportec_open_event_data,
load_statsbomb_event_data,
)
from databallpy.data_parsers.tracking_data_parsers import (
load_inmotio_tracking_data,
load_metrica_open_tracking_data,
load_metrica_tracking_data,
load_sportec_open_tracking_data,
load_tracab_tracking_data,
)
from databallpy.data_parsers.tracking_data_parsers.utils import (
_quality_check_tracking_data,
)
from databallpy.events import IndividualCloseToBallEvent, PassEvent
from databallpy.game import Game
from databallpy.schemas import (
EventData,
EventDataSchema,
TrackingData,
TrackingDataSchema,
)
from databallpy.utils.align_player_ids import (
align_player_ids_jersey,
align_player_ids_name_similarity,
)
from databallpy.utils.constants import MISSING_INT
from databallpy.utils.game_utils import create_event_attributes_dataframe
from databallpy.utils.logging import create_logger, logging_wrapper
from databallpy.utils.warnings import deprecated
# Module-level logger used by the loader functions in this file.
LOGGER = create_logger(__name__)
@logging_wrapper(__file__)
def get_game(
    tracking_data_loc: str | None = None,
    tracking_metadata_loc: str | None = None,
    event_data_loc: str | None = None,
    event_metadata_loc: str | None = None,
    event_match_loc: str | None = None,
    event_lineup_loc: str | None = None,
    tracking_data_provider: str | None = None,
    event_data_provider: str | None = None,
    check_quality: bool = True,
    _check_game_class_: bool = True,
    verbose: bool = True,
) -> Game:
    """
    Function to get all information of a game given its datasources

    Args:
        tracking_data_loc (str, optional): location of the tracking data.
            Defaults to None.
        tracking_metadata_loc (str, optional): location of the metadata of the
            tracking data. Required when tracking_data_loc is given.
            Defaults to None.
        event_data_loc (str, optional): location of the event data. Defaults to
            None.
        event_metadata_loc (str, optional): location of the metadata of the event
            data. Required when event_data_loc is given, unless the provider is
            scisports or statsbomb. Defaults to None.
        event_match_loc (str, optional): location of the game file of the event
            data. Only used for statsbomb event data. Defaults to None.
        event_lineup_loc (str, optional): location of the lineup file of the event
            data. Only used for statsbomb event data. Defaults to None.
        tracking_data_provider (str, optional): provider of the tracking data.
            Defaults to None. Supported providers are
            [tracab, metrica, inmotio, sportec, dfl]
        event_data_provider (str, optional): provider of the event data. Defaults
            to None. Supported providers are
            [opta, metrica, instat, scisports, statsbomb, sportec, dfl]
        check_quality (bool, optional): whether you want to check the quality of
            the tracking data. Defaults to True
        _check_game_class_ (bool, optional): forwarded to the Game constructor as
            `_check_inputs_`. Defaults to True.
        verbose (bool, optional): whether or not to print info about progress

    Returns:
        (Game): a game object with all information available of the game.

    Raises:
        ValueError: if a data location is given without the provider or metadata
            location it needs, if statsbomb is used without both the match and
            lineup files, or if neither tracking nor event data could be loaded.
    """
    LOGGER.info(
        "Trying to load a new game in get_game();"
        f"\n\tTracking data loc: {str(tracking_data_loc)}"
        f"\n\tTracking data provider: {str(tracking_data_provider)}"
        f"\n\tEvent data loc: {str(event_data_loc)}"
        f"\n\tEven data provider: {str(event_data_provider)}"
        f"\n\tCheck quality: {str(check_quality)}"
        f"\n\tVerbose: {str(verbose)}"
    )

    # Input validation: only the first failing check is reported.
    if (
        event_data_loc
        and event_metadata_loc is None
        and event_data_provider not in ["scisports", "statsbomb"]
    ):
        raise ValueError(
            "Please provide an event metadata location when providing an event"
            " data location"
        )
    elif event_data_loc and event_data_provider is None:
        raise ValueError(
            "Please provide an event data provider when providing an event"
            " data location"
        )
    elif event_metadata_loc and event_data_provider is None:
        raise ValueError(
            "Please provide an event data provider when providing an event"
            " metadata location"
        )
    elif event_data_provider == "statsbomb" and (
        event_match_loc is None or event_lineup_loc is None
    ):
        raise ValueError(
            "Please provivde both event_match_loc and event_lineup_loc when using "
            "statsbomb as event data provider"
        )
    elif tracking_data_loc and tracking_data_provider is None:
        raise ValueError(
            "Please provide a tracking data provider when providing a tracking"
            " data location"
        )
    elif tracking_data_loc and tracking_metadata_loc is None:
        raise ValueError(
            "Please provide a tracking metadata location when providing a tracking"
            " data location"
        )

    # Per-provider flags, forwarded to Game as `_tracking_timestamp_is_precise`
    # and `_event_timestamp_is_precise` respectively.
    tracking_precise_timestamps = {
        "tracab": True,
        "metrica": True,
        "inmotio": False,
        "sportec": True,
        "dfl": True,
    }
    event_precise_timestamps = {
        "opta": True,
        "metrica": True,
        "instat": False,
        "scisports": False,
        "statsbomb": False,
        "sportec": True,
        "dfl": True,
    }

    uses_tracking_data = False
    uses_event_data = False

    # Check if event data should be loaded
    if event_data_loc and event_data_provider:
        event_data, event_metadata, databallpy_events = load_event_data(
            event_data_loc=event_data_loc,
            event_metadata_loc=event_metadata_loc,
            event_data_provider=event_data_provider,
            event_match_loc=event_match_loc,
            event_lineup_loc=event_lineup_loc,
        )
        EventDataSchema.validate(event_data)
        uses_event_data = True

    LOGGER.info(
        "Succesfully passed input checks. Attempting to load the base "
        "data (get_game())."
    )

    # Check if tracking data should be loaded
    if tracking_data_loc and tracking_metadata_loc and tracking_data_provider:
        tracking_data, tracking_metadata = load_tracking_data(
            tracking_data_loc=tracking_data_loc,
            tracking_metadata_loc=tracking_metadata_loc,
            tracking_data_provider=tracking_data_provider,
            verbose=verbose,
        )
        if not uses_event_data:
            # No event source: there are no databallpy event objects to convert.
            databallpy_events = {}
        TrackingDataSchema.validate(tracking_data)
        uses_tracking_data = True

    if not uses_event_data and not uses_tracking_data:
        raise ValueError("No data loaded, please provide data locations and providers")

    LOGGER.info(
        f"Loaded data in get_game():\n\tTracking data: {str(uses_tracking_data)}"
        f"\n\tEvent data: {str(uses_event_data)}"
    )

    # extra checks when using both tracking and event data
    LOGGER.info("Combining info from tracking and event data in get_game()")
    if uses_tracking_data and uses_event_data:
        tracking_metadata.periods_frames = merge_metadata_periods(
            tracking_metadata.periods_frames, event_metadata.periods_frames
        )
        event_data, databallpy_events = rescale_event_data(
            tracking_metadata.pitch_dimensions,
            event_metadata.pitch_dimensions,
            event_data,
            databallpy_events,
        )
        tracking_metadata = align_player_and_team_ids(event_metadata, tracking_metadata)
        (
            event_metadata.home_players,
            event_metadata.away_players,
        ) = merge_player_info(tracking_metadata, event_metadata)

    # check quality of tracking data
    allow_synchronise = False
    if check_quality and uses_tracking_data:
        allow_synchronise = _quality_check_tracking_data(
            tracking_data,
            tracking_metadata.frame_rate,
            tracking_metadata.periods_frames,
        )
    # Synchronisation is only allowed when event data is loaded as well.
    allow_synchronise = False if not uses_event_data else allow_synchronise

    changed_periods = (
        tracking_metadata.periods_changed_playing_direction
        if uses_tracking_data
        else None
    )

    # One attribute dataframe per event type; empty when the provider gave none.
    event_frames = {
        key: (
            create_event_attributes_dataframe(databallpy_events[key])
            if key in databallpy_events
            else pd.DataFrame()
        )
        for key in ("shot_events", "pass_events", "dribble_events")
    }

    # Team and player info comes from the event metadata when available; pitch
    # dimensions and periods come from the tracking metadata when available.
    info_metadata = event_metadata if uses_event_data else tracking_metadata
    frame_metadata = tracking_metadata if uses_tracking_data else event_metadata

    LOGGER.info("Creating game object in get_game()")
    game = Game(
        tracking_data=(
            TrackingData(
                tracking_data,
                provider=tracking_data_provider,
                frame_rate=tracking_metadata.frame_rate,
            )
            if uses_tracking_data
            else TrackingData()
        ),
        event_data=(
            EventData(event_data, provider=event_data_provider)
            if uses_event_data
            else EventData()
        ),
        pitch_dimensions=frame_metadata.pitch_dimensions,
        periods=frame_metadata.periods_frames,
        home_team_id=info_metadata.home_team_id,
        home_formation=info_metadata.home_formation,
        home_score=info_metadata.home_score,
        home_team_name=info_metadata.home_team_name,
        home_players=info_metadata.home_players,
        away_team_id=info_metadata.away_team_id,
        away_formation=info_metadata.away_formation,
        away_score=info_metadata.away_score,
        away_team_name=info_metadata.away_team_name,
        away_players=info_metadata.away_players,
        country=info_metadata.country,
        allow_synchronise_tracking_and_event_data=allow_synchronise,
        shot_events=event_frames["shot_events"],
        dribble_events=event_frames["dribble_events"],
        pass_events=event_frames["pass_events"],
        _event_timestamp_is_precise=(
            event_precise_timestamps[event_data_provider]
            if uses_event_data
            else False
        ),
        _tracking_timestamp_is_precise=(
            tracking_precise_timestamps[tracking_data_provider]
            if uses_tracking_data
            else False
        ),
        _periods_changed_playing_direction=changed_periods,
        _check_inputs_=_check_game_class_,
    )
    LOGGER.info(f"Succesfully created game object: {game.name}")
    return game
@logging_wrapper(__file__)
def get_saved_game(name: str, path: str | None = None) -> Game:
    """Function to load a saved game object

    Args:
        name (str): the name with the to be loaded game, should be a folder.
            The folder should contain:
            - tracking_data.parquet
            - event_data.parquet
            - periods.parquet
            - pass_events.parquet
            - shot_events.parquet
            - dribble_events.parquet
            - away_players.parquet
            - home_players.parquet
            - metadata.json
        path (str, optional): path of directory where game is saved. Defaults
            to None, in which case the current working directory at call time
            is used.

    Returns:
        Game: All information about the game

    Raises:
        ValueError: if the folder `path/name` does not exist.
    """
    # Resolve the default here rather than in the signature: a default of
    # `os.getcwd()` would be frozen to the directory at import time.
    if path is None:
        path = os.getcwd()

    full_path = os.path.join(path, name)
    if not os.path.isdir(full_path):
        raise ValueError(f"Directory {full_path} does not exist")

    with open(os.path.join(full_path, "metadata.json"), "rb") as f:
        metadata = json.load(f)

    def read_frame(stem: str) -> pd.DataFrame:
        """Read `<stem>.parquet` from the saved game folder."""
        return pd.read_parquet(os.path.join(full_path, f"{stem}.parquet"))

    return Game(
        tracking_data=TrackingData(
            read_frame("tracking_data"),
            provider=metadata["tracking_data_provider"],
            frame_rate=metadata["tracking_data_frame_rate"],
        ),
        event_data=EventData(
            read_frame("event_data"),
            provider=metadata["event_data_provider"],
        ),
        pitch_dimensions=metadata["pitch_dimensions"],
        periods=read_frame("periods"),
        home_team_id=metadata["home_team_id"],
        home_formation=metadata["home_formation"],
        home_score=metadata["home_score"],
        home_team_name=metadata["home_team_name"],
        home_players=read_frame("home_players"),
        away_team_id=metadata["away_team_id"],
        away_formation=metadata["away_formation"],
        away_score=metadata["away_score"],
        away_team_name=metadata["away_team_name"],
        away_players=read_frame("away_players"),
        country=metadata["country"],
        allow_synchronise_tracking_and_event_data=metadata[
            "allow_synchronise_tracking_and_event_data"
        ],
        shot_events=read_frame("shot_events"),
        dribble_events=read_frame("dribble_events"),
        pass_events=read_frame("pass_events"),
        _tracking_timestamp_is_precise=metadata["_tracking_timestamp_is_precise"],
        _event_timestamp_is_precise=metadata["_event_timestamp_is_precise"],
        _periods_changed_playing_direction=metadata[
            "_periods_changed_playing_direction"
        ],
        _is_synchronised=metadata["_is_synchronised"],
        # The saved game is loaded without re-running the Game input checks.
        _check_inputs_=False,
    )
@logging_wrapper(__file__)
def load_tracking_data(
    *,
    tracking_data_loc: str,
    tracking_metadata_loc: str,
    tracking_data_provider: str,
    verbose: bool = True,
) -> tuple[pd.DataFrame, Metadata]:
    """Load the tracking data and its metadata for a single game.

    Args:
        tracking_data_loc (str): location of the tracking data file
        tracking_metadata_loc (str): location of the tracking metadata file
        tracking_data_provider (str): provider of the tracking data, one of
            tracab, metrica, inmotio, sportec or dfl
        verbose (bool, optional): whether or not to print info about progress

    Returns:
        Tuple[pd.DataFrame, Metadata]: tracking data and metadata of the game

    Raises:
        ValueError: if the provider is not one of the supported providers.
    """
    supported_providers = ("tracab", "metrica", "inmotio", "sportec", "dfl")
    if tracking_data_provider not in supported_providers:
        raise ValueError(
            f"We do not support '{tracking_data_provider}' as tracking data provider"
            " yet, please open an issue in our Github repository."
        )

    # Dispatch to the provider-specific parser
    if tracking_data_provider == "metrica":
        loaded = load_metrica_tracking_data(
            tracking_data_loc=tracking_data_loc,
            metadata_loc=tracking_metadata_loc,
            verbose=verbose,
        )
    elif tracking_data_provider == "inmotio":
        loaded = load_inmotio_tracking_data(
            tracking_data_loc=tracking_data_loc,
            metadata_loc=tracking_metadata_loc,
            verbose=verbose,
        )
    else:
        # tracab, sportec and dfl all go through the tracab parser
        loaded = load_tracab_tracking_data(
            tracking_data_loc, tracking_metadata_loc, verbose=verbose
        )

    tracking_data, tracking_metadata = loaded
    return tracking_data, tracking_metadata
@logging_wrapper(__file__)
def load_event_data(
    *,
    event_data_loc: str,
    event_metadata_loc: str,
    event_data_provider: str,
    event_match_loc: str,
    event_lineup_loc: str,
) -> tuple[pd.DataFrame, Metadata, dict]:
    """Load the event data, its metadata and the databallpy events of a game.

    Args:
        event_data_loc (str): location of the event data file
        event_metadata_loc (str): location of the event metadata file
        event_data_provider (str): provider of the event data, one of opta,
            metrica, instat, scisports, statsbomb, sportec or dfl
        event_match_loc (str): location of match file (specific to statsbomb)
        event_lineup_loc (str): location of lineup file (specific to statsbomb)

    Returns:
        Tuple[pd.DataFrame, Metadata, dict]: event data, metadata of the game and
            the databallpy events. For instat the events dict is always empty.

    Raises:
        ValueError: if the provider is not one of the supported providers.
    """
    supported_providers = (
        "opta",
        "metrica",
        "instat",
        "scisports",
        "statsbomb",
        "sportec",
        "dfl",
    )
    if event_data_provider not in supported_providers:
        raise ValueError(
            f"We do not support '{event_data_provider}' as event data provider yet, "
            "please open an issue in our Github repository."
        )

    if event_data_provider == "instat":
        # The third value returned by the instat parser is discarded.
        event_data, event_metadata, _ = load_instat_event_data(
            event_data_loc=event_data_loc, metadata_loc=event_metadata_loc
        )
        return event_data, event_metadata, {}

    # Every remaining provider returns (event data, metadata, databallpy events)
    if event_data_provider == "opta":
        loaded = load_opta_event_data(
            f7_loc=event_metadata_loc, f24_loc=event_data_loc
        )
    elif event_data_provider == "metrica":
        loaded = load_metrica_event_data(
            event_data_loc=event_data_loc, metadata_loc=event_metadata_loc
        )
    elif event_data_provider == "scisports":
        loaded = load_scisports_event_data(
            events_json=event_data_loc,
        )
    elif event_data_provider == "statsbomb":
        loaded = load_statsbomb_event_data(
            events_loc=event_data_loc,
            match_loc=event_match_loc,
            lineup_loc=event_lineup_loc,
        )
    else:
        # sportec and dfl share one parser
        loaded = load_sportec_event_data(
            event_data_loc=event_data_loc, metadata_loc=event_metadata_loc
        )

    event_data, event_metadata, databallpy_events = loaded
    return event_data, event_metadata, databallpy_events
@logging_wrapper(__file__)
def get_open_game(
    provider: str = "sportec",
    game_id: str = "J03WMX",
    verbose: bool = True,
    use_cache: bool = True,
) -> Game:
    """Function to load a game object from an open datasource

    Args:
        provider (str, optional): What provider to get the open data from.
            Defaults to "sportec". Options are
            ["metrica", "dfl", "sportec", "tracab"]
        game_id (str, optional): The Game id of the open game. Not used for the
            metrica provider. Defaults to 'J03WMX',
        verbose (bool, optional): Whether or not to print info about progress
            in the terminal, Defaults to True.
        use_cache (bool, optional): Use cached version of match if available.
            The cache is the game folder under `datasets/` relative to the
            current working directory. Defaults to True.

    Returns:
        Game: All information about the game

    Raises:
        ValueError: if the provider is not one of the supported options.
    """
    provider_options = ["metrica", "dfl", "sportec", "tracab"]
    if provider not in provider_options:
        raise ValueError(
            f"Open game provider should be in {provider_options}, not {provider}."
        )

    is_metrica = provider == "metrica"
    if is_metrica:
        save_path = os.path.join("datasets", "metrica")
    else:
        save_path = os.path.join("datasets", "IDSSE", game_id)

    if use_cache and os.path.exists(save_path):
        # The existence check above is relative to the current working
        # directory, so load from that same directory explicitly.
        return get_saved_game(save_path, path=os.getcwd())

    if is_metrica:
        tracking_data, metadata = load_metrica_open_tracking_data(verbose=verbose)
        event_data, ed_metadata, databallpy_events = load_metrica_open_event_data()
    else:
        tracking_data, metadata = load_sportec_open_tracking_data(
            game_id=game_id,
            verbose=verbose,
        )
        event_data, ed_metadata, databallpy_events = load_sportec_open_event_data(
            game_id=game_id
        )
        # Remove the temporary XML files from the game folder (presumably
        # written by the sportec open-data loaders above).
        os.remove(os.path.join(save_path, "tracking_data_temp.xml"))
        os.remove(os.path.join(save_path, "metadata_temp.xml"))

    merged_periods = merge_metadata_periods(
        metadata.periods_frames, ed_metadata.periods_frames
    )

    # One attribute dataframe per event type; empty when the provider gave none.
    event_frames = {
        key: (
            create_event_attributes_dataframe(databallpy_events[key])
            if key in databallpy_events
            else pd.DataFrame()
        )
        for key in ("shot_events", "pass_events", "dribble_events")
    }

    game = Game(
        tracking_data=TrackingData(
            tracking_data, provider=provider, frame_rate=metadata.frame_rate
        ),
        event_data=EventData(event_data, provider=provider),
        pitch_dimensions=metadata.pitch_dimensions,
        periods=merged_periods,
        home_team_id=metadata.home_team_id,
        home_formation=metadata.home_formation,
        home_score=metadata.home_score,
        home_team_name=metadata.home_team_name,
        home_players=metadata.home_players,
        away_team_id=metadata.away_team_id,
        away_formation=metadata.away_formation,
        away_score=metadata.away_score,
        away_team_name=metadata.away_team_name,
        away_players=metadata.away_players,
        country=ed_metadata.country,
        allow_synchronise_tracking_and_event_data=True,
        shot_events=event_frames["shot_events"],
        dribble_events=event_frames["dribble_events"],
        pass_events=event_frames["pass_events"],
        _tracking_timestamp_is_precise=True,
        _event_timestamp_is_precise=True,
        _periods_changed_playing_direction=(metadata.periods_changed_playing_direction),
    )
    # Save the freshly built game so a later call with use_cache=True can load it.
    game.save_game(save_path, verbose=False, allow_overwrite=True)
    return game
@logging_wrapper(__file__)
def merge_metadata_periods(
    tracking_periods: pd.DataFrame, event_periods: pd.DataFrame
) -> pd.DataFrame:
    """Combine the periods tables of the tracking and event metadata.

    The tracking periods are kept as they are. Columns that only exist in the
    event periods are appended to the right, in reverse alphabetical order.

    Args:
        tracking_periods (pd.DataFrame): periods of the tracking metadata
        event_periods (pd.DataFrame): periods of the event metadata

    Returns:
        pd.DataFrame: merged periods
    """
    event_only_cols = sorted(
        event_periods.columns.difference(tracking_periods.columns),
        reverse=True,
    )
    extra_event_periods = event_periods[event_only_cols]
    return pd.concat([tracking_periods, extra_event_periods], axis="columns")
@logging_wrapper(__file__)
def rescale_event_data(
    tracking_pitch_dimensions: list[float, float],
    event_pitch_dimensions: list[float, float],
    event_data: pd.DataFrame,
    databallpy_events: dict[str, dict[str | int,]] = None,
) -> tuple[pd.DataFrame, dict[str, dict[str | int, IndividualCloseToBallEvent]]]:
    """Scale event coordinates to the pitch dimensions of the tracking data.

    Nothing is changed when both pitch dimensions are equal or when either of
    them contains a missing value. Otherwise the `start_x` / `start_y` columns
    of the event data are scaled in place, and so are the start coordinates of
    every databallpy event (plus the end coordinates of pass events).

    Args:
        tracking_pitch_dimensions (list): pitch dimensions of the tracking data
        event_pitch_dimensions (list): pitch dimensions of the event data
        event_data (pd.DataFrame): event data
        databallpy_events (dict): databallpy events

    Returns:
        Tuple[pd.DataFrame, dict]: rescaled event data and databallpy events
    """
    skip_scaling = (
        tracking_pitch_dimensions == event_pitch_dimensions
        or pd.isnull(list(event_pitch_dimensions)).any()
        or pd.isnull(list(tracking_pitch_dimensions)).any()
    )
    if skip_scaling:
        LOGGER.info(
            "Scaling is not needed because pitch dimensions are equal, "
            "or scaling is not possible because pitch dimensions have nan values."
            "(rescale_event_data())"
        )
        return event_data, databallpy_events

    # Ratio between the tracking and event pitch size, per axis
    x_scale = tracking_pitch_dimensions[0] / event_pitch_dimensions[0]
    y_scale = tracking_pitch_dimensions[1] / event_pitch_dimensions[1]

    event_data["start_x"] *= x_scale
    event_data["start_y"] *= y_scale

    if databallpy_events is None:
        return event_data, databallpy_events

    # correct the databallpy event instances as well
    for events_by_id in databallpy_events.values():
        for event in events_by_id.values():
            event.start_x *= x_scale
            event.start_y *= y_scale
            if isinstance(event, PassEvent):
                event.end_x *= x_scale
                event.end_y *= y_scale

    return event_data, databallpy_events
@logging_wrapper(__file__)
def align_player_and_team_ids(
    event_metadata: Metadata, tracking_metadata: Metadata
) -> Metadata:
    """Function to align the player and team id's of the tracking metadata with
    the event metadata.

    Player ids are considered aligned for a team when more than 11 tracking
    player ids also occur in the event players of that team. If that is not the
    case for both teams, the ids are aligned on player name similarity when all
    home shirt numbers are missing in either source, and on jersey number
    otherwise. Team ids and team names are always copied from the event
    metadata.

    Args:
        event_metadata (Metadata): event metadata
        tracking_metadata (Metadata): tracking metadata

    Returns:
        Metadata: tracking metadata with aligned player and team id's
    """

    def ids_already_aligned(tracking_players, event_players):
        """Whether more than 11 tracking player ids occur in the event players."""
        return tracking_players["id"].isin(event_players["id"]).sum() > 11

    home_eq = ids_already_aligned(
        tracking_metadata.home_players, event_metadata.home_players
    )
    away_eq = ids_already_aligned(
        tracking_metadata.away_players, event_metadata.away_players
    )

    if not home_eq or not away_eq:
        # NOTE(review): only the home players are inspected for missing shirt
        # numbers - confirm the away players never need a separate check.
        shirt_numbers_missing = (
            tracking_metadata.home_players["shirt_num"] == MISSING_INT
        ).all() or (event_metadata.home_players["shirt_num"] == MISSING_INT).all()
        if shirt_numbers_missing:
            tracking_metadata = align_player_ids_name_similarity(
                tracking_metadata, event_metadata
            )
        else:
            tracking_metadata = align_player_ids_jersey(
                tracking_metadata, event_metadata
            )

    # Align team id's
    tracking_metadata.home_team_id = event_metadata.home_team_id
    tracking_metadata.away_team_id = event_metadata.away_team_id
    tracking_metadata.home_team_name = event_metadata.home_team_name
    tracking_metadata.away_team_name = event_metadata.away_team_name
    return tracking_metadata
@logging_wrapper(__file__)
def merge_player_info(
    tracking_metadata: Metadata, event_metadata: Metadata
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Function to merge the player information of the tracking and event metadata

    The tracking players are extended with the columns that only exist in the
    event players (decided on the home players' columns), joined on `id`. The
    `position` column is then taken from the event players of the same `id`.

    Args:
        tracking_metadata (Metadata): metadata of the tracking data
        event_metadata (Metadata): metadata of the event data

    Returns:
        Tuple[pd.DataFrame, pd.DataFrame]: merged home player information
            and merged away player information.
    """
    player_cols = event_metadata.home_players.columns.difference(
        tracking_metadata.home_players.columns
    ).to_list()
    player_cols.append("id")

    def merge_team(
        tracking_players: pd.DataFrame, event_players: pd.DataFrame
    ) -> pd.DataFrame:
        """Merge one team's players and copy the event positions by player id."""
        merged = tracking_players.merge(event_players[player_cols], on="id")
        # Look the position up by id: the merged frame follows the tracking row
        # order, so assigning the event column directly would pair rows by
        # index and mix up players when the two sources are ordered differently.
        position_by_id = event_players.drop_duplicates(subset="id").set_index("id")[
            "position"
        ]
        merged["position"] = merged["id"].map(position_by_id)
        return merged

    home_players = merge_team(
        tracking_metadata.home_players, event_metadata.home_players
    )
    away_players = merge_team(
        tracking_metadata.away_players, event_metadata.away_players
    )
    return home_players, away_players
@deprecated(
    "`get_match` is deprecated and will be removed in version 0.8.0. "
    "Please use `get_game` instead"
)
def get_match(*args, **kwargs):
    """Deprecated alias: forwards all arguments to `get_game`."""
    game = get_game(*args, **kwargs)
    return game
@deprecated(
    "`get_open_match` is deprecated and will be removed in version 0.8.0. "
    "Please use `get_open_game` instead"
)
def get_open_match(*args, **kwargs):
    """Deprecated alias: forwards all arguments to `get_open_game`."""
    return get_open_game(*args, **kwargs)
@deprecated(
    "`get_saved_match` is deprecated and will be removed in version 0.8.0. "
    "Please use `get_saved_game` instead"
)
def get_saved_match(*args, **kwargs):
    """Deprecated alias: forwards all arguments to `get_saved_game`."""
    game = get_saved_game(*args, **kwargs)
    return game