Source code for databallpy.data_parsers.tracking_data_parsers.metrica_tracking_data_parser

import io
import os

import numpy as np
import pandas as pd
import requests
from tqdm import tqdm

from databallpy.data_parsers import Metadata
from databallpy.data_parsers.metrica_metadata_parser import (
    _get_metadata,
    _get_td_channels,
    _update_metadata,
)
from databallpy.data_parsers.tracking_data_parsers.utils import (
    _add_ball_data_to_dict,
    _add_datetime,
    _add_periods_to_tracking_data,
    _add_player_tracking_data_to_dict,
    _get_gametime,
    _insert_missing_rows,
    _normalize_playing_direction_tracking,
)
from databallpy.utils.logging import logging_wrapper
from databallpy.utils.utils import _to_int


[docs] @logging_wrapper(__file__) def load_metrica_tracking_data( tracking_data_loc: str, metadata_loc: str, verbose: bool = True ) -> tuple[pd.DataFrame, Metadata]: """Function to load metrica tracking data. Args: tracking_data_loc (str): location of the tracking data .txt file metadata_loc (str): location of the metadata .xml file verbose (bool, optional): whether to print information about the progress in the terminall. Defaults to True. Raises: TypeError: if tracking_data_loc is not a string or io.StringIO Returns: Tuple[pd.DataFrame, Metadata]: tracking and metadata of the game """ if isinstance(tracking_data_loc, str): if not os.path.exists(tracking_data_loc): message = f"Could not find {tracking_data_loc}" raise FileNotFoundError(message) if not os.path.exists(metadata_loc): message = f"Could not find {metadata_loc}" raise FileNotFoundError(message) elif isinstance(tracking_data_loc, io.StringIO): # open tracking data, downloaded from internet. pass else: raise TypeError( "tracking_data_loc must be either a str or a StringIO object," f" not a {type(tracking_data_loc)}" ) metadata = _get_metadata(metadata_loc) td_channels = _get_td_channels(metadata_loc, metadata) metadata = _update_metadata(td_channels, metadata) tracking_data = _get_tracking_data( tracking_data_loc, td_channels, metadata.pitch_dimensions, verbose=verbose ) tracking_data, changed_periods = _normalize_playing_direction_tracking( tracking_data, metadata.periods_frames ) metadata.periods_changed_playing_direction = changed_periods tracking_data["datetime"] = _add_datetime( tracking_data["frame"], metadata.frame_rate, metadata.periods_frames["start_datetime_td"].iloc[0], ) tracking_data["period_id"] = _add_periods_to_tracking_data( tracking_data["frame"], metadata.periods_frames ) tracking_data["gametime_td"] = _get_gametime( tracking_data["frame"], tracking_data["period_id"], metadata ) return tracking_data, metadata
[docs] @logging_wrapper(__file__) def load_metrica_open_tracking_data( verbose: bool = True, ) -> tuple[pd.DataFrame, Metadata]: """Function to load open dataset of metrica Args: verbose (bool): Whether or not to print info in the terminal. Defaults to True. cache_path (Path): path to save the tracking file to as cache. Returns: Tuple[pd.DataFrame, Metadata]: tracking and metadata of the game """ td_data_link = "https://raw.githubusercontent.com/metrica-sports/sample-data\ /master/data/Sample_Game_3/Sample_Game_3_tracking.txt" td_metadata_link = "https://raw.githubusercontent.com/metrica-sports/sample-data\ /master/data/Sample_Game_3/Sample_Game_3_metadata.xml" if verbose: print("Downloading Metrica open tracking data...", end="") td_data = io.StringIO(requests.get(td_data_link).text) td_metadata = requests.get(td_metadata_link).text if verbose: print(" Done!") return load_metrica_tracking_data( tracking_data_loc=td_data, metadata_loc=td_metadata, verbose=verbose )
@logging_wrapper(__file__) def _get_tracking_data( tracking_data_loc: str | io.StringIO, channels: dict, pitch_dimensions: list[float, float], verbose: bool = True, ) -> pd.DataFrame: """Function to load the tracking data of metrica. Args: tracking_data_loc (Union[str, io.StringIO]): location of the tracking data .txt file channels (dict): dictionary with for all frames the order of which players are referred to in the raw tracking data pitch_dimensions (list): x and y dimensions of the pitch in meters verbose (bool, optional): whether to print information about the progress in the terminal. Defaults to True. Returns: pd.DataFrame: tracking data of the game in a pd dataframe """ if isinstance(tracking_data_loc, str): file = open(tracking_data_loc) lines = file.readlines() file.close() else: lines = tracking_data_loc.readlines() size_lines = len(lines) data = { "frame": [np.nan] * size_lines, "ball_x": [np.nan] * size_lines, "ball_y": [np.nan] * size_lines, "ball_z": [np.nan] * size_lines, "ball_status": [None] * size_lines, "team_possession": [None] * size_lines, } if verbose: lines = tqdm( lines, desc="Writing lines to dataframe", unit=" lines", leave=False ) for idx, line in enumerate(lines): frame, players_info, ball_info = line.split(":") frame = _to_int(frame) data["frame"][idx] = frame channel = channels.loc[ (channels["start"] <= frame) & (channels["end"] >= frame), "ids", ].iloc[0] players = players_info.split(";") for i, player in enumerate(players): x, y = player.split(",") team = channel[i].split("_")[0] shirt_num = channel[i].split("_")[1] data = _add_player_tracking_data_to_dict(team, shirt_num, x, y, data, idx) x, y = ball_info.split(",") data = _add_ball_data_to_dict(x, y, np.nan, None, None, data, idx) df = pd.DataFrame(data) df["ball_status"] = np.where(pd.isnull(df["ball_x"]), "dead", "alive") for col in [x for x in df.columns if "_x" in x]: df[col] = df[col] * pitch_dimensions[0] - (pitch_dimensions[0] / 2) for col in [x for x in df.columns if "_y" in x]: df[col] = df[col] * pitch_dimensions[1] - (pitch_dimensions[1] / 2) df = _insert_missing_rows(df, "frame") return df