-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add: time_stats for numeric value columns added
- Loading branch information
1 parent
b60ff8b
commit 69d4f17
Showing
6 changed files
with
172 additions
and
31 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
import os | ||
|
||
def generate_markdown_tree(directory, prefix=""):
    """Return the lines of a text tree describing ``directory``.

    Entries are listed in sorted order; sub-directories are descended into
    recursively. Anything named ``__pycache__`` is left out entirely.

    Args:
        directory: Path of the directory to render.
        prefix: Text prepended to every line produced for this level. Callers
            normally leave the default; recursion uses it to carry the
            indentation of the parent levels.

    Returns:
        list[str]: One string per rendered entry, without trailing newlines.
        An empty directory yields an empty list.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``directory`` cannot be
            read (missing, not a directory, permission denied).
    """
    # Filter *before* enumerating: the "is this the last entry?" test must
    # only count entries that are actually rendered. Otherwise a skipped
    # __pycache__ that sorts last leaves the final visible entry with a
    # "├── " connector and a dangling "│" under it.
    entries = sorted(
        name for name in os.listdir(directory) if name != "__pycache__"
    )
    last_index = len(entries) - 1

    lines = []
    for index, name in enumerate(entries):
        is_last = index == last_index
        connector = "└── " if is_last else "├── "
        lines.append(f"{prefix}{connector}{name}")

        path = os.path.join(directory, name)
        if os.path.isdir(path):
            # Child prefixes are as wide as the connector (4 columns) so that
            # nested entries line up under their parent's name.
            child_prefix = prefix + ("    " if is_last else "│   ")
            lines.extend(generate_markdown_tree(path, child_prefix))
    return lines
|
||
def save_markdown_tree(directory, output_file="folder_structure.md"):
    """Render the tree of ``directory`` and write it to ``output_file``.

    The tree lines come from ``generate_markdown_tree`` (which leaves out
    ``__pycache__`` folders). They are joined with newlines and written as
    UTF-8, replacing any existing file; no trailing newline is added. A
    confirmation message is printed once the file has been written.

    Args:
        directory: Path of the directory to render.
        output_file: Destination path of the generated file.
    """
    # Build the full text first so the output file is only opened (and
    # truncated) after the directory walk has succeeded.
    rendered = "\n".join(generate_markdown_tree(directory))
    with open(output_file, "w", encoding="utf-8") as handle:
        handle.write(rendered)
    print(f"Markdown folder structure saved to {output_file}")
|
||
# Directory whose structure is written out; change this path to render a
# different folder.
directory_path = "./src"
save_markdown_tree(directory_path)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
pandas==2.1.0 | ||
pdoc==14.4.0 # for docs | ||
pdoc==14.6.1 # for docs |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,10 +5,10 @@ | |
|
||
setuptools.setup( | ||
name = "timeseries-shaper", | ||
version = "0.0.0.10", | ||
version = "0.0.0.11", | ||
author = "Jakob Gabriel", | ||
author_email = "[email protected]", | ||
description = "timeseries-shaper filters, transforms and abstracts your timeseries dataframe", | ||
description = "timeseries-shaper filters, transforms and engineer your timeseries dataframe", | ||
long_description = long_description, | ||
long_description_content_type = "text/markdown", | ||
url = "https://jakobgabriel.github.io/timeseries-shaper/timeseries_shaper.html", | ||
|
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
import pandas as pd | ||
from ..base import Base | ||
|
||
class TimeGroupedStatistics(Base):
    """
    Time-grouped statistics for a numeric column.

    All methods are classmethods that follow the same steps: index the
    DataFrame by ``time_column``, resample it at ``freq``, and aggregate
    ``value_column`` within each interval. The returned DataFrame is indexed
    by the resampled time intervals.
    """

    # Message raised for an unsupported method name.
    _INVALID_METHOD_MESSAGE = "Invalid stat_method. Choose from 'mean', 'sum', 'min', 'max', 'diff', 'range'."

    # Supported method name -> (output column name, aggregation applied to the
    # resampled value column). Shared by calculate_statistic and
    # calculate_statistics so both accept exactly the same names.
    _STATISTICS = {
        'mean': ('mean', lambda column: column.mean()),
        'sum': ('sum', lambda column: column.sum()),
        'min': ('min', lambda column: column.min()),
        'max': ('max', lambda column: column.max()),
        # diff: last value - first value within each interval
        'diff': ('difference', lambda column: column.last() - column.first()),
        # range: max value - min value within each interval
        'range': ('range', lambda column: column.max() - column.min()),
    }

    @classmethod
    def _lookup_statistic(cls, stat_method):
        """Return ``(column_name, aggregation)`` for a method name, or raise ValueError."""
        try:
            return cls._STATISTICS[stat_method]
        except (KeyError, TypeError):
            # TypeError covers unhashable values (e.g. a list passed by
            # mistake); they are reported like any other invalid name.
            raise ValueError(cls._INVALID_METHOD_MESSAGE) from None

    @classmethod
    def _resample_column(cls, dataframe, time_column, value_column, freq):
        """Index ``dataframe`` by ``time_column``, resample at ``freq`` and select ``value_column``."""
        return dataframe.set_index(time_column).resample(freq)[value_column]

    @classmethod
    def calculate_statistic(cls, dataframe: pd.DataFrame, time_column: str, value_column: str, freq: str, stat_method: str) -> pd.DataFrame:
        """
        Calculate a specified statistic on the value column over the grouped time intervals.

        Args:
            dataframe (pd.DataFrame): The DataFrame containing the data.
            time_column (str): The name of the time column to group by.
            value_column (str): The name of the numeric column to calculate statistics on.
            freq (str): Frequency string for time grouping (e.g., 'H' for hourly, 'D' for daily;
                pandas 2.2+ deprecates the upper-case 'H' alias in favour of 'h').
            stat_method (str): The statistical method to apply ('mean', 'sum', 'min', 'max', 'diff', 'range').

        Returns:
            pd.DataFrame: A single-column DataFrame indexed by time interval. The column is named
            after the method, except 'diff', whose column is named 'difference'.

        Raises:
            ValueError: If ``stat_method`` is not one of the supported names.
        """
        # Validate the method name first so an invalid request fails before
        # any indexing or resampling work is done.
        column_name, aggregate = cls._lookup_statistic(stat_method)
        resampled = cls._resample_column(dataframe, time_column, value_column, freq)
        return aggregate(resampled).to_frame(column_name)

    @classmethod
    def calculate_statistics(cls, dataframe: pd.DataFrame, time_column: str, value_column: str, freq: str, stat_methods: list) -> pd.DataFrame:
        """
        Calculate multiple specified statistics on the value column over the grouped time intervals.

        Args:
            dataframe (pd.DataFrame): The DataFrame containing the data.
            time_column (str): The name of the time column to group by.
            value_column (str): The name of the numeric column to calculate statistics on.
            freq (str): Frequency string for time grouping (e.g., 'H' for hourly, 'D' for daily).
            stat_methods (list): A list of statistical methods to apply (e.g., ['mean', 'sum', 'diff', 'range']).
                A name listed more than once is computed once.

        Returns:
            pd.DataFrame: One column per requested method, in the order the methods were first
            listed, indexed by time interval. An empty ``stat_methods`` yields an empty DataFrame.

        Raises:
            ValueError: If any entry of ``stat_methods`` is not a supported name.
        """
        # Resolve every name up front: invalid names fail before any work is
        # done, and repeated names collapse to a single column instead of
        # producing clashing column labels.
        selected = {}
        for method in stat_methods:
            column_name, aggregate = cls._lookup_statistic(method)
            selected.setdefault(column_name, aggregate)

        if not selected:
            return pd.DataFrame()

        # Index and resample once; every statistic reuses the same resampler
        # instead of re-indexing the whole DataFrame per method.
        resampled = cls._resample_column(dataframe, time_column, value_column, freq)
        frames = [
            aggregate(resampled).to_frame(column_name)
            for column_name, aggregate in selected.items()
        ]
        # All frames come from the same resampler and therefore share one
        # index, so a column-wise concat lines them up row for row.
        return pd.concat(frames, axis=1)

    @classmethod
    def calculate_custom_func(cls, dataframe: pd.DataFrame, time_column: str, value_column: str, freq: str, func) -> pd.DataFrame:
        """
        Apply a custom aggregation function on the value column over the grouped time intervals.

        Args:
            dataframe (pd.DataFrame): The DataFrame containing the data.
            time_column (str): The name of the time column to group by.
            value_column (str): The name of the numeric column to calculate statistics on.
            freq (str): Frequency string for time grouping (e.g., 'H' for hourly, 'D' for daily).
            func (callable): Custom function applied to the values of each time interval.

        Returns:
            pd.DataFrame: A single-column DataFrame named 'custom', indexed by time interval.
        """
        resampled = cls._resample_column(dataframe, time_column, value_column, freq)
        return resampled.apply(func).to_frame('custom')