Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use a binned histogram by dump size to compute incremental estimates. #59

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Estimate compression rates based on similarly sized backups
opoplawski committed Apr 2, 2019
commit 7ef98ea8e639d94c7ed453caf3e727c9741de45c
58 changes: 58 additions & 0 deletions server-src/amadmin.c
Original file line number Diff line number Diff line change
@@ -52,6 +52,8 @@ int main(int argc, char **argv);
void usage(void);
static void estimate(int argc, char **argv);
static void estimate_one(disk_t *dp);
static void perfhist(int argc, char **argv);
static void perfhist_one(disk_t *dp);
void call_amadmin_perl(int argc, char **argv);
void info(int argc, char **argv);
void info_one(disk_t *dp);
@@ -146,6 +148,8 @@ static const struct {
T_(" [<hostname> [<disks>]* ]* # Export curinfo database to stdout.") },
{ "import", import_db,
T_("\t\t\t\t # Import curinfo database from stdin.") },
{ "perfhist", perfhist,
T_(" [<hostname> [<disks>]* ]*\t# Output compression rate histogram") },
};
#define NCMDS G_N_ELEMENTS(cmdtab)

@@ -483,6 +487,60 @@ estimate(
}


/* ----------------------------------------------- */

/*
 * Print the compression-rate histogram stored for one disk.
 *
 * For level 0 and level 1 in turn, the histogram is (re)built with
 * setup_perf_hist() and one line is written to stdout:
 *   "<host> <disk> <level>" followed by " <bin>(<rate>)" for every bin
 *   whose average is positive, or
 *   "<host> <disk> <level> no history" when the history count is zero.
 * Host and disk names are printed in their quote_string() form.
 */
static void
perfhist_one(
    disk_t *dp)
{
    char *host_q = quote_string(dp->host->hostname);
    char *disk_q = quote_string(dp->name);
    info_t info;
    int lev;

    get_info(dp->host->hostname, dp->name, &info);

    for (lev = 0; lev < 2; lev++) {
        const double *avgs;
        int have_hist;
        int slot;

        setup_perf_hist(&info, lev);

        /* Level 0 uses the "full" table, level 1 the "incr" table. */
        if (lev == 0) {
            avgs = info.comp_avgs_full;
            have_hist = info.nhist_full;
        } else {
            avgs = info.comp_avgs_incr;
            have_hist = info.nhist_incr;
        }

        if (!have_hist) {
            printf("%s %s %d no history\n", host_q, disk_q, lev);
            continue;
        }

        printf("%s %s %d", host_q, disk_q, lev);
        for (slot = 0; slot < 64; slot++) {
            /* Bins without a positive average are not printed. */
            if (avgs[slot] > 0.0) {
                printf(" %d(%f)", slot, avgs[slot]);
            }
        }
        printf("\n");
    }

    amfree(host_q);
    amfree(disk_q);
}


static void
perfhist(
int argc,
char ** argv)
{
GList *dlist;
disk_t *dp;

if(argc >= 4)
diskloop(argc, argv, "perfhist", perfhist_one);
else
for(dlist = diskq.head; dlist != NULL; dlist = dlist->next) {
dp = dlist->data;
perfhist_one(dp);
}
}

/* ----------------------------------------------- */

void
138 changes: 138 additions & 0 deletions server-src/infofile.c
Original file line number Diff line number Diff line change
@@ -37,6 +37,8 @@
#include "amutil.h"

static void zero_info(info_t *);
static int get_full_history(history_t *in, history_t **full);
static int get_incr_history(history_t *in, history_t **incr);

static char *infodir = NULL;
static char *infofile = NULL;
@@ -609,6 +611,140 @@ perf_average(
return sum / n;
}

/*
 * Extract the level-0 entries from a history array.
 *
 * in    array of NB_HISTORY+1 history entries to scan
 * full  receives a g_malloc()ed array holding copies of the entries whose
 *       level is 0, in their original order; left untouched when no
 *       entry matches
 *
 * Returns the number of entries copied (0 when there are none).  The
 * caller owns the returned array.
 */
int get_full_history(
    history_t *in,
    history_t **full)
{
    history_t *out;
    int count = 0;
    int k;

    /* First pass: how many entries will be copied? */
    for (k = 0; k <= NB_HISTORY; k++) {
        count += (in[k].level == 0);
    }
    if (count == 0) {
        return 0;
    }

    out = g_malloc(count * sizeof(*out));
    *full = out;

    /* Second pass: copy the matching entries, keeping their order. */
    count = 0;
    for (k = 0; k <= NB_HISTORY; k++) {
        if (in[k].level != 0) {
            continue;
        }
        out[count] = in[k];
        count++;
    }

    return count;
}

/*
 * Extract the incremental (level > 0) entries from a history array.
 *
 * in    array of NB_HISTORY+1 history entries to scan
 * incr  receives a g_malloc()ed array holding copies of the entries whose
 *       level is greater than 0, in their original order; left untouched
 *       when no entry matches
 *
 * Returns the number of entries copied (0 when there are none).  The
 * caller owns the returned array.
 */
int
get_incr_history(
    history_t *in,
    history_t **incr)
{
    history_t *out;
    int count = 0;
    int k;

    /* First pass: how many entries will be copied? */
    for (k = 0; k <= NB_HISTORY; k++) {
        count += (in[k].level > 0);
    }
    if (count == 0) {
        return 0;
    }

    out = g_malloc(count * sizeof(*out));
    *incr = out;

    /* Second pass: copy the matching entries, keeping their order. */
    count = 0;
    for (k = 0; k <= NB_HISTORY; k++) {
        if (!(in[k].level > 0)) {
            continue;
        }
        out[count] = in[k];
        count++;
    }

    return count;
}

/*
 * Build the size-binned compression-rate table for one class of dumps.
 *
 * info   record whose history[] is read and whose comp_avgs_full /
 *        nhist_full (level == 0) or comp_avgs_incr / nhist_incr
 *        (level != 0) are rewritten
 * level  0 selects full dumps, anything else selects incrementals
 *
 * Each selected history entry is assigned to a bin derived from the
 * power of two of its original size (sizes 1..3 share bin 0, 4..7 are
 * bin 1, 8..15 are bin 2, and so on).  Only the first 20 selected
 * entries are considered, and at most three per bin.  On return
 * comp_avgs[bin] holds the mean csize/size ratio of the entries taken
 * for that bin, or 0.0 when the bin is empty, and nhist holds the number
 * of history entries that matched the level.
 *
 * The table is reset on every call, so calling this more than once for
 * the same level yields the same result.
 */
void
setup_perf_hist(
    info_t *info,
    int level)
{
    enum { PERF_HIST_BINS = 64, MAX_DUMPS = 20, MAX_PER_BIN = 3 };
    history_t *history = NULL;
    int n, *nhist, i, bin, compn[PERF_HIST_BINS];
    off_t size;
    double *comp_avgs;

    if (level == 0) {
        comp_avgs = info->comp_avgs_full;
        nhist = &info->nhist_full;
        *nhist = n = get_full_history(info->history, &history);
    } else {
        comp_avgs = info->comp_avgs_incr;
        nhist = &info->nhist_incr;
        *nhist = n = get_incr_history(info->history, &history);
    }

    /* Start from a clean table so repeated calls do not accumulate. */
    for (bin = 0; bin < PERF_HIST_BINS; bin++) {
        comp_avgs[bin] = 0.0;
        compn[bin] = 0;
    }
    if (n == 0) return;

    /* Total the rates into bins and count them */
    for (i = 0; i < n && i < MAX_DUMPS; i++) { /* Limit ourselves to the most recent 20 dumps */
        size = history[i].size;

        /*
         * A non-positive size has no meaningful compression ratio
         * (division by zero), and a negative value might never shift
         * down to zero below, so such entries are ignored.
         */
        if (size <= 0) continue;

        bin = -1;
        while (size >>= 1) bin++;
        /* size == 1 leaves bin at -1: fold it into the smallest bin. */
        if (bin < 0) bin = 0;
        if (bin >= PERF_HIST_BINS) bin = PERF_HIST_BINS - 1;

        if (compn[bin] < MAX_PER_BIN) { /* Only take the most recent three dumps in each bin */
            comp_avgs[bin] += (double)(history[i].csize) / (double)(history[i].size);
            compn[bin]++;
        }
    }

    /* Compute the averages */
    for (bin = 0; bin < PERF_HIST_BINS; bin++) {
        if (compn[bin] > 0) comp_avgs[bin] /= (double)compn[bin];
    }

    /* The filtered copy was allocated by get_*_history() for us. */
    g_free(history);
}

/*
 * Average historical compression rate binned by original size
 *
 * info   record holding the history and the cached per-bin averages
 * level  0 uses the full-dump table, anything else the incremental one
 * size   original (uncompressed) size to look up
 * def    value returned when no usable history exists
 *
 * The table for the requested level is built on first use (nhist == -1).
 * The bin for `size` is chosen with the same power-of-two rule as
 * setup_perf_hist(); sizes below 2, including zero and negative values,
 * map to bin 0.  When that bin has no data the search widens one bin at
 * a time in both directions and returns the mean of the nearest
 * populated neighbour(s).
 */
double
perf_hist(
    info_t *info,
    int level,
    off_t size,
    double def)
{
    double *comp_avgs, comp_avg;
    int bin = -1, binh, binl, n, *nhist;

    if (level == 0) {
        comp_avgs = info->comp_avgs_full;
        nhist = &info->nhist_full;
    } else {
        comp_avgs = info->comp_avgs_incr;
        nhist = &info->nhist_incr;
    }
    if (*nhist == -1) setup_perf_hist(info, level);
    if (*nhist == 0) return def;

    /* Only shift positive sizes: a negative value might never reach zero. */
    if (size > 0) {
        while (size >>= 1) bin++;
    }
    /* Keep the index inside comp_avgs[0..63]. */
    if (bin < 0) bin = 0;
    if (bin > 63) bin = 63;
    comp_avg = comp_avgs[bin];

    /* If we don't have any history at this size try the surrounding bins */
    binl = bin - 1;
    binh = bin + 1;
    while ((comp_avg <= 0.0) && ((binl >= 0) || (binh <= 63))) {
        n = 0;
        comp_avg = 0.0;
        if (binl >= 0 && comp_avgs[binl] > 0.0) {
            comp_avg += comp_avgs[binl];
            n++;
        }
        if (binh <= 63 && comp_avgs[binh] > 0.0) {
            comp_avg += comp_avgs[binh];
            n++;
        }
        if (n > 0) comp_avg /= (double)n;
        binl--;
        binh++;
    }

    /* This shouldn't happen */
    if (comp_avg <= 0.0) return def;

    return comp_avg;
}

static void
zero_info(
info_t *info)
@@ -622,6 +758,8 @@ zero_info(
info->full.rate[i] = info->incr.rate[i] = -1.0;
}

info->nhist_full = info->nhist_incr = -1;

for(i = 0; i < DUMP_LEVELS; i++) {
info->inf[i].date = (time_t)-1;
}
6 changes: 6 additions & 0 deletions server-src/infofile.h
Original file line number Diff line number Diff line change
@@ -79,6 +79,10 @@ typedef struct info_s {
# define FORCE_LEVEL_1 8 /* force level 1 at next run */
perf_t full;
perf_t incr;
double comp_avgs_full[64];
double comp_avgs_incr[64];
int nhist_full;
int nhist_incr;
stats_t inf[DUMP_LEVELS];
int last_level, consecutive_runs;
history_t history[NB_HISTORY+1];
@@ -91,6 +95,8 @@ void close_infofile(void);
char *get_dumpdate(info_t *info, int level);
char *get_based_on_timestamp(info_t *info, int lev);
double perf_average(double *array, double def);
void setup_perf_hist(info_t * info, int level);
double perf_hist(info_t *info, int level, off_t size, double def);
int get_info(char *hostname, char *diskname, info_t *info);
int put_info(char *hostname, char *diskname, info_t *info);
int del_info(char *hostname, char *diskname);
4 changes: 2 additions & 2 deletions server-src/planner.c
Original file line number Diff line number Diff line change
@@ -1418,8 +1418,8 @@ est_csize(
return;
}

if (one_est->level == 0) ratio = ep->fullcomp;
else ratio = ep->incrcomp;
if (one_est->level == 0) ratio = perf_average(ep->info->full.comp, ep->fullcomp);
else ratio = perf_hist(ep->info, one_est->level, size, ep->incrcomp);

/*
* make sure over-inflated compression ratios don't throw off the