# longbio.py
# Regress follower count on bio length (in words) for the users in 'users.csv'.
import pandas as pd
import statsmodels.api as sm
# Estimate how follower count varies with bio length: fit an ordinary least
# squares regression of `followers` on the number of words in `bio`, using
# only users that have a non-empty bio, and print the slope.

# Load the users data from 'users.csv' (must provide 'bio' and 'followers').
users_df = pd.read_csv('users.csv')

# Count whitespace-separated words in each bio. Casting to the pandas string
# dtype first keeps this working even when read_csv infers a non-string dtype
# for the column (e.g. every bio is missing or looks numeric), where calling
# str.split on the raw values would fail. Missing bios count as 0 words.
users_df['bio_word_count'] = (
    users_df['bio']
    .astype('string')
    .str.split()
    .str.len()
    .fillna(0)
    .astype('int64')
)

# Keep only users who actually have a bio (at least one word).
filtered_users_df = users_df[users_df['bio_word_count'] > 0]

# Independent variable (bio word count) plus a constant column so the model
# fits an intercept; dependent variable is the follower count.
X = sm.add_constant(filtered_users_df['bio_word_count'])
y = filtered_users_df['followers']

# Fit the regression. missing='drop' tells statsmodels to discard
# observations containing NaN (e.g. a blank 'followers' cell) rather than
# skipping NaN checks, which is the default behaviour.
model = sm.OLS(y, X, missing='drop').fit()

# Slope coefficient for bio_word_count.
slope = model.params['bio_word_count']

# Report the slope rounded to 3 decimal places.
print(f"Regression slope of followers on bio word count: {slope:.3f}")