from pathlib import Path

DATA = Path('/srv/data/flow-freshwater')

# Walk the dataset folder recursively (in sorted order) and report each
# regular file as "<path relative to DATA>  (<size> MB)".
for f in sorted(DATA.rglob('*')):
    if not f.is_file():
        # Directories and other non-file entries are not listed.
        continue
    size_mb = f.stat().st_size / 1e6  # bytes -> megabytes (decimal, 10**6)
    print(f"{f.relative_to(DATA)}  ({size_mb:,.1f} MB)")

Data_FLOW_survey.xlsx  (0.1 MB)
Dateset_Explanation.pdf  (0.3 MB)

import pandas as pd

# Find every Excel workbook under the dataset folder. Sorting keeps the
# choice of the "first" workbook deterministic from run to run.
xlsx = sorted(DATA.rglob('*.xlsx'))
if not xlsx:
    # Fail with an actionable message instead of the opaque
    # "IndexError: list index out of range" that xlsx[0] would raise.
    raise FileNotFoundError(f"No .xlsx files found under {DATA}")
print('Using:', xlsx[0].name)
# Load the workbook into a DataFrame (read_excel defaults to the first sheet).
df = pd.read_excel(xlsx[0])
# Last expression of the cell, so the notebook renders the first five rows.
df.head()

Using: Data_FLOW_survey.xlsx

# Print column names, non-null counts and dtypes for the whole table.
df.info()
# Summary statistics for every column (numeric and non-numeric alike),
# transposed so each original column becomes a row; show the first 20.
summary = df.describe(include='all').transpose()
summary.head(20)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 555 entries, 0 to 554
Data columns (total 53 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   No                     555 non-null    int64  
 1   ID                     555 non-null    object 
 2   group                  555 non-null    object 
 3   complete               555 non-null    int64  
 4   age                    546 non-null    float64
 5   gender                 543 non-null    object 
 6   attitude.1             553 non-null    float64
 7   attitude.2             550 non-null    float64
 8   attitude.3             81 non-null     float64
 9   awareness.1            550 non-null    float64
 10  awareness.2            548 non-null    float64
 11  awareness.3            81 non-null     float64
 12  behavioral_control.1   536 non-null    float64
 13  behavioral_control.2   533 non-null    float64
 14  behavioral_control.3   81 non-null     float64
 15  collective_efficacy.1  533 non-null    float64
 16  collective_efficacy.2  527 non-null    float64
 17  collective_efficacy.3  81 non-null     float64
 18  group_id.1             164 non-null    float64
 19  group_id.2             206 non-null    float64
 20  group_id.3             81 non-null     float64
 21  group_norms.1          147 non-null    float64
 22  group_norms.2          190 non-null    float64
 23  intrinsic_motivation   213 non-null    float64
 24  knowledge_score.1      546 non-null    float64
 25  knowledge_score.2      544 non-null    float64
 26  knowledge_score.3      80 non-null     float64
 27  nature_rel.1           555 non-null    float64
 28  nature_rel.2           555 non-null    float64
 29  nature_rel.3           81 non-null     float64
 30  interest.1             547 non-null    float64
 31  interest.2             545 non-null    float64
 32  past_behavior1.1       551 non-null    float64
 33  past_behavior1.2       548 non-null    float64
 34  past_behavior1.3       81 non-null     float64
 35  past_behavior2.1       542 non-null    float64
 36  past_behavior2.2       544 non-null    float64
 37  past_behavior2.3       81 non-null     float64
 38  plan_behavior1.1       550 non-null    float64
 39  plan_behavior1.2       551 non-null    float64
 40  plan_behavior1.3       81 non-null     float64
 41  plan_behavior2.1       537 non-null    float64
 42  plan_behavior2.2       539 non-null    float64
 43  plan_behavior2.3       81 non-null     float64
 44  personal_efficacy.1    537 non-null    float64
 45  personal_efficacy.2    529 non-null    float64
 46  personal_efficacy.3    81 non-null     float64
 47  personal_norms.1       548 non-null    float64
 48  personal_norms.2       545 non-null    float64
 49  personal_norms.3       81 non-null     float64
 50  skills.1               547 non-null    float64
 51  skills.2               545 non-null    float64
 52  skills.3               81 non-null     float64
dtypes: float64(48), int64(2), object(3)
memory usage: 229.9+ KB

import matplotlib.pyplot as plt

# Keep only the numeric columns, then draw a 50-bin histogram of the first
# one; if the table has no numeric columns, print a hint instead.
num = df.select_dtypes('number')
if num.shape[1] == 0:
    print('No direct numeric columns: explore df on your own.')
else:
    col = num.columns[0]
    num[col].plot(kind='hist', bins=50, figsize=(8, 4), title=col)
    # Adjust padding so the title and axis labels are not clipped.
    plt.tight_layout()

# ⚠️ RESTORE: this DISCARDS YOUR CHANGES to this notebook and resets it to the original.
# 1. Uncomment the line below (remove the #)   2. Run this cell
# 3. Then: menu File → Reload Notebook from Disk

# !git -C ~/citizen-science-data fetch -q origin && git -C ~/citizen-science-data checkout origin/main -- flow-freshwater.ipynb && echo "Restored. Now: File → Reload Notebook from Disk"

	No	ID	group	complete	age	gender	attitude.1	attitude.2	attitude.3	awareness.1	...	plan_behavior2.3	personal_efficacy.1	personal_efficacy.2	personal_efficacy.3	personal_norms.1	personal_norms.2	personal_norms.3	skills.1	skills.2	skills.3
0	1	1A5KDSL7X1	Treatment	2	18.0	male	3.666667	3.500000	NaN	4.50	...	NaN	4.0	4.0	NaN	2.25	2.00	NaN	3.000000	3.500000	NaN
1	2	1FZN47ETFG	Treatment	2	16.0	female	5.000000	4.000000	NaN	3.50	...	NaN	4.0	4.0	NaN	4.00	2.75	NaN	1.833333	4.333333	NaN
2	3	1GB6QR7D7W	Treatment	2	17.0	female	4.500000	4.500000	NaN	4.25	...	NaN	4.0	5.0	NaN	3.50	3.75	NaN	2.000000	3.666667	NaN
3	4	1GLGSGANPU	Treatment	3	47.0	female	5.000000	5.000000	4.833333	4.75	...	3.5	5.0	5.0	5.0	5.00	5.00	4.75	4.833333	5.000000	5.0
4	5	1M6DC33S2D	Treatment	2	19.0	male	4.166667	4.666667	NaN	4.25	...	NaN	3.0	5.0	NaN	3.50	3.25	NaN	1.666667	3.500000	NaN

	count	unique	top	freq	mean	std	min	25%	50%	75%	max
No	555.0	NaN	NaN	NaN	278.0	160.358972	1.0	139.5	278.0	416.5	555.0
ID	555	555	1A5KDSL7X1	1	NaN	NaN	NaN	NaN	NaN	NaN	NaN
group	555	3	Control2	250	NaN	NaN	NaN	NaN	NaN	NaN	NaN
complete	555.0	NaN	NaN	NaN	2.145946	0.353371	2.0	2.0	2.0	2.0	3.0
age	546.0	NaN	NaN	NaN	42.175824	19.516971	13.0	25.0	41.0	59.0	84.0
gender	543	2	female	281	NaN	NaN	NaN	NaN	NaN	NaN	NaN
attitude.1	553.0	NaN	NaN	NaN	4.230199	0.636721	1.0	4.0	4.333333	4.666667	5.0
attitude.2	550.0	NaN	NaN	NaN	4.254242	0.665762	1.5	4.0	4.333333	4.833333	5.0
attitude.3	81.0	NaN	NaN	NaN	4.469136	0.500077	3.0	4.0	4.666667	4.833333	5.0
awareness.1	550.0	NaN	NaN	NaN	4.276364	0.614444	2.0	4.0	4.5	4.75	5.0
awareness.2	548.0	NaN	NaN	NaN	4.412865	0.60181	1.75	4.0	4.5	5.0	5.0
awareness.3	81.0	NaN	NaN	NaN	4.657407	0.329404	3.5	4.5	4.75	5.0	5.0
behavioral_control.1	536.0	NaN	NaN	NaN	3.6875	0.87745	1.0	3.0	3.5	4.5	5.0
behavioral_control.2	533.0	NaN	NaN	NaN	3.704503	0.82911	1.0	3.0	3.5	4.5	5.0
behavioral_control.3	81.0	NaN	NaN	NaN	3.679012	0.841534	1.5	3.0	3.5	4.5	5.0
collective_efficacy.1	533.0	NaN	NaN	NaN	3.953096	0.965405	1.0	3.0	4.0	5.0	5.0
collective_efficacy.2	527.0	NaN	NaN	NaN	3.99241	0.941209	1.0	3.0	4.0	5.0	5.0
collective_efficacy.3	81.0	NaN	NaN	NaN	4.148148	0.807947	2.0	4.0	4.0	5.0	5.0
group_id.1	164.0	NaN	NaN	NaN	3.896341	0.950421	1.0	3.0	4.0	5.0	5.0
group_id.2	206.0	NaN	NaN	NaN	4.082524	0.84852	1.0	4.0	4.0	5.0	5.0

FLOW: Participant Survey Data from Freshwater Citizen Science Project

What's in the dataset

Load the data

First look

A first chart

Your turn