From ea5a6d82a3eda2c34148c3044bd9dd2bd82bdf7a Mon Sep 17 00:00:00 2001 From: Badr AlKhamissi Date: Sat, 6 Jul 2024 07:32:27 +0200 Subject: [PATCH 1/8] added support for localization --- .gitignore | 1 - .../fedorenko2010_localization/__init__.py | 33 ++++ .../langloc_fmri_run1_stim_set1.csv | 49 +++++ .../langloc_fmri_run1_stim_set2.csv | 49 +++++ .../langloc_fmri_run1_stim_set3.csv | 49 +++++ .../langloc_fmri_run1_stim_set4.csv | 49 +++++ .../langloc_fmri_run1_stim_set5.csv | 49 +++++ .../langloc_fmri_run2_stim_set1.csv | 49 +++++ .../langloc_fmri_run2_stim_set2.csv | 49 +++++ .../langloc_fmri_run2_stim_set3.csv | 49 +++++ .../langloc_fmri_run2_stim_set4.csv | 49 +++++ .../langloc_fmri_run2_stim_set5.csv | 49 +++++ .../model_helpers/huggingface.py | 38 +++- brainscore_language/model_helpers/localize.py | 174 ++++++++++++++++++ examples/score_localization.py | 26 +++ 15 files changed, 756 insertions(+), 6 deletions(-) create mode 100644 brainscore_language/data/fedorenko2010_localization/__init__.py create mode 100644 brainscore_language/data/fedorenko2010_localization/langloc_fmri_run1_stim_set1.csv create mode 100644 brainscore_language/data/fedorenko2010_localization/langloc_fmri_run1_stim_set2.csv create mode 100644 brainscore_language/data/fedorenko2010_localization/langloc_fmri_run1_stim_set3.csv create mode 100644 brainscore_language/data/fedorenko2010_localization/langloc_fmri_run1_stim_set4.csv create mode 100644 brainscore_language/data/fedorenko2010_localization/langloc_fmri_run1_stim_set5.csv create mode 100644 brainscore_language/data/fedorenko2010_localization/langloc_fmri_run2_stim_set1.csv create mode 100644 brainscore_language/data/fedorenko2010_localization/langloc_fmri_run2_stim_set2.csv create mode 100644 brainscore_language/data/fedorenko2010_localization/langloc_fmri_run2_stim_set3.csv create mode 100644 brainscore_language/data/fedorenko2010_localization/langloc_fmri_run2_stim_set4.csv create mode 100644 
brainscore_language/data/fedorenko2010_localization/langloc_fmri_run2_stim_set5.csv create mode 100644 brainscore_language/model_helpers/localize.py create mode 100644 examples/score_localization.py diff --git a/.gitignore b/.gitignore index 5f7bafda..8a001910 100644 --- a/.gitignore +++ b/.gitignore @@ -136,7 +136,6 @@ dmypy.json ### project specific additions: -brainscore_language/data html .vscode *.code-workspace diff --git a/brainscore_language/data/fedorenko2010_localization/__init__.py b/brainscore_language/data/fedorenko2010_localization/__init__.py new file mode 100644 index 00000000..22a51d27 --- /dev/null +++ b/brainscore_language/data/fedorenko2010_localization/__init__.py @@ -0,0 +1,33 @@ +import pandas as pd + +from glob import glob +from pathlib import Path + +from brainscore_language import data_registry + +BIBTEX = """@article{Fedorenko2010NewMF, + title={New method for fMRI investigations of language: defining ROIs functionally in individual subjects.}, + author={Evelina Fedorenko and Po-Jang Hsieh and Alfonso Nieto-Castanon and Susan L. Whitfield-Gabrieli and Nancy G. 
Kanwisher}, + journal={Journal of neurophysiology}, + year={2010}, + volume={104 2}, + pages={1177-94}, + url={https://api.semanticscholar.org/CorpusID:740913} +}""" + +# Code adapted from: https://github.com/bkhmsi/brain-language-suma + +def load_data(): + paths = glob(f"{Path(__file__).parent }/*.csv") + data = pd.read_csv(paths[0]) + for path in paths[1:]: + run_data = pd.read_csv(path) + data = pd.concat([data, run_data]) + + data["sent"] = data["stim2"].apply(str.lower) + + for stimuli_idx in range(3, 14): + data["sent"] += " " + data[f"stim{stimuli_idx}"].apply(str.lower) + return data + +data_registry['Fedorenko2010.localization'] = load_data \ No newline at end of file diff --git a/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run1_stim_set1.csv b/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run1_stim_set1.csv new file mode 100644 index 00000000..ee684017 --- /dev/null +++ b/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run1_stim_set1.csv @@ -0,0 +1,49 @@ +stim1,stim2,stim3,stim4,stim5,stim6,stim7,stim8,stim9,stim10,stim11,stim12,stim13,stim14 +1,JUST,THE,BAREST,SUGGESTION,OF,A,HEEL,IS,FOUND,ON,TEENAGE,PUMPS,S +2,TO,THE,DIRECTORS,THE,PROBLEM,APPEARED,A,MATTER,OF,INTRIGUE,OR,DIPLOMACY,S +3,THERE,WAS,LITTLE,LIKELIHOOD,OF,ANY,CUSTOMERS,WALKING,IN,AT,THAT,HOUR,S +1,POME,OY,REE,HOLILY,SHOURN,NE,SLEOMING,WHIMP,REE,RERE,OS,OFUKE,N +2,OT,MOMP,VO,DETLERENCE,FROT,MOGS,ELIBONCE,POLVED,RO,OP,UMMOSITE,COMBLISION,N +3,CHITMENTS,OY,ORLS,TROR,WENDERT,COONGLIES,COURN,MOMICONLY,NE,SOOZED,AR,CONTROGOME,N +4,E,WOSE,RO,SPEONT,REE,INTLOSSION,OY,O,COMBOUSE,FUMS,OY,CHIGSHEN,N +5,KAKE,WEWS,BAPS,OSE,RECOSSED,REE,LENCHEN,WESEN,OY,REE,SUSSER,PRODENOTIONS,N +6,HU,WRELT,DOIL,ET,HUS,BEP,AR,LONK,AR,HU,COURN,KNEOL,N +4,HE,STOPPED,PACING,TO,STARE,AT,HAL,WITH,HIS,PALE,BLUE,EYES,S +5,A,NUMBER,OF,CONSIDERATIONS,SUGGEST,THAT,THIS,OCCURS,EARLY,IN,THE,PROCESS,S +6,TO,COMPUTE,YOUR,ADJUSTED,GROSS,INCOME,YOU,TOTAL,ALL,ITEMS,OF,INCOME,S 
+7,TRORE,OS,VO,UTROSION,RO,FEOL,ONIEDY,OM,DISTOLVES,ONIET,BLESE,DOOPLE,N +8,OMOILLY,TRORE,ONK,HORE,FOWBORS,RO,TOOD,SIKE,PLORRING,FRON,FISMS,INTLOSSIONS,N +9,OT,WAM,O,DELOOF,RO,SHEFT,EN,HUS,MOND,RO,TOSSMICOL,TROPIENS,N +7,HIS,WIFE,WAS,IN,DELICATE,HEALTH,AND,NURSING,AN,INFANT,WITH,MEASLES,S +8,HE,AVOIDED,SHOWING,ANY,SURPRISE,OR,ANNOYANCE,WHEN,NO,ONE,ANSWERED,HIM,S +9,KEITH,TOLD,PENNY,ABOUT,HIS,DREAM,TO,RETURN,TO,INDIA,AND,BURMA,S +10,REE,UMLY,EXPOITION,RO,PLIN,OS,MERTIVE,BOES,FROT,HABS,BEMIME,PENORITES,N +11,OT,WAM,LOKE,O,LONK,SLIN,LIGS,DRARK,SCROUGH,O,PIST,CIBSLE,N +12,E,COURN,VOT,CLIVE,RO,HY,POMS,NOM,DOD,E,WIBS,RO,N +10,THE,REPORTER,NODDED,AS,HE,MOVED,UP,BESIDE,HIM,AT,THE,BAR,S +11,IN,THE,STARLIGHT,HE,COULD,SEE,THE,TREES,STRIPPED,OF,THEIR,LEAVES,S +12,THE,TARGET,CHART,QUICKLY,AND,BRIEFLY,TELLS,YOU,WHICH,ADDITIVES,DO,WHAT,S +13,NEVER,AGAIN,DID,HE,ENTER,INTO,THE,RITUAL,OF,SHOWING,THE,APARTMENT,S +14,THEN,ANGELINA,TURNED,AND,WITH,AN,EASY,GRACE,WALKED,TOWARD,THE,KITCHEN,S +15,MANY,TIMES,SINCE,HIS,DEATH,THAT,MEMORY,HAD,WORRIED,AND,TROUBLED,HER,S +13,EOKS,OY,THOTE,FOCKETS,WAM,OY,CREOT,VOSUE,RO,OTS,MIGHTBEL,REMETIERS,N +14,E,RURNED,OSE,WONCHED,HOM,STRILE,DOIL,REE,CISTRE,OY,REE,FOUD,N +15,HU,WAM,POOTYING,O,BADES,OY,MOURLOUGH,BISPOUTS,FOM,REE,TULSH,UDEN,N +16,KYOTO,IS,THE,ANCIENT,CAPITAL,OF,JAPAN,AND,STILL,ITS,CULTURAL,CENTER,S +17,IT,IS,VERY,MUCH,A,MATTER,OF,BUILDING,THE,FOUNDATIONS,OF,COMMUNITY,S +18,THE,IMAGES,CAN,EASILY,BE,ALIGNED,WITH,A,HIGH,DEGREE,OF,ACCURACY,S +16,NIR,GLORKS,SWUBS,POMS,REE,CHOINER,SLERE,REE,WRO,CLIRERS,DERE,SHENDING,N +17,SNUSTIONS,CADE,RO,TE,TROR,ORS,SIFFS,ONIET,HY,WONDE,CITICORPHIP,OUDETITIES,N +18,E,INTROFORNE,HY,FRIEST,FANDY,RO,PEPS,OSE,DE,MOMP,OURBOLVES,CORCOINOBLE,N +19,LUT,REE,UMLY,LOND,E,WAM,GOVING,HOM,WAM,REE,PUNE,KILE,N +20,HU,BUCKOTES,RO,NIR,TROR,REE,WOOR,OSE,CHE,SLICKED,CLIONLY,OUTNODE,N +21,CY,PLIN,TICE,LOLKS,NURGERS,OY,REE,ORIENCE,HUD,LELD,REE,HORS,N +19,THIS,HAPPENED,IN,THE,MIDDLE,OF,A,DRINKING,BOUT,WITH,ANOTHER,BUM,S 
+20,WINSTON,TOOK,THE,CLOTHESBRUSH,OUT,OF,THE,CLOSET,AND,WENT,TO,WORK,S +21,THE,ROOF,BLOCKS,ARE,IN,TWO,LAYERS,AND,ARE,NOT,MORTARED,TOGETHER,S +22,I,WAS,HELD,UP,A,BIT,TRYING,TO,MAKE,A,LEFT,TURN,S +23,HE,CALLED,THE,STORE,OWNER,AND,TOGETHER,THEY,WENT,INTO,THE,STOCKROOM,S +24,AFTER,THAT,HE,WAS,NEVER,KNOWN,TO,RUN,OR,EVEN,WALK,FAST,S +22,REE,GISK,KNEENED,CY,NIR,HUBTOND,PITH,OVE,ORL,ET,HUS,BAPS,N +23,MIMP,CAINTS,REE,BOMS,JUMS,AR,REE,BONCHER,SLOD,INNI,REE,BAVE,N +24,SHINNON,TICED,OT,SU,FROT,QUEY,ROSS,EN,OP,MOUR,AULER,PIGHTFOME,N diff --git a/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run1_stim_set2.csv b/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run1_stim_set2.csv new file mode 100644 index 00000000..0cded43e --- /dev/null +++ b/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run1_stim_set2.csv @@ -0,0 +1,49 @@ +stim1,stim2,stim3,stim4,stim5,stim6,stim7,stim8,stim9,stim10,stim11,stim12,stim13,stim14 +1,PART,OF,THE,FAMILY,SHOULD,BE,SLEEPING,WHILE,THE,REST,IS,AWAKE,S +2,IT,MADE,NO,DIFFERENCE,THAT,MOST,EVIDENCE,POINTS,TO,AN,OPPOSITE,CONCLUSION,S +3,SHIPMENTS,OF,ARMS,FROM,WESTERN,COUNTRIES,COULD,SIMILARLY,BE,SEIZED,AS,CONTRABAND,S +1,HU,ONVINTED,O,STOLT,OY,TOCOL,ESDERTS,OSE,OY,PORY,ZOOSOUS,GEMUNTEERS,N +2,HU,BEXES,HUS,OPPREYES,OD,REE,BEROOF,FROT,OTUBLING,CAG,LILLOW,OTUBLING,N +3,EN,PLIN,SCOY,TRORE,DERE,SU,PORY,STORECTEST,OSE,SU,MUME,DETIVE,N +4,DRUTTY,OCTOILED,REE,PEVISION,PITH,INDETBERESTS,OSE,DOD,VOT,URTER,REE,ONCUMENCE,N +5,PLIN,PROCEFOSE,OS,MUME,HORE,EGGOCTIVE,FRON,GOVING,OOT,O,ROMMERSHIP,MICKET,N +6,MOGS,SCHOAT,MYNTEMS,LOGOY,MOILBOIN,O,MYNTEM,OY,CUROCOTIVE,REGONDS,OY,MUPOLS,N +4,I,WANT,TO,CREATE,THE,IMPRESSION,OF,A,COMPOUND,FULL,OF,CHILDREN,S +5,KATE,WENT,BACK,AND,REMINDED,THE,KITCHEN,WOMEN,OF,THE,SUPPER,PREPARATIONS,S +6,HE,KNELT,DOWN,AT,HIS,BED,AS,LONG,AS,HE,COULD,KNEEL,S +7,SOLFORE,HUD,MOFFS,BEOVES,REE,RORMS,OY,REE,LIFFS,TROR,REE,GONDOW,N 
+8,OSE,PLIN,WAM,LEFOSE,HU,BENON,RO,SCOY,HUS,STISQUINGLY,BOOLTISUL,JOWD,N +9,OT,OSTEORED,FROT,REE,PLEUPY,COURN,NE,SOMES,EN,OVE,WOB,UMLY,N +7,THERE,IS,NO,OCCASION,TO,FEEL,UNEASY,OR,DISTURBED,ABOUT,THESE,PEOPLE,S +8,USUALLY,THERE,ARE,MORE,FACTORS,TO,GOOD,SITE,PLANNING,THAN,FIRST,IMPRESSIONS,S +9,IT,WAS,A,RELIEF,TO,SHIFT,IN,HIS,MIND,TO,TECHNICAL,PROBLEMS,S +10,REE,TROPLEN,OY,SYMICILITY,OSE,MOTOLE,OGICS,INVIQUES,REE,CONGOPT,OY,VOSUTS,N +11,E,CRODGED,REE,BOOLTISELLY,FURNOINED,FIDING,ROOR,RO,REE,POMP,YORROW,PHOVE,N +12,VO,SPIEND,WOURN,UDER,SNINK,OY,ORGING,HOM,RO,FE,SUME,SNINGS,N +10,THE,ONLY,EXCEPTION,TO,THIS,IS,CERTAIN,BEES,THAT,HAVE,BECOME,PARASITES,S +11,IT,WAS,LIKE,A,LONG,THIN,LINE,DRAWN,THROUGH,A,PINK,CIRCLE,S +12,I,COULD,NOT,CLING,TO,MY,PAST,NOR,DID,I,WISH,TO,S +13,EACH,OF,THOSE,TICKETS,WAS,OF,GREAT,VALUE,TO,ITS,RIGHTFUL,RECIPIENT,S +14,I,TURNED,AND,WATCHED,HIM,STRIDE,DOWN,THE,CENTER,OF,THE,ROAD,S +15,HE,WAS,READYING,A,BATCH,OF,SOURDOUGH,BISCUITS,FOR,THE,DUTCH,OVEN,S +13,THUN,HU,NOLIGNS,FROT,REE,DRE,DOOD,OY,REE,WHOOLS,HUD,SLELLEN,N +14,REE,HOLL,WAM,OLPO,O,RECUNK,OY,OLBUST,O,DEGOLL,OY,WOFT,N +15,REE,MEP,WROW,TROR,REE,COND,HUD,WIED,OOT,OY,REE,SKO,N +16,HER,GLANCE,SWUNG,PAST,THE,TRAILER,WHERE,THE,TWO,DRIVERS,WERE,STANDING,S +17,QUESTIONS,CAME,TO,ME,FROM,ALL,SIDES,ABOUT,MY,WORLD,CITIZENSHIP,ACTIVITIES,S +18,I,INTRODUCED,MY,FRIEND,LARRY,TO,POPS,AND,WE,MADE,OURSELVES,COMFORTABLE,S +16,CHE,SWEW,NIR,NOKE,OD,O,LISTUE,OSE,OBONED,REE,COSE,FODDLE,N +17,OT,WAM,O,ROMPLE,ICEFY,FRUCISOX,WHISS,HY,SETHER,HUD,GETEN,TE,N +18,QUEY,OPURGED,O,PROGREMNS,CY,WHISS,NOUIMITANO,WAM,DITOCED,INNI,FOVE,MINTRICTS,N +19,REE,EMOKEROCKS,OD,PLIN,UTROSION,TET,O,STONNOCK,FOM,POPLEQUENT,DASSCOUTH,VETHERIVES,N +20,PLIN,OS,O,PURPIFICONT,OPHONCE,LUT,OTS,IMPORLENCE,SHOURN,VOT,NE,OTESCEROTED,N +21,EN,PLIN,WOB,HOO,WIST,NE,OSCO,RO,BELECT,EDY,ODMIEIT,MISTIVES,N +19,BUT,THE,ONLY,LOVE,I,WAS,GIVING,HIM,WAS,THE,PURE,KIND,S +20,HE,BECKONED,TO,HER,FROM,THE,DOOR,AND,SHE,SLIPPED,QUIETLY,OUTSIDE,S 
+21,BY,THIS,TIME,LARGE,NUMBERS,OF,THE,AUDIENCE,HAD,LEFT,THE,HALL,S +22,THE,GIRL,KNEELED,BY,HER,HUSBAND,WITH,ONE,ARM,AT,HIS,BACK,S +23,MIKE,CAUGHT,THE,BALL,JUST,AS,THE,CATCHER,SLID,INTO,THE,BAG,S +24,BRANNON,TIMED,IT,SO,THAT,THEY,RODE,IN,AN,HOUR,AFTER,NIGHTFALL,S +22,WHOSS,HOLILIES,ONK,SEVING,OSE,RECIGOL,FILDS,ONK,BOOLED,FOM,MORCED,OFO,N +23,REE,DINCONUING,SYLERVIMOTION,OY,BLESE,TORCES,OS,O,RESTLY,LUT,BODOSSORY,STORESS,N +24,REE,SLIN,LAN,MOFFS,SWOMELY,RO,REE,PHOVE,OSE,MIOLVES,O,NURGER,N diff --git a/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run1_stim_set3.csv b/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run1_stim_set3.csv new file mode 100644 index 00000000..83ed9dfd --- /dev/null +++ b/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run1_stim_set3.csv @@ -0,0 +1,49 @@ +stim1,stim2,stim3,stim4,stim5,stim6,stim7,stim8,stim9,stim10,stim11,stim12,stim13,stim14 +1,HE,ENLISTED,A,STAFF,OF,LOYAL,EXPERTS,AND,OF,MANY,ZEALOUS,VOLUNTEERS,S +2,HE,BASES,HIS,APPROACH,ON,THE,BELIEF,THAT,ANYTHING,CAN,FOLLOW,ANYTHING,S +3,IN,THIS,PLAY,THERE,WERE,SO,MANY,CHARACTERS,AND,SO,MUCH,DETAIL,S +1,LOGOY,WHOSS,GROILS,ONK,FROFTLY,CROUND,ELKBY,DUY,OSE,BUKED,INNI,CLEOD,N +2,REE,RILKS,RO,LEOMS,FUCOCIES,SHOURN,NE,PELPRONTIOLLY,REGUGNS,OSE,OBTINOSSLY,ELUSIPATED,N +3,LEENS,CAG,FOGE,OP,IMPORLENT,CLIDGE,BESHOON,REE,PRESIBOTE,OSE,REE,FOPURCY,N +4,QUAT,MOLLERED,WAM,FROT,O,NER,CONGOPT,OY,EMERINUTE,WAM,COING,BORK,N +5,CUYS,WAM,SOO,INVINGED,EN,HUS,OPE,TROPIENS,RO,POB,MUME,OTLINSION,N +6,UNENGES,OSE,GROPEGREET,ONK,SHICKED,TROR,CLERIGO,WEOFLY,TROR,OP,OMRONIC,FASK,N +4,SCOTTY,ACCEPTED,THE,DECISION,WITH,INDIFFERENCE,AND,DID,NOT,ENTER,THE,ARGUMENTS,S +5,THIS,PROCEDURE,IS,MUCH,MORE,EFFECTIVE,THAN,GIVING,OUT,A,MEMBERSHIP,PACKET,S +6,MOST,SCHOOL,SYSTEMS,TODAY,MAINTAIN,A,SYSTEM,OF,CUMULATIVE,RECORDS,OF,PUPILS,S +7,REE,WRO,PURROTATE,FOLTED,FREIR,GOYON,ONIET,KNUNTY,YLPHS,TROR,REE,WOOR,N +8,BOME,DEET,ONK,BELLER,FOM,SUME,WOFT,FRON,EDY,FOGE,OY,DRONNER,N 
+9,SOTHPROT,SOLILIR,INGUFFIPOTIONS,HABS,BOUN,MOMP,CY,PUTICOL,USCICERS,EN,UNTER,ETEOS,N +7,SOMEONE,HAD,MOVED,BEYOND,THE,RANGE,OF,THE,LIGHT,FROM,THE,WINDOW,S +8,AND,THIS,WAS,BEFORE,HE,BEGAN,TO,PLAY,HIS,STARTLINGLY,BEAUTIFUL,JAZZ,S +9,IT,APPEARED,THAT,THE,THEORY,COULD,BE,SAVED,IN,ONE,WAY,ONLY,S +10,VISS,UCIPER,OS,SETHER,OY,FOVE,CHIGSHEN,OSE,WISM,OY,O,POWRITIST,N +11,QUOTHER,OM,VOT,WANTY,LOMS,HUS,JOW,WAM,VO,CONCUCT,OY,MONE,N +12,FOM,JUMS,OP,INSTONE,HU,PROUGHT,OY,OMMODING,RO,FREM,FOM,HERF,N +10,THE,PROBLEM,OF,SOLIDARITY,AND,MORALE,AGAIN,INVOLVES,THE,CONCEPT,OF,VALUES,S +11,I,CROSSED,THE,BEAUTIFULLY,FURNISHED,LIVING,ROOM,TO,THE,PALE,YELLOW,PHONE,S +12,NO,CLIENT,WOULD,EVER,THINK,OF,ASKING,HIM,TO,DO,SUCH,THINGS,S +13,THEN,HE,NOTICED,THAT,THE,DRY,WOOD,OF,THE,WHEELS,HAD,SWOLLEN,S +14,THE,HULL,WAS,ALSO,A,RESULT,OF,ALMOST,A,DECADE,OF,WORK,S +15,THE,RED,GLOW,FROM,THE,COVE,HAD,DIED,OUT,OF,THE,SKY,S +13,PONENDS,ONK,UPTIN,CONGORNED,FROT,OMPREGONTIC,IDTHIOSSES,MAK,COIRE,REETH,RO,DEBEY,N +14,E,GOL,O,WHICK,LOAK,ET,FREIR,FOPES,AR,DE,WEWS,POMS,N +15,PIFFURES,CAG,NE,LOZEN,EN,REE,PEMLIC,ETEOS,OSE,QUEN,OD,TEERS,N +16,SHE,BLEW,HER,NOSE,ON,A,TISSUE,AND,OPENED,THE,COKE,BOTTLE,S +17,IT,WAS,A,SIMPLE,IVORY,CRUCIFIX,WHICH,MY,MOTHER,HAD,GIVEN,ME,S +18,THEY,ADOPTED,A,PROGRAM,BY,WHICH,LOUISIANA,WAS,DIVIDED,INTO,FIVE,DISTRICTS,S +16,BELLER,SHOB,OT,RO,LIOTA,OSE,MEE,EF,HU,HUD,OCESTED,OTUBLING,N +17,TRORE,WAM,REE,REDUCETION,EN,LIGET,WHISS,DE,STEVEPSED,DOD,VOT,ECITS,N +18,CY,REE,TICE,HU,HUD,SMOLED,SQUEE,LEGORMETTES,HU,HUD,COYERS,DOIL,N +19,WRO,CHIRPLY,CONWRERKING,SPOWES,DESOGGED,FOM,PEMLIC,ENHEPMENT,ONK,NOY,OD,DISTREY,N +20,WHIKE,HOULE,HOGILCOTIVE,OYERS,DERE,STULL,CONVIDOTE,REE,BIGS,WOURN,PODE,INDECT,N +21,AIRE,PUCKED,UD,REE,TIMEL,CHE,WAM,HEDDING,FOM,REE,FOBBITOL,GOULD,N +19,THE,EXUBERANCE,ON,THIS,OCCASION,SET,A,STANDARD,FOR,SUBSEQUENT,DARTMOUTH,GATHERINGS,S +20,THIS,IS,A,SIGNIFICANT,ADVANCE,BUT,ITS,IMPORTANCE,SHOULD,NOT,BE,EXAGGERATED,S +21,IN,THIS,WAY,YOU,WILL,BE,ABLE,TO,DETECT,ANY,OBVIOUS,MISTAKES,S 
+22,WHOLE,FAMILIES,ARE,MOVING,AND,REMOVAL,FIRMS,ARE,BOOKED,FOR,MONTHS,AHEAD,S +23,THE,CONTINUING,MODERNIZATION,OF,THESE,FORCES,IS,A,COSTLY,BUT,NECESSARY,PROCESS,S +24,THE,THIN,MAN,MOVED,SWIFTLY,TO,THE,PHONE,AND,DIALED,A,NUMBER,S +22,HU,TONCED,HUS,HURTLY,FOPE,LOWOND,REE,DRE,BEP,OY,REE,RODER,N +23,E,ORGED,ONIET,REE,BOMPLE,BESHOON,LIRR,OSE,WEOTH,EN,HUS,SCOYS,N +24,E,WREW,EN,FROT,MOBONT,FROT,E,DOD,VOT,HABS,EDY,CHOND,N diff --git a/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run1_stim_set4.csv b/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run1_stim_set4.csv new file mode 100644 index 00000000..d648496b --- /dev/null +++ b/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run1_stim_set4.csv @@ -0,0 +1,49 @@ +stim1,stim2,stim3,stim4,stim5,stim6,stim7,stim8,stim9,stim10,stim11,stim12,stim13,stim14 +1,TODAY,WHOLE,GRAINS,ARE,FRESHLY,GROUND,EVERY,DAY,AND,BAKED,INTO,BREAD,S +2,THE,RIGHT,TO,LEAVE,LEGACIES,SHOULD,BE,SUBSTANTIALLY,REDUCED,AND,ULTIMATELY,ELIMINATED,S +3,DEANS,CAN,FORM,AN,IMPORTANT,BRIDGE,BETWEEN,THE,PRESIDENT,AND,THE,FACULTY,S +1,MARTER,WEPDEGS,OSE,HUS,CLIDE,WIST,LIMS,EN,HUS,LALE,FATEST,HOULE,N +2,FLIUR,RO,FROT,HU,WAM,ENGEFIATED,PITH,LONK,ESBONT,OLEJURSITY,EN,BREEKDON,N +3,REE,ORQUINTRA,WAM,ODMIEITLY,OD,OTS,MUSTLE,OSE,OT,PLOWLS,MOGS,RETLENNINGLY,N +4,REE,SOVEAL,OSE,CRYCROCETICAL,CONSOPRASSES,OY,PLIN,CONTITOG,RO,AFFAKE,REE,ETOA,N +5,LUT,O,SOTHPROT,HORE,DETINCTS,AMILYVOS,OY,PLIN,STORESS,MAK,NE,ITTACILATING,N +6,LUT,E,UMLY,PROUGHT,OY,FROT,EN,REE,MIGGLE,OY,REE,NIFFS,N +4,WHAT,MATTERED,WAS,THAT,A,NEW,CONCEPT,OF,AMERICANS,WAS,BEING,BORN,S +5,CURT,WAS,TOO,INVOLVED,IN,HIS,OWN,PROBLEMS,TO,PAY,MUCH,ATTENTION,S +6,ORANGES,AND,GRAPEFRUIT,ARE,SHIPPED,FROM,FLORIDA,WEEKLY,FROM,AN,ORGANIC,FARM,S +7,HU,HOR,RO,TOPE,PITH,CLETTRATION,OSE,UNTER,EMECIALAL,DENCURBANCE,OSE,APOGOE,N +8,LUT,REE,INRONGOTION,OD,REE,DYPIVICS,OY,SEVUCATION,WAM,UPTIN,QUIKE,MISRIALING,N 
+9,CHE,WOURN,MITHER,LIMS,EN,PINGER,FRON,WIE,OY,HERELINESS,OSE,BORETUM,N +7,THE,TWO,LAWMEN,HALTED,THEIR,WAGON,ABOUT,TWENTY,YARDS,FROM,THE,DOOR,S +8,BARE,FEET,ARE,BETTER,FOR,SUCH,WORK,THAN,ANY,FORM,OF,SLIPPER,S +9,SOMEWHAT,SIMILAR,INVESTIGATIONS,HAVE,BEEN,MADE,BY,MEDICAL,OFFICERS,IN,OTHER,AREAS,S +10,OT,WOURN,HABS,BOUN,OOMY,RO,IMARSIFY,AR,OCIOG,CY,OTS,APAR,N +11,HU,OLPO,RECOSSED,HIMRALF,FROT,HU,HUD,OP,ORUBEAL,NURGER,OY,SYNNICILITIES,N +12,HUS,WROLL,BLONS,EAFS,DOLDED,INSAWS,REE,USIVE,OBUL,OY,HUS,FOPE,N +10,MRS,OLIVER,IS,MOTHER,OF,FIVE,CHILDREN,AND,WIFE,OF,A,MACHINIST,S +11,WHETHER,OR,NOT,WALLY,LOST,HIS,JOB,WAS,NO,CONCERN,OF,MINE,S +12,FOR,JUST,AN,INSTANT,HE,THOUGHT,OF,APPEALING,TO,THEM,FOR,HELP,S +13,PARENTS,ARE,OFTEN,CONCERNED,THAT,ORTHODONTIC,APPLIANCES,MAY,CAUSE,TEETH,TO,DECAY,S +14,I,GOT,A,QUICK,LOOK,AT,THEIR,FACES,AS,WE,WENT,PAST,S +15,PICTURES,CAN,BE,TAKEN,IN,THE,PUBLIC,AREAS,AND,WHEN,ON,TOURS,S +13,HU,RURNED,OSE,RAWLS,OTHISS,REE,PALODE,CROUND,LOWOND,REE,RECK,HOULE,N +14,SUBON,OSE,JUROE,CADE,TROR,REE,WOOR,OSE,BLAGGED,HOM,PITH,FREM,N +15,E,GRUN,ONIET,OSE,SPITTERED,SCROUGH,REE,SNONT,ROOR,RO,REE,WOOR,N +16,BETTER,SHOW,IT,TO,FIONA,AND,SEE,IF,HE,HAD,OMITTED,ANYTHING,S +17,THERE,WAS,THE,REVOLUTION,IN,TIBET,WHICH,WE,PRETENDED,DID,NOT,EXIST,S +18,BY,THE,TIME,HE,HAD,SMOKED,THREE,CIGARETTES,HE,HAD,CALMED,DOWN,S +16,REE,CLOUNS,BULCTS,DARDWARD,OSE,BURBS,PAWDENLY,INNI,O,CREOT,BLONS,TENNEL,N +17,PLIN,WIST,HERF,HOM,RO,GIT,OOT,OY,HUS,TOTTLE,LICKLE,SHON,N +18,DOIL,REE,SPEE,HU,SQUIMBLED,OSE,WRELT,ET,REE,ENCE,OY,TALIAME,N +19,HUS,YIERS,OY,CAMMESQUING,HUD,TAUNCH,HOM,REE,VOSUE,OY,GALER,DISCICRALS,N +20,HU,RECOSSED,DATSUO,OY,O,SOLILIR,THANG,HU,HUD,WIMNELVES,EN,CHIDO,N +21,EN,REE,CISTRE,OY,HUS,BRILLEINE,CUMBS,SUT,O,WROLL,BLONS,GLUSTCAP,N +19,TWO,SHARPLY,CONTRASTING,PLACES,DESIGNED,FOR,PUBLIC,ENJOYMENT,ARE,NOW,ON,DISPLAY,S +20,WHITE,HOUSE,LEGISLATIVE,AIDES,WERE,STILL,CONFIDENT,THE,BILL,WOULD,PASS,INTACT,S +21,ANNE,PICKED,UP,THE,TOWEL,SHE,WAS,HEMMING,FOR,THE,HOSPITAL,GUILD,S 
+22,HE,TILTED,HIS,HOMELY,FACE,TOWARD,THE,DRY,BED,OF,THE,RIVER,S +23,I,ASKED,ABOUT,THE,BATTLE,BETWEEN,LIFE,AND,DEATH,IN,HIS,PLAYS,S +24,I,KNEW,IN,THAT,MOMENT,THAT,I,DID,NOT,HAVE,ANY,CHOICE,S +22,HU,HUD,SHET,HUS,WOOR,PITH,REE,BRADE,NURGER,SCRENTS,RO,OT,N +23,E,FAYURED,E,COURN,STANT,PRIBLIRALLY,OTUBLING,FOM,O,COAMLE,OY,WOOKS,N +24,MAVE,SHRULLED,OD,HUS,SPOMES,ROAT,OSE,PUCKED,UD,REE,COS,KEES,N diff --git a/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run1_stim_set5.csv b/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run1_stim_set5.csv new file mode 100644 index 00000000..6f32aaed --- /dev/null +++ b/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run1_stim_set5.csv @@ -0,0 +1,49 @@ +stim1,stim2,stim3,stim4,stim5,stim6,stim7,stim8,stim9,stim10,stim11,stim12,stim13,stim14 +1,MR,WENDELL,AND,HIS,BRIDE,WILL,LIVE,IN,HIS,LAKE,FOREST,HOUSE,S +2,PRIOR,TO,THAT,HE,WAS,ASSOCIATED,WITH,LONG,ISLAND,UNIVERSITY,IN,BROOKLYN,S +3,THE,ORCHESTRA,WAS,OBVIOUSLY,ON,ITS,METTLE,AND,IT,PLAYED,MOST,RESPONSIVELY,S +1,JUMS,REE,CONEST,SUFFISTION,OY,O,HOUL,OS,FOUSE,OD,FEETOGE,PUNES,N +2,RO,REE,DICONTORS,REE,TROPLEN,OSTEORED,O,MOLLER,OY,INTROWLS,OM,DITLENONY,N +3,TRORE,WAM,TOTTLE,HOGELIHOOD,OY,EDY,CETTOWERS,GOULING,EN,ET,FROT,MOUR,N +4,HU,SLUPPED,MABING,RO,STOME,ET,HAK,PITH,HUS,POMP,DRUE,EAFS,N +5,O,NURGER,OY,DIMBEDEROTIONS,SUFFIST,FROT,PLIN,UXCURS,OODLY,EN,REE,STORESS,N +6,RO,COMPYNE,VOUR,OPWOSTED,GRODE,INGIME,HOO,TOTIT,ORS,ILEND,OY,INGIME,N +4,THE,SOCIAL,AND,PSYCHOLOGICAL,CONSEQUENCES,OF,THIS,CONTINUE,TO,AFFECT,THE,AREA,S +5,BUT,A,SOMEWHAT,MORE,DETAILED,ANALYSIS,OF,THIS,PROCESS,MAY,BE,ILLUMINATING,S +6,BUT,I,ONLY,THOUGHT,OF,THAT,IN,THE,MIDDLE,OF,THE,NIGHT,S +7,HUS,WISM,WAM,EN,DITIROTE,HEONSE,OSE,NYREING,OP,INFONE,PITH,MEESHES,N +8,HU,OTUPSED,SHIMING,EDY,MURPLISE,OM,ONSUXONCE,QUEN,VO,OVE,ORSHERED,HOM,N +9,KEETH,TORD,PUNDY,ONIET,HUS,BLEEM,RO,LEFORN,RO,EMCIO,OSE,BELMO,N 
+7,HE,HAS,TO,COPE,WITH,FRUSTRATION,AND,OTHER,EMOTIONAL,DISTURBANCE,AND,ANOMIE,S +8,BUT,THE,INFORMATION,ON,THE,DYNAMICS,OF,POPULATION,WAS,OFTEN,QUITE,MISLEADING,S +9,SHE,WOULD,RATHER,LIVE,IN,DANGER,THAN,DIE,OF,LONELINESS,AND,BOREDOM,S +10,REE,RENICKER,GEDDED,AR,HU,MOFFS,UD,BEBILL,HOM,ET,REE,BOM,N +11,EN,REE,STONLISKS,HU,COURN,MEE,REE,PREES,STRICKED,OY,FREIR,LEORED,N +12,REE,TONBET,CHOCE,DROCKLY,OSE,BRUICLY,TULLS,HOO,WHISS,OPRITIDES,FE,QUAT,N +10,IT,WOULD,HAVE,BEEN,EASY,TO,IDENTIFY,AS,OPIUM,BY,ITS,ODOR,S +11,HE,ALSO,REMINDED,HIMSELF,THAT,HE,HAD,AN,UNUSUAL,NUMBER,OF,POSSIBILITIES,S +12,HIS,SMALL,BLACK,EYES,DARTED,INSIDE,THE,OLIVE,OVAL,OF,HIS,FACE,S +13,HE,TURNED,AND,RACED,ACROSS,THE,PARADE,GROUND,TOWARD,THE,ROCK,HOUSE,S +14,SUSAN,AND,JULIA,CAME,FROM,THE,DOOR,AND,DRAGGED,HIM,WITH,THEM,S +15,I,SPUN,ABOUT,AND,CLATTERED,THROUGH,THE,FRONT,ROOM,TO,THE,DOOR,S +13,NUDER,OGICS,DOD,HU,URTER,INNI,REE,SODUOL,OY,SHIMING,REE,ONIMPMENT,N +14,THUN,ANMOCICA,RURNED,OSE,PITH,OP,OOMY,GRORT,WORVED,LOWOND,REE,LENCHEN,N +15,PORY,TIVES,SIRKS,HUS,WEOTH,FROT,SILORY,HUD,GURRIED,OSE,TROOMLED,NIR,N +16,THE,CLOUDS,BULGED,DOWNWARD,AND,BURST,SUDDENLY,INTO,A,GREAT,BLACK,FUNNEL,S +17,THIS,WILL,HELP,HIM,TO,GET,OUT,OF,HIS,LITTLE,TACKLE,SHOP,S +18,DOWN,THE,TREE,HE,SCRAMBLED,AND,KNELT,AT,THE,EDGE,OF,FOLIAGE,S +16,KUYTO,OS,REE,ONCEINE,COTITOT,OY,JOBON,OSE,STULL,OTS,CEDDUROL,CISTRE,N +17,OT,OS,VODY,MUME,O,MOLLER,OY,BOACHING,REE,FEELLOTIONS,OY,CORCORITY,N +18,REE,ILEDES,CAG,OOTIPY,NE,OLYEURS,PITH,O,HISK,DETRIE,OY,IMMOROCY,N +19,PLIN,HOMMENED,EN,REE,MIGGLE,OY,O,DRULLING,BUIT,PITH,OLUPSER,BUR,N +20,WINSHAN,TEEK,REE,CLORNEDBRUNK,OOT,OY,REE,PLUSET,OSE,WEWS,RO,WOFT,N +21,REE,LOUF,DROCKS,ONK,EN,WRO,TOGERS,OSE,ONK,VOT,RURTORED,LOGELLER,N +19,HIS,YEARS,OF,CAMPAIGNING,HAD,TAUGHT,HIM,THE,VALUE,OF,WATER,DISCIPLINE,S +20,HE,REMINDED,MATSUO,OF,A,SIMILAR,THING,HE,HAD,WITNESSED,IN,CHINA,S +21,IN,THE,CENTER,OF,HIS,BRILLIANT,CURLS,SAT,A,SMALL,BLACK,SKULLCAP,S +22,HE,HAD,SHUT,HIS,DOOR,WITH,THE,BRASS,NUMBER,SCREWED,TO,IT,S 
+23,I,FIGURED,I,COULD,STAND,PRACTICALLY,ANYTHING,FOR,A,COUPLE,OF,WEEKS,S +24,DAVE,SHRUGGED,ON,HIS,SPORTS,COAT,AND,PICKED,UP,THE,CAR,KEYS,S +22,E,WAM,HERN,UD,O,BIP,FLUING,RO,MAMP,O,LELD,RURN,N +23,HU,SHOLLED,REE,STOGS,URNER,OSE,LOGELLER,QUEY,WEWS,INNI,REE,STOREBOOM,N +24,AULER,FROT,HU,WAM,NUDER,WROWN,RO,MUN,OM,ENON,WASK,FARS,N diff --git a/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run2_stim_set1.csv b/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run2_stim_set1.csv new file mode 100644 index 00000000..24c9eb24 --- /dev/null +++ b/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run2_stim_set1.csv @@ -0,0 +1,49 @@ +stim1,stim2,stim3,stim4,stim5,stim6,stim7,stim8,stim9,stim10,stim11,stim12,stim13,stim14 +1,OT,BEPIME,SU,MOMOCOLOUS,AR,RO,REEM,O,POME,OY,REE,CLIOLNESS,N +2,E,WAM,GRORETUL,FOM,FREIR,INSOLLS,INNI,HY,NIED,FOM,PLIN,ENVENIESTS,N +3,PORY,OY,FREM,HABS,TRUFTED,INNI,REE,CERIES,OSE,TORMS,OSE,SEOBOLES,N +1,HE,GIVES,CREDIT,FOR,THE,PROMOTION,TO,HIS,NEW,OUTLOOK,ON,LIFE,S +2,NEED,FOR,NOVELTY,MAY,BE,A,SYMPTOM,OF,CULTURAL,FATIGUE,AND,INSTABILITY,S +3,HE,SAT,UP,AND,WATCHED,AS,THEY,PULLED,THEMSELVES,OVER,THE,STERN,S +4,HE,LEFT,THE,REST,OF,HIS,THINGS,AND,RETURNED,TO,THE,LOBBY,S +5,CHINESE,AND,INDIAN,MERCHANTS,ACROSS,THE,STREET,WERE,SLAMMING,THEIR,STEEL,SHUTTERS,S +6,A,CANDLE,ALIGHT,IN,THE,AIR,DIRECTS,ITS,FLAME,AND,SMOKE,UPWARDS,S +4,HET,UR,BRIGULOTE,O,TOTTLE,OD,REE,SUPIMUM,RIZE,OY,REE,ONOLUSSO,N +5,PLIN,DE,BERIECE,WIST,PELPRONTIOLLY,SHEEDEN,REE,SOTENTEEL,MONJET,FOM,REE,ECREYMENT,N +6,BONSORIONS,HABS,HUD,WRO,REOBENS,FOM,PERLONTING,SU,LONK,EN,FREIR,INGUFFIPOTIONS,N +7,THIS,CONFORMITY,REPRESENTS,A,DESPERATE,ATTEMPT,TO,STABILIZE,A,HOPELESSLY,UNSTABLE,ENVIRONMENT,S +8,THE,OTHER,PATRONS,WERE,TAXI,DRIVERS,AND,ART,STUDENTS,AND,SMALL,SHOPKEEPERS,S +9,MIKE,PASSED,THROUGH,IT,AND,MOVED,TOWARD,THE,DARK,MASS,OF,HORSES,S +7,NISITOST,ONK,WELMIME,RO,COTH,MEE,QUAT,BLESE,MIDIGOTER,SETHERS,CAG,FE,N 
+8,REE,FISMS,SOTURCIP,EN,EOKS,MOIFS,OS,TET,OBILL,FOM,NER,RECOLDIVES,N +9,CUDY,HOUT,EN,REE,CLOTE,FLONTERS,WIST,HERF,TEEP,UD,REE,BOMPEROTERT,N +10,AT,ONCE,A,BEVY,OF,DOGS,WAS,SNAPPING,AND,SNARLING,AROUND,HIM,S +11,AN,OBJECTIVE,SCALE,WAS,DEVELOPED,FOR,RATING,SCHOOL,NEIGHBORHOODS,FROM,THESE,DATA,S +12,WE,SPEND,MILLIONS,OF,DOLLARS,EVERY,YEAR,ON,FORTUNE,TELLERS,AND,SOOTHSAYERS,S +10,RORING,MOBONTS,OY,UNTEMNS,BRIBIS,REE,SEPHEGMITILITY,OY,MOTOCICOL,TEORERS,OS,OPERPROISING,N +11,CHE,PUCKED,UD,REE,CONY,OSE,NUFFLED,NIR,DAT,WOSK,TOTTLE,NENS,N +12,HU,JUBBED,UD,OSE,RURNED,OTOUSE,RO,MEE,REE,SOTOL,WOOR,PLUSING,N +13,TRORE,WAM,TOTTLE,CHORKS,ARYUMS,WOURN,URTER,PLIN,CHOFT,RORING,REE,GONTER,N +14,E,HUD,HELT,REE,DRORD,QUEY,DERE,ROXING,WHIMP,SOUSSING,REE,STOITS,N +15,HU,SELVED,MITHER,FRON,HEORN,REE,GOIR,FROT,SWECS,OTHISS,REE,ORIENCE,N +13,HE,SAT,DOWN,ON,AN,OLD,BOX,AND,FOCUSED,ON,THE,PROBLEM,S +14,THIS,IS,AN,ASSUMPTION,WITH,WHICH,FEW,WOULD,BE,DISPOSED,TO,QUARREL,S +15,IT,WAS,A,ROUGH,LONG,RIDE,THROUGH,THE,MUD,AND,POT,HOLES,S +16,QUEY,POFFED,PONCHES,FROT,DERE,FROPED,DORD,CREY,OGIMMS,REE,BLONS,HOLLS,N +17,WHIKE,RUPPER,OS,REE,CROUND,SEOD,OY,REE,COGGON,BLONS,RUPPER,FREOT,N +18,FINNA,CLIONLY,GOL,O,CROOM,OSE,PRORTED,RO,PREEP,UD,REE,SOCOR,N +16,COMPUTERS,ARE,BEING,USED,TO,KEEP,BRANCH,INVENTORIES,AT,MORE,WORKABLE,LEVELS,S +17,THERE,ARE,THOUSANDS,OF,SQUARE,MILES,OF,SALT,PAN,WHICH,ARE,HIDEOUS,S +18,I,WENT,TO,VISIT,ALFRED,IN,THE,KINGSTON,HOSPITAL,A,FEW,TIMES,S +19,HE,SEEMED,TO,BE,LOOKING,AT,A,POINT,ABOVE,THE,LITTLE,WINDOW,S +20,ALMOST,NO,EMPIRICAL,WORK,HAS,BEEN,DONE,ON,THE,PROBLEM,OF,ALIENATION,S +21,IF,WE,LOOK,AT,RECENT,ART,WE,FIND,IT,PREOCCUPIED,WITH,FORM,S +19,OD,REE,WOB,HU,SLUPPED,ET,REE,DELK,RO,REMIEVE,HUS,MOGE,N +20,PLIN,WAM,WONE,PITH,FUMS,KNORNEPTS,FROT,TRORE,WOURN,NE,VO,EDIGOGIC,N +21,AMSHED,WORVED,POMS,HOM,GOTHOUT,O,WOIL,OSE,GOL,INNI,REE,COS,N +22,OT,WAM,REE,NIFFS,DRAGTON,HUD,TROCKED,FREM,EN,REE,ROKER,GODE,N +23,CHE,WAM,FOUSE,REE,DUY,AULER,ET,REE,BOTTOP,OY,REE,FLITT,N 
+24,WIME,GONDOWS,PITH,PORY,WROLL,TEORED,POKED,SWECS,OTHISS,REE,USSER,PROSIES,N +22,YOU,COULD,WIN,A,POPULARITY,CONTEST,AT,THAT,SCHOOL,WITHOUT,ANY,TROUBLE,S +23,SUSAN,AND,JULIA,RIPPED,STRIPS,FROM,THEIR,CLOTHING,AND,BOUND,THE,INJURY,S +24,WE,HAD,BECOME,GOOD,FRIENDS,DURING,MY,STAY,AT,COOK,COUNTY,HOSPITAL,S diff --git a/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run2_stim_set2.csv b/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run2_stim_set2.csv new file mode 100644 index 00000000..c88bfebf --- /dev/null +++ b/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run2_stim_set2.csv @@ -0,0 +1,49 @@ +stim1,stim2,stim3,stim4,stim5,stim6,stim7,stim8,stim9,stim10,stim11,stim12,stim13,stim14 +1,REPPIES,OY,PLIN,TUTTER,DERE,MOMP,IMICTOBLE,RO,REE,PRELT,OSE,PEMLIC,N +2,NER,UCUOS,FOM,IMBRIGING,TIWRITION,CADE,PITH,REE,CHOVY,OY,SORD,CHOOTMENT,N +3,QUEY,ONK,FLUING,RO,DIDORSTROTE,SOTH,DETLERENT,WOSS,OY,FOUCHING,OSE,FOURNING,N +1,IT,BECAME,SO,MONOTONOUS,AS,TO,SEEM,A,PART,OF,THE,QUIETNESS,S +2,I,WAS,GRATEFUL,FOR,THEIR,INSIGHT,INTO,MY,NEED,FOR,THIS,EXPERIENCE,S +3,MANY,OF,THEM,HAVE,DRIFTED,INTO,THE,CITIES,AND,TOWNS,AND,SEAPORTS,S +4,LET,US,SPECULATE,A,LITTLE,ON,THE,MAXIMUM,SIZE,OF,THE,ANACONDA,S +5,THIS,WE,BELIEVE,WILL,SUBSTANTIALLY,BROADEN,THE,POTENTIAL,MARKET,FOR,THE,EQUIPMENT,S +6,HISTORIANS,HAVE,HAD,TWO,REASONS,FOR,PERSISTING,SO,LONG,IN,THEIR,INVESTIGATIONS,S +4,E,GORTED,RO,HERF,SU,FROT,DE,COURN,FIMP,TICE,RO,SCOY,N +5,QUEY,ONK,REE,MOGS,BOOLTISUL,POME,OY,FROT,TOTTLE,PIEKS,OY,LOTURE,N +6,HU,GORTED,RO,GE,BAPS,RO,HANHARD,FOM,OLUPSER,YLPH,OY,SCAYPHIDING,N +7,VISITORS,ARE,WELCOME,TO,COME,SEE,WHAT,THESE,DEDICATED,MOTHERS,CAN,DO,S +8,THE,FIRST,SATURDAY,IN,EACH,MONTH,IS,SET,ASIDE,FOR,NEW,RECORDINGS,S +9,BODY,HEAT,IN,THE,CLOSE,QUARTERS,WILL,HELP,KEEP,UP,THE,TEMPERATURE,S +7,AULER,FROT,QUEY,HUD,SUT,FOM,FOVE,SONUTES,GOTHOUT,RABING,O,WOIL,N +8,MOGS,OY,UR,WOURN,NE,WORTING,RO,ODCOT,FROT,FORSOCKNESS,CONCE,HOWN,N 
+9,VO,OVE,SUFFISTED,FROT,REE,ENSIFOL,EFFALLS,OY,REE,ADE,DERE,OUTULEVONT,N +10,DURING,MOMENTS,OF,INTENSE,CRISIS,THE,RESPONSIBILITY,OF,POLITICAL,LEADERS,IS,OVERWHELMING,S +11,SHE,PICKED,UP,THE,BABY,AND,NUZZLED,HER,FAT,WARM,LITTLE,NECK,S +12,HE,JUMPED,UP,AND,TURNED,AROUND,TO,SEE,THE,METAL,DOOR,CLOSING,S +10,REE,OTMOKNORE,OS,FROT,OY,OP,OMSTOCTIVE,PRIVINE,BEOLS,CLIB,ET,HOTH,N +11,CHE,HUD,ONDIVED,PLIN,SERNING,OSE,COTH,STROCHES,RO,REE,ELTKISH,WORPENS,N +12,QUEN,HU,GOTS,REE,ORSHERS,RO,HUS,SNUSTIONS,HU,WIST,NE,DENCEOROGED,N +13,DE,MOMP,OT,BAPS,RO,REE,HONBEOR,EN,LELT,FRON,FEIR,SONUTES,N +14,TRORE,OS,O,FUNCIBLE,TEEKING,EN,REE,ORF,OY,RELORSION,LOWOND,MELITICS,N +15,FREIR,RURIES,INSPODE,ELUSEITION,OY,REE,INRONGOTION,COTTOCTED,OSE,PRODENOTION,OY,RETORCORDOTIONS,N +13,THERE,WAS,LITTLE,CHANCE,ANYONE,WOULD,ENTER,THIS,SHAFT,DURING,THE,WINTER,S +14,I,HAD,FELT,THE,DRAFT,THEY,WERE,MAKING,WHILE,MOUNTING,THE,STAIRS,S +15,HE,SENSED,RATHER,THAN,HEARD,THE,GASP,THAT,SWEPT,ACROSS,THE,AUDIENCE,S +16,GOTHIN,WRO,WOOKS,WONGEN,WAM,ROSSING,REE,BEMS,ET,REE,OGGEY,GOKE,N +17,HU,BERIEW,EN,ROXING,ENWRORING,CLOUSSES,OSE,HU,MOMP,O,CREOT,PORY,N +18,DE,BOUSED,HUS,REGONDS,OSE,PLOWLS,FREM,EN,OOR,SCHOATS,OSE,AMILERSITIES,N +16,THEY,PASSED,RANCHES,THAT,WERE,FRAMED,DARK,GRAY,AGAINST,THE,BLACK,HILLS,S +17,WHITE,PEPPER,IS,THE,GROUND,SEED,OF,THE,COMMON,BLACK,PEPPER,FRUIT,S +18,LINDA,QUIETLY,GOT,A,BROOM,AND,STARTED,TO,SWEEP,UP,THE,SUGAR,S +19,ON,THE,WAY,HE,STOPPED,AT,THE,DESK,TO,RECEIVE,HIS,MAIL,S +20,THIS,WAS,DONE,WITH,FULL,KNOWLEDGE,THAT,THERE,WOULD,BE,NO,EPIDEMIC,S +21,ALFRED,WALKED,PAST,HIM,WITHOUT,A,WORD,AND,GOT,INTO,THE,CAR,S +19,REE,WILLSTIELD,WROSS,OS,FLOTTERFLOOK,OSE,CLEXIDROSS,OS,URTS,EN,REE,BOGIN,N +20,STUBONT,TEORERS,BENON,CLOCUDIC,EFFONCE,RO,BEFURIOTE,CLEODRE,INCOPRITION,PITEROL,MORCED,ADA,N +21,TOOD,SOSIO,DROLI,HOR,OTS,OPE,SPEVEOL,DENENDS,FROT,COYLY,NIED,REIMPEDURANTER,N +22,REE,POGOR,OS,FONKING,OT,OMEWOCK,RO,COMPOTHS,OGIMMS,HUS,OPE,REGOND,N 
+23,NIR,SETHER,OLPO,WAM,O,PITSON,OY,MIMORIOR,MOND,OSE,CLOOD,UNCEREVES,N +24,REE,UNTERS,LIPETISE,HUD,HIRMEN,THULPELVES,EN,REE,GROMP,OSE,REE,BRUBS,N +22,IT,WAS,THE,NIGHT,CLAYTON,HAD,TRICKED,THEM,IN,THE,POKER,GAME,S +23,SHE,WAS,FOUND,THE,DAY,AFTER,AT,THE,BOTTOM,OF,THE,CLIFF,S +24,WIDE,WINDOWS,WITH,MANY,SMALL,LEADED,PANES,SWEPT,ACROSS,THE,UPPER,STORIES,S diff --git a/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run2_stim_set3.csv b/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run2_stim_set3.csv new file mode 100644 index 00000000..2f956c55 --- /dev/null +++ b/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run2_stim_set3.csv @@ -0,0 +1,49 @@ +stim1,stim2,stim3,stim4,stim5,stim6,stim7,stim8,stim9,stim10,stim11,stim12,stim13,stim14 +1,SOTH,OY,REE,ESBONT,OS,SIND,OSE,OS,VOT,POODOBLE,FOM,FIDING,N +2,KNORNEPTS,GOWLED,TROR,CHOVYING,EERSHQUORT,WORED,HOR,BOUN,ODSTIED,EN,NONIOUS,FIECED,N +3,E,SNINK,HOO,ONK,COING,UNFOBS,RO,TADE,BLESE,SNINGS,UD,NOY,N +1,COPIES,OF,THIS,LETTER,WERE,MADE,AVALIABLE,TO,THE,PRESS,AND,PUBLIC,S +2,NEW,IDEAS,FOR,IMPROVING,NUTRITION,CAME,WITH,THE,STUDY,OF,SOIL,TREATMENT,S +3,THEY,ARE,TRYING,TO,DEMONSTRATE,SOME,DIFFERENT,WAYS,OF,TEACHING,AND,LEARNING,S +4,I,WANTED,TO,HELP,SO,THAT,WE,COULD,FIND,TIME,TO,PLAY,S +5,THEY,ARE,THE,MOST,BEAUTIFUL,PART,OF,THAT,LITTLE,PIECE,OF,NATURE,S +6,HE,WANTED,TO,GO,BACK,TO,HARVARD,FOR,ANOTHER,YEAR,OF,PLAYWRITING,S +4,REE,IMPORLENCE,OY,MORTIGRO,RIZE,EN,SUME,OOROMILS,HOR,BOUN,CLOROUGHLY,DIDORSTROTED,N +5,HU,CONCOLERED,OCISING,O,CAG,OY,BOUR,LUT,VEROUD,FROT,UCUO,SOO,N +6,NOVE,OY,UR,WAM,OFURE,FROT,REE,BIBBEST,FIFFS,WAM,STULL,OFO,N +7,AFTER,THAT,THEY,HAD,SAT,FOR,FIVE,MINUTES,WITHOUT,SAYING,A,WORD,S +8,MOST,OF,US,WOULD,BE,WILLING,TO,ADMIT,THAT,FORGIVENESS,COMES,HARD,S +9,NO,ONE,SUGGESTED,THAT,THE,ETHICAL,EFFECTS,OF,THE,ART,WERE,IRRELEVANT,S +7,E,WAM,BORSTANTLY,MEORTHING,FOM,CLUSK,OTOUSE,REE,NEOCHNOURPOOD,OY,REE,HORS,N 
+8,MONGHECERY,WREW,ORS,REE,VOLIENOL,TEORERS,UD,RO,REE,TICE,OY,KANGADY,N +9,REE,SOLCHOSERS,REHONTED,REE,URPER,EN,WRO,TUTTERS,PROTTEN,EN,NEPOROUS,TELDS,N +10,THE,ATMOSPHERE,IS,THAT,OF,AN,ATTRACTIVE,PRIVATE,BEACH,CLUB,AT,HOME,S +11,SHE,HAD,ARRIVED,THIS,MORNING,AND,COME,STRAIGHT,TO,THE,ENGLISH,GARDENS,S +12,WHEN,HE,GETS,THE,ANSWERS,TO,HIS,QUESTIONS,HE,WILL,BE,DISCOURAGED,S +10,REE,FLIOT,HUD,PABBED,REE,LOLKS,COORDLOOM,FOM,HORE,FRON,O,WOOK,N +11,BENGAN,GRINGED,OSE,SPIPPED,O,RECK,PITH,HUS,THURL,LOKE,O,MONDLE,N +12,TRORE,WAM,SOILCLING,MOCHES,OSE,BROKY,ONIET,OTS,MOROON,FROT,DISTOLVES,FREM,N +13,HU,SPINT,LONK,HEERS,LEFOSE,REE,PEREDISION,PLUTTING,OOT,PRERIDES,OY,RESONGE,N +14,HU,LOAKED,FOM,REE,COURCE,OY,REE,NOITS,FROT,HUD,OFUKENED,HOM,N +15,PLIN,EXCETSION,HOR,BOUN,WIING,OD,FOM,OP,OUTIKOTER,EILED,CULLION,YIERS,N +13,WE,MADE,IT,BACK,TO,THE,HARBOR,IN,LESS,THAN,FOUR,MINUTES,S +14,THERE,IS,A,TANGIBLE,FEELING,IN,THE,AIR,OF,REVULSION,TOWARD,POLITICS,S +15,THEIR,DUTIES,INCLUDE,EVALUATION,OF,THE,INFORMATION,COLLECTED,AND,PREPARATION,OF,RECOMMENDATIONS,S +16,NIR,FOPE,REEMED,RO,FLOOT,EN,OP,INSCABDIBLY,CLIGHT,CHOFT,OY,PONLIGHT,N +17,REE,RIZE,OY,WEOREN,MOURD,WIST,DETERBOLS,REE,OSIENT,OY,CROY,NOONED,N +18,E,TEEK,REE,PAPE,OSE,CHEVEL,OSE,WEWS,OOT,OD,REE,POLKS,N +16,WITHIN,TWO,WEEKS,WARREN,WAS,RINGING,THE,BELL,AT,THE,ABBEY,GATE,S +17,HE,BELIEVED,IN,MAKING,INSPIRING,SPEECHES,AND,HE,MADE,A,GREAT,MANY,S +18,WE,BOUGHT,HIS,RECORDS,AND,PLAYED,THEM,IN,OUR,SCHOOLS,AND,UNIVERSITIES,S +19,THE,WINDSHIELD,GLASS,IS,SHATTERPROOF,AND,PLEXIGLAS,IS,USED,IN,THE,CABIN,S +20,STUDENT,LEADERS,BEGAN,SPORADIC,EFFORTS,TO,NEGOTIATE,THEATER,INTEGRATION,SEVERAL,MONTHS,AGO,S +21,GOOD,RADIO,DRAMA,HAS,ITS,OWN,SPECIAL,DEMANDS,THAT,BADLY,NEED,REINVIGORATION,S +19,WOM,OS,REE,RECUNK,OY,MISSLIST,OSE,LONS,OY,OMPERPRONDING,BESHOON,DOOPLE,N +20,BRINSTON,RETIFONTS,HABS,BOUN,NOREROUS,CONSCIKETORS,RO,REE,FUKE,IBER,REE,YIERS,N +21,GRENCELCA,OSE,HERSORT,DERE,OBUNG,REE,WEW,DOOPLE,DE,WREW,EN,DORECONIO,N 
+22,PECTOR,APAL,WAM,TUZZY,UD,SNONT,PITH,SOTH,OY,HUS,LIMS,PORIENDS,N +23,SOTH,DOOPLE,CAG,COSMS,OLBUST,OTUBLING,OOT,OY,O,PIEKS,OY,DOOD,N +24,HU,DEES,VOT,REEM,RO,HABS,CAINTS,REE,SURQUENIES,OY,REE,LAN,N +22,THE,MAYOR,IS,FINDING,IT,AWKWARD,TO,CAMPAIGN,AGAINST,HIS,OWN,RECORD,S +23,HER,MOTHER,ALSO,WAS,A,PERSON,OF,SUPERIOR,MIND,AND,BROAD,INTERESTS,S +24,THE,OTHERS,LIKEWISE,HAD,HIDDEN,THEMSELVES,IN,THE,GRASS,AND,THE,BRUSH,S diff --git a/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run2_stim_set4.csv b/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run2_stim_set4.csv new file mode 100644 index 00000000..b2b826c7 --- /dev/null +++ b/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run2_stim_set4.csv @@ -0,0 +1,49 @@ +stim1,stim2,stim3,stim4,stim5,stim6,stim7,stim8,stim9,stim10,stim11,stim12,stim13,stim14 +1,QUEY,ONK,PIMNING,STROSH,CHESTURE,OD,FREIR,POLYLL,DENIMPMENTS,RO,TEEP,URPER,N +2,SANANAR,HOGURT,HULFSROY,OS,ODMIEITLY,O,LAN,PITH,O,ROUL,OSE,HEALT,N +3,E,FILLAWED,FREM,EN,REE,JOUP,OSE,NOY,QUEY,DOD,VOT,CANK,N +1,SOME,OF,THE,ISLAND,IS,SAND,AND,IS,NOT,SUITABLE,FOR,LIVING,S +2,KNOWLEDGE,GAINED,FROM,STUDYING,EARTHQUAKE,WAVES,HAS,BEEN,APPLIED,IN,VARIOUS,FIELDS,S +3,I,THINK,YOU,ARE,BEING,UNFAIR,TO,TAKE,THESE,THINGS,UP,NOW,S +4,THE,IMPORTANCE,OF,PARTICLE,SIZE,IN,SUCH,AEROSOLS,HAS,BEEN,THOROUGHLY,DEMONSTRATED,S +5,HE,CONSIDERED,OPENING,A,CAN,OF,BEER,BUT,VETOED,THAT,IDEA,TOO,S +6,NONE,OF,US,WAS,AWARE,THAT,THE,BIGGEST,FIGHT,WAS,STILL,AHEAD,S +4,SOMEMEMES,E,GUNESCO,OT,WAM,BEBAVES,REE,TAIN,SQUARS,HUD,CHAMMED,DICONTION,N +5,PLIN,MOPEGAAL,STUOCIGNES,URMER,USTRAPIOMUM,LIFFS,WHISS,BOMINITATES,OTS,MIMPLING,OSE,ASCISHMENT,N +6,HU,OS,BELLER,TOTTED,RO,PERFAID,HUS,SOVEAL,LIRR,OBUNG,HUS,FULLORN,N +7,I,WAS,CONSTANTLY,SEARCHING,FOR,CLUES,AROUND,THE,NEIGHBORHOOD,OF,THE,HALL,S +8,MONTGOMERY,KNEW,ALL,THE,NATIONAL,LEADERS,UP,TO,THE,TIME,OF,KENNEDY,S +9,THE,PURCHASERS,REJECTED,THE,ORDER,IN,TWO,LETTERS,WRITTEN,IN,VIGOROUS,TERMS,S 
+7,TROR,REE,OUTNODE,OT,WAM,OP,ODMITORY,ENEAUX,HOULE,OY,REE,GISTRY,N +8,CHE,ENVENIELLED,NOVE,OY,REE,SUSLANSE,OY,SOTH,POOM,STRINKER,MOLLING,ANCYGRACOKIAS,N +9,QUOTHER,HU,SABS,WOLL,OM,COYLY,HUD,NENTING,RO,FE,PITH,OT,N +10,THE,TRIAL,HAD,PACKED,THE,LARGE,COURTROOM,FOR,MORE,THAN,A,WEEK,S +11,BENSON,GRINNED,AND,FLIPPED,A,ROCK,WITH,HIS,THUMB,LIKE,A,MARBLE,S +12,THERE,WAS,SOMETHING,MAIMED,AND,CRAZY,ABOUT,ITS,MOTION,THAT,DISTURBED,THEM,S +10,HU,WEWS,RO,REE,SNONT,WOOR,OSE,OBONED,OT,OSE,LOAKED,EN,N +11,HU,CAINTS,NIR,CY,OP,ORL,OSE,HELVED,NIR,INNI,REE,LENCHEN,N +12,HU,TORD,HIMRALF,HU,HUD,NUDER,SEAN,WRO,DOOPLE,EET,SU,MUME,N +13,MIMP,SNOTCHED,O,PUNTOL,TROR,REE,HEAN,OY,FLITTERED,BOURY,OSE,DIRED,N +14,RUD,GABS,NIR,O,WOSK,DIT,OD,REE,SHEALPER,LEFOSE,HU,RETWEED,N +15,HU,GOL,O,WROLL,FIMS,PRORTED,OSE,DUT,OD,BABIN,OSE,COCTEE,N +13,HE,SPENT,LONG,HOURS,BEFORE,THE,TV,SPITTING,OUT,PROMISES,OF,REVENGE,S +14,HE,LOOKED,FOR,THE,SOURCE,OF,THE,NOISE,THAT,HAD,AWAKENED,HIM,S +15,THIS,EXPANSION,HAS,BEEN,GOING,ON,FOR,AN,ESTIMATED,EIGHT,BILLION,YEARS,S +16,REMOY,SAR,PONLIGHT,TEECH,REE,BIRLY,BLOIFS,HAITS,OD,REE,BRORN,SMIN,N +17,REE,MARENT,COMBANCED,RO,WOOP,OSE,OT,BLATCHED,REE,SELPS,OY,ENHEPMENT,N +18,THUN,HU,ASSINIGNED,DATSUO,CY,PESTING,OSE,CLIGGING,HIMRALF,UNTAT,HU,SUT,N +16,HER,FACE,SEEMED,TO,FLOAT,IN,AN,IMPLAUSIBLY,BRIGHT,SHAFT,OF,SUNLIGHT,S +17,THE,SIZE,OF,WOODEN,MOLD,WILL,DETERMINE,THE,AMOUNT,OF,CLAY,NEEDED,S +18,I,TOOK,THE,PAIL,AND,SHOVEL,AND,WENT,OUT,ON,THE,PORCH,S +19,WAR,IS,THE,RESULT,OF,MISTRUST,AND,LACK,OF,UNDERSTANDING,BETWEEN,PEOPLE,S +20,CRANSTON,RESIDENTS,HAVE,BEEN,GENEROUS,CONTRIBUTORS,TO,THE,FUND,OVER,THE,YEARS,S +21,FRANCESCA,AND,HERBERT,WERE,AMONG,THE,FEW,PEOPLE,WE,KNEW,IN,CATALONIA,S +19,E,SAR,HOM,MYBOLF,OSE,OT,WAM,WONE,AULER,CONCATLATION,PITH,CRIMWOLL,N +20,RO,HABS,SORNNAY,FROT,LOND,LEFORNED,WAM,QUAT,HU,HUD,LINCE,FOM,N +21,LUT,OT,WAM,SUME,O,NILT,PROUGHT,FROT,HU,GEDDED,HUS,HEAM,N +22,ADYTHE,MUTTLED,DOIL,RO,BEMIME,O,SOVEAL,MYME,OSE,O,FERMIBLE,EXAYBRA,N 
+23,AIRE,WILDED,UNTAT,REE,WOOR,HUD,SLANNED,OSE,PUCKED,UD,REE,COCTIEPOT,N +24,REE,EFFAKE,OD,REE,INTESCELLEALS,OBUNG,HUS,ORIENCE,MAK,WOLL,NE,IRAGENED,N +22,DOC,ABEL,WAS,BUSY,UP,FRONT,WITH,SOME,OF,HIS,LIVE,PATIENTS,S +23,SOME,PEOPLE,CAN,CARVE,ALMOST,ANYTHING,OUT,OF,A,PIECE,OF,WOOD,S +24,HE,DOES,NOT,SEEM,TO,HAVE,CAUGHT,THE,SUBTLETIES,OF,THE,MAN,S diff --git a/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run2_stim_set5.csv b/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run2_stim_set5.csv new file mode 100644 index 00000000..165bebc8 --- /dev/null +++ b/brainscore_language/data/fedorenko2010_localization/langloc_fmri_run2_stim_set5.csv @@ -0,0 +1,49 @@ +stim1,stim2,stim3,stim4,stim5,stim6,stim7,stim8,stim9,stim10,stim11,stim12,stim13,stim14 +1,HU,GOVES,BRUDIT,FOM,REE,STOCOTION,RO,HUS,NER,OUTLEOF,OD,LIRR,N +2,NIED,FOM,NEVESRY,MAK,NE,O,SYMELOM,OY,CEDDUROL,FOTOLMS,OSE,INCRECORITY,N +3,HU,SUT,UD,OSE,WONCHED,AR,QUEY,PUBBED,THULPELVES,IBER,REE,STURN,N +1,THEY,ARE,PUTTING,STRONG,PRESSURE,ON,THEIR,POLICE,DEPARTMENTS,TO,KEEP,ORDER,S +2,SENATOR,HUBERT,HUMPHREY,IS,OBVIOUSLY,A,MAN,WITH,A,SOUL,AND,HEART,S +3,I,FOLLOWED,THEM,IN,THE,JEEP,AND,NOW,THEY,DID,NOT,CARE,S +4,SOMETIMES,I,GUESSED,IT,WAS,BECAUSE,THE,RAIN,SQUALL,HAD,CHANGED,DIRECTION,S +5,THIS,MATERIAL,FLUORESCES,UNDER,ULTRAVIOLET,LIGHT,WHICH,FACILITATES,ITS,SAMPLING,AND,ASSESSMENT,S +6,HE,IS,BETTER,FITTED,TO,PERFORM,HIS,SOCIAL,LIFE,AMONG,HIS,FELLOWS,S +4,HU,LELD,REE,RERE,OY,HUS,SNINGS,OSE,LEFORNED,RO,REE,LOMPY,N +5,SHONESE,OSE,ENCIOD,MERSHOCKS,OTHISS,REE,STREOP,DERE,SLOSSING,FREIR,STOUL,CHOTTERS,N +6,O,CORGLE,OTIVES,EN,REE,ORF,DIRIDES,OTS,FLOPE,OSE,SMOLE,UGWONDS,N +7,FROM,THE,OUTSIDE,IT,WAS,AN,ORDINARY,ENOUGH,HOUSE,OF,THE,GENTRY,S +8,SHE,EXPERIENCED,NONE,OF,THE,SUSPENSE,OF,SOME,POOR,STRANGER,SELLING,ENCYCLOPEDIAS,S +9,WHETHER,HE,SANG,WELL,OR,BADLY,HAD,NOTHING,TO,DO,WITH,IT,S +7,PLIN,DINMIRMITY,REDNEFENTS,O,DITCEROTE,OLLEMPT,RO,STOFILOLS,O,TORMFESSLY,URCHADLE,ERCIRULMENT,N 
+8,REE,UNTER,POGRENS,DERE,MOXI,CLIRERS,OSE,ADE,STUBONTS,OSE,WROLL,FRILLKOOPERS,N +9,MIMP,POFFED,SCROUGH,OT,OSE,MOFFS,LOWOND,REE,DORD,MEMP,OY,HURTES,N +10,HE,WENT,TO,THE,FRONT,DOOR,AND,OPENED,IT,AND,LOOKED,IN,S +11,HE,CAUGHT,HER,BY,AN,ARM,AND,HELPED,HER,INTO,THE,KITCHEN,S +12,HE,TOLD,HIMSELF,HE,HAD,NEVER,SEEN,TWO,PEOPLE,EAT,SO,MUCH,S +10,ET,ORKS,O,BUPY,OY,DEGS,WAM,SWIPPING,OSE,SNURVING,OTOUSE,HOM,N +11,OP,OTFOCTIVE,SCOSS,WAM,DEVESYSMS,FOM,RUMING,SCHOAT,NEOCHNOURPOODS,TROR,BLESE,ROLO,N +12,DE,SPIND,MILLOOTS,OY,PELLORS,ELKBY,YLPH,OD,FORBONE,TUNKERS,OSE,SOOSTGOYERS,N +13,HU,SUT,DOIL,OD,OP,ORN,BOF,OSE,FOCECKED,OD,REE,TROPLEN,N +14,PLIN,OS,OP,OTTURPTION,PITH,WHISS,WEW,WOURN,NE,DISLODES,RO,SWORRUL,N +15,OT,WAM,O,MOUGH,LONK,RIMP,SCROUGH,REE,RUD,OSE,MOT,HOWED,N +13,MIKE,SNATCHED,A,PISTOL,FROM,THE,HEAP,OF,SCATTERED,BOOTY,AND,FIRED,S +14,ROD,GAVE,HER,A,WARM,PAT,ON,THE,SHOULDER,BEFORE,HE,REPLIED,S +15,HE,GOT,A,SMALL,FIRE,STARTED,AND,PUT,ON,BACON,AND,COFFEE,S +16,CORMUNERS,ONK,COING,URTS,RO,TEEP,BROFFS,INMINCONIES,ET,HORE,NUNKOBLE,TUVELS,N +17,TRORE,ONK,CLAUPANDS,OY,SQUONK,MIPES,OY,SOFF,POS,WHISS,ONK,FUTEOUS,N +18,E,WEWS,RO,JISIL,AMSHED,EN,REE,KINGSTON,FOBBITOL,O,WEW,TIVES,N +16,RAMEY,SAW,SUNLIGHT,TOUCH,THE,CURLY,BLONDE,HAIRS,ON,THE,BROWN,SKIN,S +17,THE,MARINE,COMMENCED,TO,WEEP,AND,IT,BLIGHTED,THE,SENSE,OF,ENJOYMENT,S +18,THEN,HE,ASTONISHED,MATSUO,BY,PUSHING,AND,DRAGGING,HIMSELF,UNTIL,HE,SAT,S +19,I,SAW,HIM,MYSELF,AND,IT,WAS,DONE,AFTER,CONSULTATION,WITH,CROMWELL,S +20,TO,HAVE,SOMEDAY,THAT,LOVE,RETURNED,WAS,WHAT,HE,HAD,LIVED,FOR,S +21,BUT,IT,WAS,SUCH,A,NICE,THOUGHT,THAT,HE,NODDED,HIS,HEAD,S +19,HU,REEMED,RO,NE,LOAKING,ET,O,POITS,OPOKE,REE,TOTTLE,GONDOW,N +20,OLBUST,VO,ELLYRINOL,WOFT,HOR,BOUN,WONE,OD,REE,TROPLEN,OY,IMUILOTION,N +21,EF,DE,LOAK,ET,RECURE,ADE,DE,FIMP,OT,PREOTBUVOED,PITH,FOGE,N +22,HOO,COURN,WIP,O,SYLUCOLITY,CORTETS,ET,FROT,SCHOAT,GOTHOUT,EDY,TROOMLE,N +23,SUBON,OSE,JUROE,MIPPED,STREPS,TROR,FREIR,FRITHING,OSE,BOUSE,REE,ENCUFY,N 
+24,DE,HUD,BEMIME,TOOD,FRIESTS,RORING,HY,STOK,ET,COOB,COURRY,FOBBITOL,N +22,EDYTHE,SETTLED,DOWN,TO,BECOME,A,SOCIAL,MYTH,AND,A,HORRIBLE,EXAMPLE,S +23,ANNE,WAITED,UNTIL,THE,DOOR,HAD,SLAMMED,AND,PICKED,UP,THE,COFFEEPOT,S +24,THE,EFFECT,ON,THE,INTELLECTUALS,AMONG,HIS,AUDIENCE,MAY,WELL,BE,IMAGINED,S diff --git a/brainscore_language/model_helpers/huggingface.py b/brainscore_language/model_helpers/huggingface.py index 84576853..96618f4e 100644 --- a/brainscore_language/model_helpers/huggingface.py +++ b/brainscore_language/model_helpers/huggingface.py @@ -17,6 +17,7 @@ from brainscore_language.artificial_subject import ArtificialSubject from brainscore_language.model_helpers.preprocessing import prepare_context from brainscore_language.utils import fullname +from brainscore_language.model_helpers.localize import localize_fed10 class HuggingfaceSubject(ArtificialSubject): @@ -26,6 +27,8 @@ def __init__( region_layer_mapping: dict, model=None, tokenizer=None, + use_localizer=False, + localizer_kwargs=None, task_heads: Union[None, Dict[ArtificialSubject.Task, Callable]] = None, ): """ @@ -41,6 +44,7 @@ def __init__( """ self._logger = logging.getLogger(fullname(self)) self.model_id = model_id + self.use_localizer = use_localizer self.region_layer_mapping = region_layer_mapping self.basemodel = (model if model is not None else AutoModelForCausalLM.from_pretrained(self.model_id)) self.device = 'cuda' if torch.cuda.is_available() else 'cpu' @@ -59,6 +63,18 @@ def __init__( } self.task_function_mapping_dict = {**task_mapping_default, **task_heads} if task_heads else task_mapping_default + if self.use_localizer: + layer_names = region_layer_mapping["language_system"] + self.language_mask = localize_fed10(model_id=self.model_id, + model=self.basemodel, + tokenizer=self.tokenizer, + layer_names=layer_names, + top_k=localizer_kwargs["top_k"], + batch_size=localizer_kwargs["batch_size"], + hidden_dim=localizer_kwargs["hidden_dim"], + device=self.device + ).flatten() + def 
identifier(self): return self.model_id @@ -122,6 +138,13 @@ def digest_text(self, text: Union[str, List[str]]) -> Dict[str, DataAssembly]: if output['behavior'] else None output['neural'] = xr.concat(output['neural'], dim='presentation').sortby('part_number') \ if output['neural'] else None + + if self.neural_recordings and self.use_localizer: + num_presentations = output['neural'].data.shape[0] + output['neural-mask'] = output['neural'].copy() + output['neural-mask'].data = np.repeat(self.language_mask[np.newaxis,:], num_presentations, axis=0) + output['neural'] = output['neural'].where(output['neural-mask'], drop=True) + return output def _prepare_context(self, context_parts): @@ -190,11 +213,16 @@ def _setup_hooks(self): hooks = [] layer_representations = OrderedDict() for (recording_target, recording_type) in self.neural_recordings: - layer_name = self.region_layer_mapping[recording_target] - layer = self._get_layer(layer_name) - hook = self._register_hook(layer, key=(recording_target, recording_type, layer_name), - target_dict=layer_representations) - hooks.append(hook) + layer_names = self.region_layer_mapping[recording_target] + if type(layer_names) == str: + layer_names = [layer_names] + + for layer_idx, layer_name in enumerate(layer_names): + layer = self._get_layer(layer_name) + hook = self._register_hook(layer, key=(f"{recording_target}.{layer_idx}", recording_type, layer_name), + target_dict=layer_representations) + hooks.append(hook) + return hooks, layer_representations def output_to_representations(self, layer_representations: Dict[Tuple[str, str, str], np.ndarray], stimuli_coords): diff --git a/brainscore_language/model_helpers/localize.py b/brainscore_language/model_helpers/localize.py new file mode 100644 index 00000000..d27bed8f --- /dev/null +++ b/brainscore_language/model_helpers/localize.py @@ -0,0 +1,174 @@ +from typing import List +from collections import OrderedDict + +import os +import scipy +import torch +import logging +import numpy as 
np +import transformers +import pandas as pd + +from glob import glob +from tqdm import tqdm +from torch.utils.data import Dataset, DataLoader +from pathlib import Path + +from brainscore_language import load_dataset + +BRAINIO_CACHE = os.environ.get("BRAINIO", f"{Path.home()}/.brainio") +os.environ["TOKENIZERS_PARALLELISM"] = "False" + +logger = logging.getLogger(__name__) + +# Code adapted from: https://github.com/bkhmsi/brain-language-suma + +class Fed10_langlocDataset(Dataset): + def __init__(self): + self.num_samples = 240 + + data = load_dataset("Fedorenko2010.localization") + self.sentences = data[data["stim14"]=="S"]["sent"] + self.non_words = data[data["stim14"]=="N"]["sent"] + + def __getitem__(self, idx): + return self.sentences.iloc[idx].strip(), self.non_words.iloc[idx].strip() + + def __len__(self): + return len(self.sentences) + +def _get_layer(module, layer_name: str) -> torch.nn.Module: + SUBMODULE_SEPARATOR = '.' + for part in layer_name.split(SUBMODULE_SEPARATOR): + module = module._modules.get(part) + assert module is not None, f"No submodule found for layer {layer_name}, at part {part}" + return module + +def _register_hook(layer: torch.nn.Module, + key: str, + target_dict: dict): + # instantiate parameters to function defaults; otherwise they would change on next function call + def hook_function(_layer: torch.nn.Module, _input, output: torch.Tensor, key=key): + # fix for when taking out only the hidden state, this is different from dropout because of residual state + # see: https://github.com/huggingface/transformers/blob/c06d55564740ebdaaf866ffbbbabf8843b34df4b/src/transformers/models/gpt2/modeling_gpt2.py#L428 + output = output[0] if isinstance(output, (tuple, list)) else output + target_dict[key] = output + + hook = layer.register_forward_hook(hook_function) + return hook + +def setup_hooks(model, layer_names): + """ set up the hooks for recording internal neural activity from the model (aka layer activations) """ + hooks = [] + 
layer_representations = OrderedDict() + + for layer_name in layer_names: + layer = _get_layer(model, layer_name) + hook = _register_hook(layer, key=layer_name, + target_dict=layer_representations) + hooks.append(hook) + + return hooks, layer_representations + +def extract_batch( + model: torch.nn.Module, + input_ids: torch.Tensor, + attention_mask: torch.Tensor, + layer_names: List[str], +): + + batch_activations = {layer_name: [] for layer_name in layer_names} + hooks, layer_representations = setup_hooks(model, layer_names) + + with torch.no_grad(): + _ = model(input_ids=input_ids, attention_mask=attention_mask) + + for sample_idx in range(len(input_ids)): + for layer_idx, layer_name in enumerate(layer_names): + activations = layer_representations[layer_name][sample_idx][-1].cpu() + batch_activations[layer_name] += [activations] + + for hook in hooks: + hook.remove() + + return batch_activations + +def extract_representations( + model: torch.nn.Module, + tokenizer: transformers.PreTrainedTokenizer, + layer_names: List[str], + hidden_dim: int, + batch_size: int, + device: torch.device, +): + langloc_dataset = Fed10_langlocDataset() + + # Get the activations of the model on the dataset + langloc_dataloader = DataLoader(langloc_dataset, batch_size=batch_size, num_workers=0) + + logger.debug(f"> Using Device: {device}") + + model.eval() + model.to(device) + + final_layer_representations = { + "sentences": {layer_name: np.zeros((langloc_dataset.num_samples, hidden_dim)) for layer_name in layer_names}, + "non-words": {layer_name: np.zeros((langloc_dataset.num_samples, hidden_dim)) for layer_name in layer_names} + } + + for batch_idx, batch_data in tqdm(enumerate(langloc_dataloader)): + + sents, non_words = batch_data + sent_tokens = tokenizer(sents, truncation=True, max_length=12, return_tensors='pt').to(device) + non_words_tokens = tokenizer(non_words, truncation=True, max_length=12, return_tensors='pt').to(device) + assert sent_tokens.input_ids.size(1) == 
non_words_tokens.input_ids.size(1) + + batch_real_actv = extract_batch(model, sent_tokens["input_ids"], sent_tokens["attention_mask"], layer_names) + batch_rand_actv = extract_batch(model, non_words_tokens["input_ids"], non_words_tokens["attention_mask"], layer_names) + + for layer_name in layer_names: + final_layer_representations["sentences"][layer_name][batch_idx*batch_size:(batch_idx+1)*batch_size] = torch.stack(batch_real_actv[layer_name]).numpy() + final_layer_representations["non-words"][layer_name][batch_idx*batch_size:(batch_idx+1)*batch_size] = torch.stack(batch_rand_actv[layer_name]).numpy() + + return final_layer_representations + +def localize_fed10(model_id: str, + model: torch.nn.Module, + top_k: int, + tokenizer: transformers.PreTrainedTokenizer, + hidden_dim: int, + layer_names: List[str], + batch_size: int, + device: torch.device, +): + """ + Localize the model by selecting the top `top_k` units. + """ + + save_path = f"{BRAINIO_CACHE}/{model_id}_language_mask.npy" + + if os.path.exists(save_path): + logger.debug(f"Loading language mask from {save_path}") + return np.load(save_path) + + representations = extract_representations(model, tokenizer, layer_names, hidden_dim, batch_size, device) + + p_values_matrix = np.zeros((len(layer_names), hidden_dim)) + t_values_matrix = np.zeros((len(layer_names), hidden_dim)) + + for layer_idx, layer_name in tqdm(enumerate(layer_names)): + + sentences_actv = representations["sentences"][layer_name] + non_words_actv = representations["non-words"][layer_name] + + t_values_matrix[layer_idx], p_values_matrix[layer_idx] = scipy.stats.ttest_ind(sentences_actv, non_words_actv, axis=0, equal_var=False) + + def is_topk(a, k=1): + _, rix = np.unique(-a, return_inverse=True) + return np.where(rix < k, 1, 0).reshape(a.shape) + + language_mask = is_topk(t_values_matrix, k=top_k) + + np.save(save_path, language_mask) + logger.debug(f"{model_id} language mask cached to {save_path}") + return language_mask diff --git 
a/examples/score_localization.py b/examples/score_localization.py new file mode 100644 index 00000000..eda4d02d --- /dev/null +++ b/examples/score_localization.py @@ -0,0 +1,26 @@ +from tqdm import tqdm +from brainscore_language import load_benchmark +from brainscore_language.model_helpers.huggingface import HuggingfaceSubject +from brainscore_language import ArtificialSubject + +benchmark = load_benchmark('Pereira2018.243sentences-linear') + +num_blocks = 12 +layer_names = [f'transformer.h.{block}.{layer_type}' + for block in range(num_blocks) + for layer_type in ['ln_1', 'attn', 'ln_2', 'mlp'] +] + +layer_model = HuggingfaceSubject(model_id='gpt2', + region_layer_mapping={ArtificialSubject.RecordingTarget.language_system: layer_names}, + use_localizer=True, + localizer_kwargs={ + 'hidden_dim': 768, + 'batch_size': 16, + "top_k": 4096, + } +) + +layer_score = benchmark(layer_model) + +print(layer_score) \ No newline at end of file From f98c6abef663fb03ddf208b9cfd524a172f10af4 Mon Sep 17 00:00:00 2001 From: Badr AlKhamissi Date: Sat, 6 Jul 2024 07:43:04 +0200 Subject: [PATCH 2/8] changed variable names in localization example --- examples/score_localization.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/score_localization.py b/examples/score_localization.py index eda4d02d..b7dd1d22 100644 --- a/examples/score_localization.py +++ b/examples/score_localization.py @@ -11,7 +11,7 @@ for layer_type in ['ln_1', 'attn', 'ln_2', 'mlp'] ] -layer_model = HuggingfaceSubject(model_id='gpt2', +model = HuggingfaceSubject(model_id='gpt2', region_layer_mapping={ArtificialSubject.RecordingTarget.language_system: layer_names}, use_localizer=True, localizer_kwargs={ @@ -21,6 +21,6 @@ } ) -layer_score = benchmark(layer_model) +model_score = benchmark(model) -print(layer_score) \ No newline at end of file +print(model_score) \ No newline at end of file From 5961de06d89acc7084f680cc7dc396861b1ee204 Mon Sep 17 00:00:00 2001 From: Badr AlKhamissi Date: 
Fri, 19 Jul 2024 04:58:36 -0400 Subject: [PATCH 3/8] Update .gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 8a001910..8be6bb82 100644 --- a/.gitignore +++ b/.gitignore @@ -136,6 +136,7 @@ dmypy.json ### project specific additions: +brainscore_language/data html .vscode *.code-workspace @@ -148,4 +149,4 @@ cache .cache .idea/ wandb/ -**/models/lm1b/resources \ No newline at end of file +**/models/lm1b/resources From da7672e5581a732c127c82df6fb457f51ad1898e Mon Sep 17 00:00:00 2001 From: Badr AlKhamissi Date: Thu, 8 Aug 2024 09:21:09 -0400 Subject: [PATCH 4/8] added comments --- brainscore_language/data/fedorenko2010_localization/__init__.py | 1 + brainscore_language/model_helpers/localize.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/brainscore_language/data/fedorenko2010_localization/__init__.py b/brainscore_language/data/fedorenko2010_localization/__init__.py index 22a51d27..ff7bf276 100644 --- a/brainscore_language/data/fedorenko2010_localization/__init__.py +++ b/brainscore_language/data/fedorenko2010_localization/__init__.py @@ -27,6 +27,7 @@ def load_data(): data["sent"] = data["stim2"].apply(str.lower) for stimuli_idx in range(3, 14): + # lowercase each stimulus/word then add it to the sentence data["sent"] += " " + data[f"stim{stimuli_idx}"].apply(str.lower) return data diff --git a/brainscore_language/model_helpers/localize.py b/brainscore_language/model_helpers/localize.py index d27bed8f..83177349 100644 --- a/brainscore_language/model_helpers/localize.py +++ b/brainscore_language/model_helpers/localize.py @@ -16,8 +16,8 @@ from brainscore_language import load_dataset +# To cache the language mask BRAINIO_CACHE = os.environ.get("BRAINIO", f"{Path.home()}/.brainio") -os.environ["TOKENIZERS_PARALLELISM"] = "False" logger = logging.getLogger(__name__) From d35d254b72d87be1daa36dc7bcdbcc0899917a90 Mon Sep 17 00:00:00 2001 From: Badr AlKhamissi Date: Mon, 12 Aug 
2024 17:24:53 +0700 Subject: [PATCH 5/8] removed num_samples from Fed10_langlocDataset --- brainscore_language/model_helpers/localize.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/brainscore_language/model_helpers/localize.py b/brainscore_language/model_helpers/localize.py index 83177349..47134461 100644 --- a/brainscore_language/model_helpers/localize.py +++ b/brainscore_language/model_helpers/localize.py @@ -25,8 +25,6 @@ class Fed10_langlocDataset(Dataset): def __init__(self): - self.num_samples = 240 - data = load_dataset("Fedorenko2010.localization") self.sentences = data[data["stim14"]=="S"]["sent"] self.non_words = data[data["stim14"]=="N"]["sent"] @@ -112,8 +110,8 @@ def extract_representations( model.to(device) final_layer_representations = { - "sentences": {layer_name: np.zeros((langloc_dataset.num_samples, hidden_dim)) for layer_name in layer_names}, - "non-words": {layer_name: np.zeros((langloc_dataset.num_samples, hidden_dim)) for layer_name in layer_names} + "sentences": {layer_name: np.zeros((len(langloc_dataset.sentences), hidden_dim)) for layer_name in layer_names}, + "non-words": {layer_name: np.zeros((len(langloc_dataset.sentences), hidden_dim)) for layer_name in layer_names} } for batch_idx, batch_data in tqdm(enumerate(langloc_dataloader)): From a07c9d233dbd938fb91003b80d21cdda2dbcf793 Mon Sep 17 00:00:00 2001 From: Badr AlKhamissi Date: Sun, 18 Aug 2024 19:39:30 +0200 Subject: [PATCH 6/8] SUMA now supported --- .gitignore | 1 + brainscore_language/model_helpers/localize.py | 5 +- .../model_helpers/modeling_suma.py | 1176 +++++++++++++++++ brainscore_language/models/suma/__init__.py | 30 + examples/score_suma.py | 13 + 5 files changed, 1222 insertions(+), 3 deletions(-) create mode 100644 brainscore_language/model_helpers/modeling_suma.py create mode 100644 brainscore_language/models/suma/__init__.py create mode 100644 examples/score_suma.py diff --git a/.gitignore b/.gitignore index 8be6bb82..69b1c768 100644 --- 
a/.gitignore +++ b/.gitignore @@ -150,3 +150,4 @@ cache .idea/ wandb/ **/models/lm1b/resources +conda_score--* diff --git a/brainscore_language/model_helpers/localize.py b/brainscore_language/model_helpers/localize.py index 47134461..a56eea55 100644 --- a/brainscore_language/model_helpers/localize.py +++ b/brainscore_language/model_helpers/localize.py @@ -7,7 +7,6 @@ import logging import numpy as np import transformers -import pandas as pd from glob import glob from tqdm import tqdm @@ -114,7 +113,7 @@ def extract_representations( "non-words": {layer_name: np.zeros((len(langloc_dataset.sentences), hidden_dim)) for layer_name in layer_names} } - for batch_idx, batch_data in tqdm(enumerate(langloc_dataloader)): + for batch_idx, batch_data in tqdm(enumerate(langloc_dataloader), total=len(langloc_dataloader)): sents, non_words = batch_data sent_tokens = tokenizer(sents, truncation=True, max_length=12, return_tensors='pt').to(device) @@ -154,7 +153,7 @@ def localize_fed10(model_id: str, p_values_matrix = np.zeros((len(layer_names), hidden_dim)) t_values_matrix = np.zeros((len(layer_names), hidden_dim)) - for layer_idx, layer_name in tqdm(enumerate(layer_names)): + for layer_idx, layer_name in tqdm(enumerate(layer_names), total=len(layer_names)): sentences_actv = representations["sentences"][layer_name] non_words_actv = representations["non-words"][layer_name] diff --git a/brainscore_language/model_helpers/modeling_suma.py b/brainscore_language/model_helpers/modeling_suma.py new file mode 100644 index 00000000..b4d702a4 --- /dev/null +++ b/brainscore_language/model_helpers/modeling_suma.py @@ -0,0 +1,1176 @@ +# coding=utf-8 +# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved. +# +# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX +# and OPT implementations in this library. 
It has been modified from its +# original forms to accommodate minor architectural differences compared +# to GPT-NeoX and OPT used by the Meta AI team that trained the model. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" PyTorch SUMA model adapted from LLaMA.""" +import math +import warnings +import numpy as np +from typing import List, Optional, Tuple, Union, Any, Dict + +import torch +import torch.nn.functional as F +import torch.utils.checkpoint +from torch import nn +from torch.nn import CrossEntropyLoss +from pandas import read_pickle + +from transformers.activations import ACT2FN +from transformers.modeling_attn_mask_utils import ( + AttentionMaskConverter, + _prepare_4d_attention_mask, + _prepare_4d_causal_attention_mask, +) + +from transformers.configuration_utils import PretrainedConfig +from transformers.modeling_outputs import ModelOutput, CausalLMOutputWithPast +from transformers.modeling_utils import PreTrainedModel +from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS +from transformers.utils import ( + add_start_docstrings, + add_start_docstrings_to_model_forward, + logging, + replace_return_docstrings, +) + +from dataclasses import dataclass + +@dataclass +class BaseModelOutputWithPast(ModelOutput): + """ + Base class for model's outputs that may also contain a past key/values (to speed up sequential decoding). 
+ + Args: + last_hidden_state (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`): + Sequence of hidden-states at the output of the last layer of the model. + + If `past_key_values` is used only the last hidden-state of the sequences of shape `(batch_size, 1, + hidden_size)` is output. + past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`): + Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of shape + `(batch_size, num_heads, sequence_length, embed_size_per_head)`) and optionally if + `config.is_encoder_decoder=True` 2 additional tensors of shape `(batch_size, num_heads, + encoder_sequence_length, embed_size_per_head)`. + + Contains pre-computed hidden-states (key and values in the self-attention blocks and optionally if + `config.is_encoder_decoder=True` in the cross-attention blocks) that can be used (see `past_key_values` + input) to speed up sequential decoding. + hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`): + Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, + + one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`. + + Hidden-states of the model at the output of each layer plus the optional initial embedding outputs. + attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`): + Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length, + sequence_length)`. + + Attentions weights after the attention softmax, used to compute the weighted average in the self-attention + heads. 
+ """ + + last_hidden_state: torch.FloatTensor = None + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None + hidden_states: Optional[Tuple[torch.FloatTensor]] = None + attentions: Optional[Tuple[torch.FloatTensor]] = None + langnet_states: Optional[Tuple[torch.FloatTensor]] = None + internal_states: Optional[Tuple[torch.FloatTensor]] = None + +def custom_init_weights(module, method, variance: float=1): + if isinstance(module, nn.Linear) or isinstance(module, nn.Embedding): + if method == "uniform": + nn.init.uniform_(module.weight) + elif method == "normal": + nn.init.normal_(module.weight, std=variance) + elif method == "xavier_uniform": + nn.init.xavier_uniform_(module.weight, gain=variance) + elif method == "xavier_normal": + nn.init.xavier_normal_(module.weight, gain=variance) + elif method == "kaiming_uniform": + nn.init.kaiming_uniform_(module.weight) + elif method == "kaiming_normal": + nn.init.kaiming_normal_(module.weight) + elif method == "orthogonal": + nn.init.orthogonal_(module.weight, gain=variance) + + if hasattr(module, "bias") and module.bias is not None: + module.bias.data.fill_(0) # this was 0.01 + +logger = logging.get_logger(__name__) + +_CONFIG_FOR_DOC = "SUMAConfig" + +# coding=utf-8 +# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved. +# +# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX +# and OPT implementations in this library. It has been modified from its +# original forms to accommodate minor architectural differences compared +# to GPT-NeoX and OPT used by the Meta AI team that trained the model. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" SUMA model configuration""" + +logger = logging.get_logger(__name__) + +class SUMAConfig(PretrainedConfig): + r""" + This is the configuration class to store the configuration of a [`LlamaModel`]. It is used to instantiate an LLaMA + model according to the specified arguments, defining the model architecture. Instantiating a configuration with the + defaults will yield a similar configuration to that of the LLaMA-7B. + + Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the + documentation from [`PretrainedConfig`] for more information. + + + Args: + vocab_size (`int`, *optional*, defaults to 32000): + Vocabulary size of the LLaMA model. Defines the number of different tokens that can be represented by the + `inputs_ids` passed when calling [`LlamaModel`] + hidden_size (`int`, *optional*, defaults to 4096): + Dimension of the hidden representations. + intermediate_size (`int`, *optional*, defaults to 11008): + Dimension of the MLP representations. + num_hidden_layers (`int`, *optional*, defaults to 32): + Number of hidden layers in the Transformer decoder. + num_attention_heads (`int`, *optional*, defaults to 32): + Number of attention heads for each attention layer in the Transformer decoder. + num_key_value_heads (`int`, *optional*): + This is the number of key_value heads that should be used to implement Grouped Query Attention. 
If + `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if + `num_key_value_heads=1 the model will use Multi Query Attention (MQA) otherwise GQA is used. When + converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed + by meanpooling all the original heads within that group. For more details checkout [this + paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to + `num_attention_heads`. + hidden_act (`str` or `function`, *optional*, defaults to `"silu"`): + The non-linear activation function (function or string) in the decoder. + max_position_embeddings (`int`, *optional*, defaults to 2048): + The maximum sequence length that this model might ever be used with. Llama 1 supports up to 2048 tokens, + Llama 2 up to 4096, CodeLlama up to 16384. + initializer_range (`float`, *optional*, defaults to 0.02): + The standard deviation of the truncated_normal_initializer for initializing all weight matrices. + rms_norm_eps (`float`, *optional*, defaults to 1e-06): + The epsilon used by the rms normalization layers. + use_cache (`bool`, *optional*, defaults to `True`): + Whether or not the model should return the last key/values attentions (not used by all models). Only + relevant if `config.is_decoder=True`. + pad_token_id (`int`, *optional*): + Padding token id. + bos_token_id (`int`, *optional*, defaults to 1): + Beginning of stream token id. + eos_token_id (`int`, *optional*, defaults to 2): + End of stream token id. + pretraining_tp (`int`, *optional*, defaults to 1): + Experimental feature. Tensor parallelism rank used during pretraining. Please refer to [this + document](https://huggingface.co/docs/transformers/parallelism) to understand more about it. This value is + necessary to ensure exact reproducibility of the pretraining results. Please refer to [this + issue](https://github.com/pytorch/pytorch/issues/76232). 
+ tie_word_embeddings (`bool`, *optional*, defaults to `False`): + Whether to tie weight embeddings + rope_theta (`float`, *optional*, defaults to 10000.0): + The base period of the RoPE embeddings. + rope_scaling (`Dict`, *optional*): + Dictionary containing the scaling configuration for the RoPE embeddings. Currently supports two scaling + strategies: linear and dynamic. Their scaling factor must be a float greater than 1. The expected format is + `{"type": strategy name, "factor": scaling factor}`. When using this flag, don't update + `max_position_embeddings` to the expected new maximum. See the following thread for more information on how + these scaling strategies behave: + https://www.reddit.com/r/LocalLLaMA/comments/14mrgpr/dynamically_scaled_rope_further_increases/. This is an + experimental feature, subject to breaking API changes in future versions. + attention_bias (`bool`, defaults to `False`, *optional*, defaults to `False`): + Whether to use a bias in the query, key, value and output projection layers during self-attention. + attention_dropout (`float`, *optional*, defaults to 0.0): + The dropout ratio for the attention probabilities. 
+ + ```python + >>> from transformers import LlamaModel, SUMAConfig + + >>> # Initializing a LLaMA llama-7b style configuration + >>> configuration = SUMAConfig() + + >>> # Initializing a model from the llama-7b style configuration + >>> model = LlamaModel(configuration) + + >>> # Accessing the model configuration + >>> configuration = model.config + ```""" + + model_type = "llama" + keys_to_ignore_at_inference = ["past_key_values"] + + def __init__( + self, + vocab_size=32000, + hidden_size=4096, + intermediate_size=11008, + num_hidden_layers=32, + num_attention_heads=32, + num_key_value_heads=None, + hidden_act="silu", + max_position_embeddings=2048, + initializer_range=0.02, + rms_norm_eps=1e-6, + use_cache=True, + pad_token_id=None, + bos_token_id=1, + eos_token_id=2, + pretraining_tp=1, + tie_word_embeddings=False, + rope_theta=10000.0, + rope_scaling=None, + attention_bias=False, + attention_dropout=0.0, + num_cycles=2, + **kwargs, + ): + self.vocab_size = vocab_size + self.max_position_embeddings = max_position_embeddings + self.hidden_size = hidden_size + self.intermediate_size = intermediate_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + + # for backward compatibility + if num_key_value_heads is None: + num_key_value_heads = num_attention_heads + + self.num_key_value_heads = num_key_value_heads + self.hidden_act = hidden_act + self.initializer_range = initializer_range + self.rms_norm_eps = rms_norm_eps + self.pretraining_tp = pretraining_tp + self.use_cache = use_cache + self.attention_bias = attention_bias + self.attention_dropout = attention_dropout + self.num_cycles = num_cycles + + super().__init__( + pad_token_id=pad_token_id, + bos_token_id=bos_token_id, + eos_token_id=eos_token_id, + tie_word_embeddings=tie_word_embeddings, + **kwargs, + ) + +class Cache: + """ + Base, abstract class for all caches. The actual data structure is specific to each subclass. 
+ """ + + def update( + self, + key_states: torch.Tensor, + value_states: torch.Tensor, + layer_idx: int, + cache_kwargs: Optional[Dict[str, Any]] = None, + ) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Updates the cache with the new `key_states` and `value_states` for the layer `layer_idx`. + + Parameters: + key_states (`torch.Tensor`): + The new key states to cache. + value_states (`torch.Tensor`): + The new value states to cache. + layer_idx (`int`): + The index of the layer to cache the states for. + cache_kwargs (`Dict[str, Any]`, `optional`): + Additional arguments for the cache subclass. These are specific to each subclass and allow new types of + cache to be created. + + Return: + A tuple containing the updated key and value states. + """ + raise NotImplementedError("Make sure to implement `update` in a subclass.") + + def get_seq_length(self, layer_idx: Optional[int] = 0) -> int: + """Returns the sequence length of the cached states. A layer index can be optionally passed.""" + raise NotImplementedError("Make sure to implement `get_seq_length` in a subclass.") + + def get_max_length(self) -> Optional[int]: + """Returns the maximum sequence length of the cached states, if there is any.""" + raise NotImplementedError("Make sure to implement `get_max_length` in a subclass.") + + def get_usable_length(self, new_seq_length: int, layer_idx: Optional[int] = 0) -> int: + """Given the sequence length of the new inputs, returns the usable length of the cache.""" + # Cache without size limit -> all cache is usable + # Cache with size limit -> if the length cache plus the length of the new inputs is larger the maximum cache + # length, we will need to evict part of the cache (and thus not all cache is usable) + max_length = self.get_max_length() + previous_seq_length = self.get_seq_length(layer_idx) + if max_length is not None and previous_seq_length + new_seq_length > max_length: + return max_length - new_seq_length + return previous_seq_length + +class 
DynamicCache(Cache): + """ + A cache that grows dynamically as more tokens are generated. This is the default for generative models. + + It stores the Key and Value states as a list of tensors, one for each layer. The expected shape for each tensor is + `[batch_size, num_heads, seq_len, head_dim]`. + """ + + def __init__(self) -> None: + self.key_cache: List[torch.Tensor] = [] + self.value_cache: List[torch.Tensor] = [] + self.seen_tokens = 0 # Used in `generate` to keep tally of how many tokens the cache has seen + + def __getitem__(self, layer_idx: int) -> List[Tuple[torch.Tensor]]: + """ + Support for backwards-compatible `past_key_value` indexing, e.g. `past_key_value[0][0].shape[2]` to get the + sequence length. + """ + if layer_idx < len(self): + return (self.key_cache[layer_idx], self.value_cache[layer_idx]) + else: + raise KeyError(f"Cache only has {len(self)} layers, attempted to access layer with index {layer_idx}") + + def __iter__(self): + """ + Support for backwards-compatible `past_key_value` iteration, e.g. `for x in past_key_value:` to iterate over + keys and values + """ + for layer_idx in range(len(self)): + yield (self.key_cache[layer_idx], self.value_cache[layer_idx]) + + def __len__(self): + """ + Support for backwards-compatible `past_key_value` length, e.g. `len(past_key_value)`. This value corresponds + to the number of layers in the model. + """ + return len(self.key_cache) + + def update( + self, + key_states: torch.Tensor, + value_states: torch.Tensor, + layer_idx: int, + cache_kwargs: Optional[Dict[str, Any]] = None, + ) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Updates the cache with the new `key_states` and `value_states` for the layer `layer_idx`. + + Parameters: + key_states (`torch.Tensor`): + The new key states to cache. + value_states (`torch.Tensor`): + The new value states to cache. + layer_idx (`int`): + The index of the layer to cache the states for. 
+ cache_kwargs (`Dict[str, Any]`, `optional`): + Additional arguments for the cache subclass. No additional arguments are used in `DynamicCache`. + + Return: + A tuple containing the updated key and value states. + """ + # Update the number of seen tokens + if layer_idx == 0: + self.seen_tokens += key_states.shape[-2] + + # Update the cache + if len(self.key_cache) <= layer_idx: + self.key_cache.append(key_states) + self.value_cache.append(value_states) + else: + self.key_cache[layer_idx] = torch.cat([self.key_cache[layer_idx], key_states], dim=-2) + self.value_cache[layer_idx] = torch.cat([self.value_cache[layer_idx], value_states], dim=-2) + + return self.key_cache[layer_idx], self.value_cache[layer_idx] + + def get_seq_length(self, layer_idx: Optional[int] = 0) -> int: + """Returns the sequence length of the cached states. A layer index can be optionally passed.""" + if len(self.key_cache) <= layer_idx: + return 0 + return self.key_cache[layer_idx].shape[-2] + + def get_max_length(self) -> Optional[int]: + """Returns the maximum sequence length of the cached states. 
DynamicCache does not have a maximum length.""" + return None + + def reorder_cache(self, beam_idx: torch.LongTensor): + """Reorders the cache for beam search, given the selected beam indices.""" + for layer_idx in range(len(self.key_cache)): + device = self.key_cache[layer_idx].device + self.key_cache[layer_idx] = self.key_cache[layer_idx].index_select(0, beam_idx.to(device)) + device = self.value_cache[layer_idx].device + self.value_cache[layer_idx] = self.value_cache[layer_idx].index_select(0, beam_idx.to(device)) + + def to_legacy_cache(self) -> Tuple[Tuple[torch.Tensor], Tuple[torch.Tensor]]: + """Converts the `DynamicCache` instance into the its equivalent in the legacy cache format.""" + legacy_cache = () + for layer_idx in range(len(self)): + legacy_cache += ((self.key_cache[layer_idx], self.value_cache[layer_idx]),) + return legacy_cache + + @classmethod + def from_legacy_cache(cls, past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None) -> "DynamicCache": + """Converts a cache in the legacy cache format into an equivalent `DynamicCache`.""" + cache = cls() + if past_key_values is not None: + for layer_idx in range(len(past_key_values)): + key_states, value_states = past_key_values[layer_idx] + cache.update(key_states, value_states, layer_idx) + return cache + +class LlamaRMSNorm(nn.Module): + def __init__(self, hidden_size, eps=1e-6): + """ + LlamaRMSNorm is equivalent to T5LayerNorm + """ + super().__init__() + self.weight = nn.Parameter(torch.ones(hidden_size)) + self.variance_epsilon = eps + + def forward(self, hidden_states): + input_dtype = hidden_states.dtype + hidden_states = hidden_states.to(torch.float32) + variance = hidden_states.pow(2).mean(-1, keepdim=True) + hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon) + return self.weight * hidden_states.to(input_dtype) + +ALL_LAYERNORM_LAYERS.append(LlamaRMSNorm) + +def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: + """ + This is the 
equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch, + num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim) + """ + batch, num_key_value_heads, slen, head_dim = hidden_states.shape + if n_rep == 1: + return hidden_states + hidden_states = hidden_states[:, :, None, :, :].expand(batch, num_key_value_heads, n_rep, slen, head_dim) + return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim) + +class LlamaAttention(nn.Module): + """Multi-headed attention from 'Attention Is All You Need' paper""" + + def __init__(self, config: SUMAConfig, layer_idx: Optional[int] = None): + super().__init__() + self.config = config + self.layer_idx = layer_idx + if layer_idx is None: + logger.warning_once( + f"Instantiating {self.__class__.__name__} without passing `layer_idx` is not recommended and will " + "to errors during the forward call, if caching is used. Please make sure to provide a `layer_idx` " + "when creating this class." + ) + + self.attention_dropout = config.attention_dropout + self.hidden_size = config.hidden_size + self.num_heads = config.num_attention_heads + self.head_dim = self.hidden_size // self.num_heads + self.num_key_value_heads = config.num_key_value_heads + self.num_key_value_groups = self.num_heads // self.num_key_value_heads + self.max_position_embeddings = config.max_position_embeddings + self.is_causal = True + + self.use_pos_emb = False + + if (self.head_dim * self.num_heads) != self.hidden_size: + raise ValueError( + f"hidden_size must be divisible by num_heads (got `hidden_size`: {self.hidden_size}" + f" and `num_heads`: {self.num_heads})." 
+ ) + + self.q_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=config.attention_bias) + self.k_proj = nn.Linear(self.hidden_size, self.num_key_value_heads * self.head_dim, bias=config.attention_bias) + self.v_proj = nn.Linear(self.hidden_size, self.num_key_value_heads * self.head_dim, bias=config.attention_bias) + self.o_proj = nn.Linear(self.num_heads * self.head_dim, self.hidden_size, bias=config.attention_bias) + + def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): + return tensor.view(bsz, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous() + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_value: Optional[Cache] = None, + output_attentions: bool = False, + use_cache: bool = False, + **kwargs, + ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: + if "padding_mask" in kwargs: + warnings.warn( + "Passing `padding_mask` is deprecated and will be removed in v4.37. 
Please make sure use `attention_mask` instead.`" + ) + + bsz, q_len, _ = hidden_states.size() + + if self.config.pretraining_tp > 1: + key_value_slicing = (self.num_key_value_heads * self.head_dim) // self.config.pretraining_tp + query_slices = self.q_proj.weight.split( + (self.num_heads * self.head_dim) // self.config.pretraining_tp, dim=0 + ) + key_slices = self.k_proj.weight.split(key_value_slicing, dim=0) + value_slices = self.v_proj.weight.split(key_value_slicing, dim=0) + + query_states = [F.linear(hidden_states, query_slices[i]) for i in range(self.config.pretraining_tp)] + query_states = torch.cat(query_states, dim=-1) + + key_states = [F.linear(hidden_states, key_slices[i]) for i in range(self.config.pretraining_tp)] + key_states = torch.cat(key_states, dim=-1) + + value_states = [F.linear(hidden_states, value_slices[i]) for i in range(self.config.pretraining_tp)] + value_states = torch.cat(value_states, dim=-1) + + else: + query_states = self.q_proj(hidden_states) + key_states = self.k_proj(hidden_states) + value_states = self.v_proj(hidden_states) + + query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2) + key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2) + value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2) + + kv_seq_len = key_states.shape[-2] + if past_key_value is not None: + if self.layer_idx is None: + raise ValueError( + f"The cache structure has changed since version v4.36. If you are using {self.__class__.__name__} " + "for auto-regressive decoding with k/v caching, please make sure to initialize the attention class " + "with a layer index." 
+ ) + kv_seq_len += past_key_value.get_usable_length(kv_seq_len, self.layer_idx) + + key_states = repeat_kv(key_states, self.num_key_value_groups) + value_states = repeat_kv(value_states, self.num_key_value_groups) + + attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) / math.sqrt(self.head_dim) + + if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len): + raise ValueError( + f"Attention weights should be of size {(bsz, self.num_heads, q_len, kv_seq_len)}, but is" + f" {attn_weights.size()}" + ) + + if attention_mask is not None: + if attention_mask.size() != (bsz, 1, q_len, kv_seq_len): + raise ValueError( + f"Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.size()}" + ) + ######### Badr Edits to account for custom AttentionMaskConverter ########## + # if self.add_attn_mask: + attn_weights = attn_weights + attention_mask + # else: + # attn_weights = attn_weights * attention_mask + + # upcast attention to fp32 + attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype) + + # attn_weights = nn.functional.dropout(attn_weights, p=self.attention_dropout, training=self.training) + attn_output = torch.matmul(attn_weights, value_states) + + if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim): + raise ValueError( + f"`attn_output` should be of size {(bsz, self.num_heads, q_len, self.head_dim)}, but is" + f" {attn_output.size()}" + ) + + attn_output = attn_output.transpose(1, 2).contiguous() + + attn_output = attn_output.reshape(bsz, q_len, self.hidden_size) + + if self.config.pretraining_tp > 1: + attn_output = attn_output.split(self.hidden_size // self.config.pretraining_tp, dim=2) + o_proj_slices = self.o_proj.weight.split(self.hidden_size // self.config.pretraining_tp, dim=1) + attn_output = sum([F.linear(attn_output[i], o_proj_slices[i]) for i in range(self.config.pretraining_tp)]) + else: + attn_output = self.o_proj(attn_output) + + if not 
output_attentions: + attn_weights = None + + return attn_output, attn_weights, past_key_value + +LLAMA_ATTENTION_CLASSES = { + "eager": LlamaAttention, +} + +class SUMADecoderLayer(nn.Module): + def __init__(self, config: SUMAConfig, layer_idx: int): + super().__init__() + self.hidden_size = config.hidden_size + + self.self_attn = LLAMA_ATTENTION_CLASSES[config._attn_implementation](config=config, layer_idx=layer_idx) + self.input_layernorm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps) + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_value: Optional[Tuple[torch.Tensor]] = None, + output_attentions: Optional[bool] = False, + use_cache: Optional[bool] = False, + **kwargs, + ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]: + """ + Args: + hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)` + attention_mask (`torch.FloatTensor`, *optional*): + attention mask of size `(batch_size, sequence_length)` if flash attention is used or `(batch_size, 1, + query_sequence_length, key_sequence_length)` if default attention is used. + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under + returned tensors for more detail. + use_cache (`bool`, *optional*): + If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding + (see `past_key_values`). + past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states + """ + if "padding_mask" in kwargs: + warnings.warn( + "Passing `padding_mask` is deprecated and will be removed in v4.37. 
Please make sure use `attention_mask` instead.`" + ) + + internal_states = [] + + residual = hidden_states + + hidden_states = self.input_layernorm(hidden_states) + + internal_states += [hidden_states] + + # Self Attention + hidden_states, self_attn_weights, present_key_value = self.self_attn( + hidden_states=hidden_states, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_value=past_key_value, + output_attentions=output_attentions, + use_cache=use_cache, + **kwargs, + ) + + internal_states += [hidden_states] + + hidden_states = residual + hidden_states + + outputs = (hidden_states, internal_states) + + if output_attentions: + outputs += (self_attn_weights,) + + if use_cache: + outputs += (present_key_value,) + + return outputs + + +LLAMA_START_DOCSTRING = r""" + This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the + library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads + etc.) + + This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass. + Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage + and behavior. + + Parameters: + config ([`SUMAConfig`]): + Model configuration class with all the parameters of the model. Initializing with a config file does not + load the weights associated with the model, only the configuration. Check out the + [`~PreTrainedModel.from_pretrained`] method to load the model weights. 
+""" + + +@add_start_docstrings( + "The bare LLaMA Model outputting raw hidden-states without any specific head on top.", + LLAMA_START_DOCSTRING, +) +class LlamaPreTrainedModel(PreTrainedModel): + config_class = SUMAConfig + base_model_prefix = "model" + supports_gradient_checkpointing = True + _no_split_modules = ["SUMADecoderLayer"] + _skip_keys_device_placement = "past_key_values" + _supports_cache_class = True + + def _init_weights(self, module): + std = self.config.initializer_range + if isinstance(module, nn.Linear): + module.weight.data.normal_(mean=0.0, std=std) + if module.bias is not None: + module.bias.data.zero_() + elif isinstance(module, nn.Embedding): + module.weight.data.normal_(mean=0.0, std=std) + if module.padding_idx is not None: + module.weight.data[module.padding_idx].zero_() + + +LLAMA_INPUTS_DOCSTRING = r""" + Args: + input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`): + Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you provide + it. + + Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and + [`PreTrainedTokenizer.__call__`] for details. + + [What are input IDs?](../glossary#input-ids) + attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*): + Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + + [What are attention masks?](../glossary#attention-mask) + + Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and + [`PreTrainedTokenizer.__call__`] for details. + + If `past_key_values` is used, optionally only the last `input_ids` have to be input (see + `past_key_values`). + + If you want to change padding behavior, you should read [`modeling_opt._prepare_decoder_attention_mask`] + and modify to your needs. 
See diagram 1 in [the paper](https://arxiv.org/abs/1910.13461) for more + information on the default strategy. + + - 1 indicates the head is **not masked**, + - 0 indicates the head is **masked**. + position_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*): + Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0, + config.n_positions - 1]`. + + [What are position IDs?](../glossary#position-ids) + past_key_values (`Cache` or `tuple(tuple(torch.FloatTensor))`, *optional*): + Pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention + blocks) that can be used to speed up sequential decoding. This typically consists in the `past_key_values` + returned by the model at a previous stage of decoding, when `use_cache=True` or `config.use_cache=True`. + + Two formats are allowed: + - a [`~cache_utils.Cache`] instance; + - Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of + shape `(batch_size, num_heads, sequence_length, embed_size_per_head)`). This is also known as the legacy + cache format. + + The model will output the same cache format that is fed as input. If no `past_key_values` are passed, the + legacy cache format will be returned. + + If `past_key_values` are used, the user can optionally input only the last `input_ids` (those that don't + have their past key value states given to this model) of shape `(batch_size, 1)` instead of all `input_ids` + of shape `(batch_size, sequence_length)`. + inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): + Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This + is useful if you want more control over how to convert `input_ids` indices into associated vectors than the + model's internal embedding lookup matrix. 
+ use_cache (`bool`, *optional*): + If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see + `past_key_values`). + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned + tensors for more detail. + output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for + more detail. + return_dict (`bool`, *optional*): + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. +""" + + +@add_start_docstrings( + "The bare LLaMA Model outputting raw hidden-states without any specific head on top.", + LLAMA_START_DOCSTRING, +) +class SUMAModel(LlamaPreTrainedModel): + """ + Transformer decoder consisting of *config.num_hidden_layers* layers. Each layer is a [`SUMADecoderLayer`] + + Args: + config: SUMAConfig + """ + + def __init__(self, config: SUMAConfig): + super().__init__(config) + self.padding_idx = config.pad_token_id + self.vocab_size = config.vocab_size + self.num_cycles = config.num_cycles + self.language_mask = None + + self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx) + self.layers = nn.ModuleList( + [SUMADecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)] + ) + self._use_sdpa = config._attn_implementation == "sdpa" + self._use_flash_attention_2 = config._attn_implementation == "flash_attention_2" + self.norm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps) + + self.gradient_checkpointing = False + # Initialize weights and apply final processing + self.post_init() + + def get_input_embeddings(self): + return self.embed_tokens + + def set_input_embeddings(self, value): + self.embed_tokens = value + + def set_language_mask(self, language_mask): + self.language_mask = torch.tensor(language_mask.flatten()) + + 
@add_start_docstrings_to_model_forward(LLAMA_INPUTS_DOCSTRING) + def forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + num_cycles: Optional[int] = None, + ) -> Union[Tuple, BaseModelOutputWithPast]: + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + use_cache = use_cache if use_cache is not None else self.config.use_cache + + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + # retrieve input_ids and inputs_embeds + if input_ids is not None and inputs_embeds is not None: + raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time") + elif input_ids is not None: + batch_size, seq_length = input_ids.shape[:2] + elif inputs_embeds is not None: + batch_size, seq_length = inputs_embeds.shape[:2] + else: + raise ValueError("You have to specify either input_ids or inputs_embeds") + + if self.gradient_checkpointing and self.training: + if use_cache: + logger.warning_once( + "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." 
+ ) + use_cache = False + + past_key_values_length = 0 + if use_cache: + use_legacy_cache = not isinstance(past_key_values, Cache) + if use_legacy_cache: + past_key_values = DynamicCache.from_legacy_cache(past_key_values) + past_key_values_length = past_key_values.get_usable_length(seq_length) + + if position_ids is None: + device = input_ids.device if input_ids is not None else inputs_embeds.device + position_ids = torch.arange( + past_key_values_length, seq_length + past_key_values_length, dtype=torch.long, device=device + ) + position_ids = position_ids.unsqueeze(0) + + if inputs_embeds is None: + inputs_embeds = self.embed_tokens(input_ids) + + + # 4d mask is passed through the layers + attention_mask = _prepare_4d_causal_attention_mask( + attention_mask, (batch_size, seq_length), inputs_embeds, past_key_values_length + ) + + # embed positions + hidden_states = inputs_embeds + + # decoder layers + all_hidden_states = () if output_hidden_states else None + all_self_attns = () if output_attentions else None + next_decoder_cache = None + + if num_cycles is None: + if type(self.num_cycles) == str and "dynamic" in self.num_cycles: + if self.num_cycles == "dynamic": + num_cycles = int(np.ceil(hidden_states.size(1) / 8)) + else: + factor = int(self.num_cycles.split("-")[1]) + num_cycles = int(np.ceil(hidden_states.size(1) / factor)) + + elif self.num_cycles == "default": + num_cycles = 1 + else: + num_cycles = int(self.num_cycles) + + internal_states = [] + for _ in range(num_cycles): + for decoder_layer in self.layers: + if output_hidden_states: + all_hidden_states += (hidden_states,) + + if self.gradient_checkpointing and self.training: + layer_outputs = self._gradient_checkpointing_func( + decoder_layer.__call__, + hidden_states, + attention_mask, + position_ids, + past_key_values, + output_attentions, + use_cache, + ) + else: + layer_outputs = decoder_layer( + hidden_states, + attention_mask=attention_mask, + position_ids=position_ids, + 
past_key_value=past_key_values, + output_attentions=output_attentions, + use_cache=use_cache, + ) + + hidden_states = layer_outputs[0] + internal_states.extend(layer_outputs[1]) + + if use_cache: + next_decoder_cache = layer_outputs[3 if output_attentions else 2] + + if output_attentions: + all_self_attns += (layer_outputs[2],) + + hidden_states = self.norm(hidden_states) + + # add hidden states from the last decoder layer + if output_hidden_states: + all_hidden_states += (hidden_states,) + + if self.language_mask is not None: + internal_states = torch.cat(internal_states, dim=-1) + langnet_states = internal_states[:,:,self.language_mask.bool()] + else: + langnet_states = hidden_states + + internal_states = torch.stack(internal_states) + + next_cache = None + + if use_cache: + next_cache = next_decoder_cache.to_legacy_cache() if use_legacy_cache else next_decoder_cache + if not return_dict: + return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns, langnet_states] if v is not None) + + return BaseModelOutputWithPast( + last_hidden_state=hidden_states, + past_key_values=next_cache, + hidden_states=all_hidden_states, + attentions=all_self_attns, + langnet_states=langnet_states, + internal_states=internal_states, + ) + +class SUMAForCausalLM(LlamaPreTrainedModel): + _tied_weights_keys = ["lm_head.weight"] + + def __init__(self, config): + super().__init__(config) + self.use_cache = config.use_cache + self.model = SUMAModel(config) + self.vocab_size = config.vocab_size + self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False) + + # Initialize weights and apply final processing + self.post_init() + + def get_input_embeddings(self): + return self.model.embed_tokens + + def set_input_embeddings(self, value): + self.model.embed_tokens = value + + def get_output_embeddings(self): + return self.lm_head + + def set_output_embeddings(self, new_embeddings): + self.lm_head = new_embeddings + + def set_decoder(self, decoder): + 
self.model = decoder + + def get_decoder(self): + return self.model + + @add_start_docstrings_to_model_forward(LLAMA_INPUTS_DOCSTRING) + @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) + def forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, CausalLMOutputWithPast]: + r""" + Args: + labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*): + Labels for computing the masked language modeling loss. Indices should either be in `[0, ..., + config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored + (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`. + + Returns: + + Example: + + ```python + >>> from transformers import AutoTokenizer, LlamaForCausalLM + + >>> model = LlamaForCausalLM.from_pretrained(PATH_TO_CONVERTED_WEIGHTS) + >>> tokenizer = AutoTokenizer.from_pretrained(PATH_TO_CONVERTED_TOKENIZER) + + >>> prompt = "Hey, are you conscious? Can you talk to me?" + >>> inputs = tokenizer(prompt, return_tensors="pt") + + >>> # Generate + >>> generate_ids = model.generate(inputs.input_ids, max_length=30) + >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0] + "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you." 
+ ```""" + use_cache = self.use_cache + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn) + outputs: BaseModelOutputWithPast = self.model( + input_ids=input_ids, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_values=past_key_values, + inputs_embeds=inputs_embeds, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=True, + ) + + hidden_states = outputs.langnet_states + + # hidden_states = outputs.langnet_states + if self.config.pretraining_tp > 1: + lm_head_slices = self.lm_head.weight.split(self.vocab_size // self.config.pretraining_tp, dim=0) + logits = [F.linear(hidden_states, lm_head_slices[i]) for i in range(self.config.pretraining_tp)] + logits = torch.cat(logits, dim=-1) + else: + learned_hidden_states = self.lm_base(inputs_embeds=hidden_states, attention_mask=attention_mask)[0] + logits = self.lm_head(learned_hidden_states) + logits = logits.float() + + loss = None + if labels is not None: + # Shift so that tokens < n predict n + shift_logits = logits[..., :-1, :].contiguous() + shift_labels = labels[..., 1:].contiguous() + # Flatten the tokens + loss_fct = CrossEntropyLoss() + shift_logits = shift_logits.view(-1, self.config.vocab_size) + shift_labels = shift_labels.view(-1) + # Enable model parallelism + shift_labels = shift_labels.to(shift_logits.device) + loss = loss_fct(shift_logits, shift_labels) + + if not return_dict: + output = (logits,) #+ outputs[1:] + return (loss,) + output if loss is not None else output + + return CausalLMOutputWithPast( + loss=loss, + logits=logits, + 
past_key_values=outputs.past_key_values, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) + + def prepare_inputs_for_generation( + self, input_ids, past_key_values=None, attention_mask=None, inputs_embeds=None, **kwargs + ): + if past_key_values is not None: + if isinstance(past_key_values, Cache): + cache_length = past_key_values.get_seq_length() + past_length = past_key_values.seen_tokens + max_cache_length = past_key_values.get_max_length() + else: + cache_length = past_length = past_key_values[0][0].shape[2] + max_cache_length = None + + # Keep only the unprocessed tokens: + # 1 - If the length of the attention_mask exceeds the length of input_ids, then we are in a setting where + # some of the inputs are exclusivelly passed as part of the cache (e.g. when passing input_embeds as + # input) + if attention_mask is not None and attention_mask.shape[1] > input_ids.shape[1]: + input_ids = input_ids[:, -(attention_mask.shape[1] - past_length) :] + # 2 - If the past_length is smaller than input_ids', then input_ids holds all input tokens. We can discard + # input_ids based on the past_length. + elif past_length < input_ids.shape[1]: + input_ids = input_ids[:, past_length:] + # 3 - Otherwise (past_length >= input_ids.shape[1]), let's assume input_ids only has unprocessed tokens. + + # If we are about to go beyond the maximum cache length, we need to crop the input attention mask. 
+ if ( + max_cache_length is not None + and attention_mask is not None + and cache_length + input_ids.shape[1] > max_cache_length + ): + attention_mask = attention_mask[:, -max_cache_length:] + + position_ids = kwargs.get("position_ids", None) + if attention_mask is not None and position_ids is None: + # create position_ids on the fly for batch generation + position_ids = attention_mask.long().cumsum(-1) - 1 + position_ids.masked_fill_(attention_mask == 0, 1) + if past_key_values: + position_ids = position_ids[:, -input_ids.shape[1] :] + + # if `inputs_embeds` are passed, we only want to use them in the 1st generation step + if inputs_embeds is not None and past_key_values is None: + model_inputs = {"inputs_embeds": inputs_embeds} + else: + model_inputs = {"input_ids": input_ids} + + model_inputs.update( + { + "position_ids": position_ids, + "past_key_values": past_key_values, + "use_cache": kwargs.get("use_cache"), + "attention_mask": attention_mask, + } + ) + return model_inputs + + @staticmethod + def _reorder_cache(past_key_values, beam_idx): + reordered_past = () + for layer_past in past_key_values: + reordered_past += ( + tuple(past_state.index_select(0, beam_idx.to(past_state.device)) for past_state in layer_past), + ) + return reordered_past \ No newline at end of file diff --git a/brainscore_language/models/suma/__init__.py b/brainscore_language/models/suma/__init__.py new file mode 100644 index 00000000..4063bbb9 --- /dev/null +++ b/brainscore_language/models/suma/__init__.py @@ -0,0 +1,30 @@ +from brainscore_language import model_registry +from brainscore_language import ArtificialSubject +from brainscore_language.model_helpers.huggingface import HuggingfaceSubject +from brainscore_language.model_helpers.modeling_suma import SUMAModel, SUMAConfig +from transformers import AutoTokenizer + +layer_names = [f'layers.{layer_num}.{layer_desc}' + for layer_num in range(1) + for layer_desc in ["input_layernorm", "self_attn"] +] + +model_registry['suma'] = 
lambda: HuggingfaceSubject( + model_id='suma', + model=SUMAModel( + config=SUMAConfig( + num_hidden_layers=1, + num_attention_heads=512, + num_key_value_heads=512, + num_cycles=2, + ) + ), + tokenizer=AutoTokenizer.from_pretrained('meta-llama/Llama-2-7b-hf'), + region_layer_mapping={ArtificialSubject.RecordingTarget.language_system: layer_names}, + use_localizer=True, + localizer_kwargs={ + 'hidden_dim': 4096, + 'batch_size': 16, + "top_k": 4096, + } +) \ No newline at end of file diff --git a/examples/score_suma.py b/examples/score_suma.py new file mode 100644 index 00000000..edbdce54 --- /dev/null +++ b/examples/score_suma.py @@ -0,0 +1,13 @@ +from brainscore_language import score + +model_score = score(model_identifier='suma', benchmark_identifier='Pereira2018.243sentences-linear') +print(model_score) + +''' +array(0.98581247) +Attributes: + raw: \narray(0.34876988) + ceiling: \narray(0.35378928) + model_identifier: suma + benchmark_identifier: Pereira2018.243sentences-linear +''' \ No newline at end of file From aa4fac8a7664e3738eabb7e03bc38f9ac442df2b Mon Sep 17 00:00:00 2001 From: Badr AlKhamissi Date: Sun, 18 Aug 2024 19:43:49 +0200 Subject: [PATCH 7/8] added support for ridge regression --- .../metrics/linear_predictivity/__init__.py | 3 ++- .../metrics/linear_predictivity/metric.py | 14 +++++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/brainscore_language/metrics/linear_predictivity/__init__.py b/brainscore_language/metrics/linear_predictivity/__init__.py index 30826d9f..b7a9d9e6 100644 --- a/brainscore_language/metrics/linear_predictivity/__init__.py +++ b/brainscore_language/metrics/linear_predictivity/__init__.py @@ -1,4 +1,5 @@ from brainscore_language import metric_registry -from .metric import linear_pearsonr +from .metric import linear_pearsonr, ridge_pearsonr metric_registry['linear_pearsonr'] = linear_pearsonr +metric_registry['ridge_pearsonr'] = ridge_pearsonr \ No newline at end of file diff --git 
a/brainscore_language/metrics/linear_predictivity/metric.py b/brainscore_language/metrics/linear_predictivity/metric.py index be574e46..c0dcd251 100644 --- a/brainscore_language/metrics/linear_predictivity/metric.py +++ b/brainscore_language/metrics/linear_predictivity/metric.py @@ -1,6 +1,6 @@ import numpy as np import scipy.stats -from sklearn.linear_model import LinearRegression +from sklearn.linear_model import LinearRegression, RidgeCV from sklearn.preprocessing import scale from brainio.assemblies import NeuroidAssembly, array_is_element, DataAssembly @@ -157,6 +157,11 @@ def __call__(self, source: DataAssembly, target: DataAssembly) -> Score: coord: (dims, value) for coord, dims, value in walk_coords(target)}, dims=target.dims) return self.cross_regressed_correlation(source, target) +def ridge_regression(xarray_kwargs=None): + regression = RidgeCV(alphas=[10 ** x for x in range(-10, 10)]) + xarray_kwargs = xarray_kwargs or {} + regression = XarrayRegression(regression, **xarray_kwargs) + return regression def linear_regression(xarray_kwargs=None): regression = LinearRegression() @@ -164,13 +169,16 @@ def linear_regression(xarray_kwargs=None): regression = XarrayRegression(regression, **xarray_kwargs) return regression - def pearsonr_correlation(xarray_kwargs=None): xarray_kwargs = xarray_kwargs or {} return XarrayCorrelation(scipy.stats.pearsonr, **xarray_kwargs) - def linear_pearsonr(*args, regression_kwargs=None, correlation_kwargs=None, **kwargs): regression = linear_regression(regression_kwargs or {}) correlation = pearsonr_correlation(correlation_kwargs or {}) return CrossRegressedCorrelation(*args, regression=regression, correlation=correlation, **kwargs) + +def ridge_pearsonr(*args, regression_kwargs=None, correlation_kwargs=None, **kwargs): + regression = ridge_regression(regression_kwargs or {}) + correlation = pearsonr_correlation(correlation_kwargs or {}) + return CrossRegressedCorrelation(*args, regression=regression, correlation=correlation, 
**kwargs) From 856f53073a583edf7fa2e2347bcfe41e5919ecde Mon Sep 17 00:00:00 2001 From: Badr AlKhamissi Date: Sun, 18 Aug 2024 19:50:01 +0200 Subject: [PATCH 8/8] added rdm and cka metrics --- brainscore_language/metrics/cka/__init__.py | 4 + brainscore_language/metrics/cka/metric.py | 111 ++++++++++++++++++++ brainscore_language/metrics/rdm/__init__.py | 13 +++ brainscore_language/metrics/rdm/metric.py | 106 +++++++++++++++++++ 4 files changed, 234 insertions(+) create mode 100644 brainscore_language/metrics/cka/__init__.py create mode 100644 brainscore_language/metrics/cka/metric.py create mode 100644 brainscore_language/metrics/rdm/__init__.py create mode 100644 brainscore_language/metrics/rdm/metric.py diff --git a/brainscore_language/metrics/cka/__init__.py b/brainscore_language/metrics/cka/__init__.py new file mode 100644 index 00000000..ad2dd7ec --- /dev/null +++ b/brainscore_language/metrics/cka/__init__.py @@ -0,0 +1,4 @@ +from brainscore_language import metric_registry +from .metric import CKACrossValidated + +metric_registry['cka'] = CKACrossValidated \ No newline at end of file diff --git a/brainscore_language/metrics/cka/metric.py b/brainscore_language/metrics/cka/metric.py new file mode 100644 index 00000000..49f97cf0 --- /dev/null +++ b/brainscore_language/metrics/cka/metric.py @@ -0,0 +1,111 @@ +import math +import numpy as np + +from brainscore_core.metrics import Score +from brainscore_language.utils.transformations import TestOnlyCrossValidation + + +class Defaults: + expected_dims = ('presentation', 'neuroid') + stimulus_coord = 'stimulus_id' + neuroid_dim = 'neuroid' + neuroid_coord = 'neuroid_id' + +def centering(K): + n = K.shape[0] + unit = np.ones([n, n]) + I = np.eye(n) + H = I - unit / n + + return np.dot(np.dot(H, K), H) + # HKH are the same with KH, KH is the first centering, H(KH) do the second time, + # results are the sme with one time centering + # return np.dot(H, K) # KH + + +def rbf(X, sigma=None): + GX = np.dot(X, X.T) + KX = 
np.diag(GX) - GX + (np.diag(GX) - GX).T + if sigma is None: + mdist = np.median(KX[KX != 0]) + sigma = math.sqrt(mdist) + KX *= - 0.5 / (sigma * sigma) + KX = np.exp(KX) + return KX + + +def kernel_HSIC(X, Y, sigma): + return np.sum(centering(rbf(X, sigma)) * centering(rbf(Y, sigma))) + + +def linear_HSIC(X, Y): + L_X = np.dot(X, X.T) + L_Y = np.dot(Y, Y.T) + return np.sum(centering(L_X) * centering(L_Y)) + + +def linear_CKA(X, Y): + hsic = linear_HSIC(X, Y) + var1 = np.sqrt(linear_HSIC(X, X)) + var2 = np.sqrt(linear_HSIC(Y, Y)) + + return hsic / (var1 * var2) + + +def kernel_CKA(X, Y, sigma=None): + hsic = kernel_HSIC(X, Y, sigma) + var1 = np.sqrt(kernel_HSIC(X, X, sigma)) + var2 = np.sqrt(kernel_HSIC(Y, Y, sigma)) + + return hsic / (var1 * var2) + +class CKAMetric: + """ + Computes a similarity index for the similarity between two assemblies with centered kernel alignment (CKA). + + Kornblith et al., 2019 http://proceedings.mlr.press/v97/kornblith19a/kornblith19a.pdf + """ + + def __init__(self, comparison_coord=Defaults.stimulus_coord): + self._comparison_coord = comparison_coord + + def __call__(self, assembly1, assembly2): + """ + :param brainscore.assemblies.NeuroidAssembly assembly1: + :param brainscore.assemblies.NeuroidAssembly assembly2: + :return: brainscore.assemblies.DataAssembly + """ + # ensure value order + assembly1 = assembly1.sortby(self._comparison_coord) + assembly2 = assembly2.sortby(self._comparison_coord) + assert (assembly1[self._comparison_coord].values == assembly2[self._comparison_coord].values).all() + # ensure dimensions order + dims = assembly1[self._comparison_coord].dims + np.testing.assert_array_equal(assembly2[self._comparison_coord].dims, dims) + assembly1 = assembly1.transpose(*(list(dims) + [dim for dim in assembly1.dims if dim not in dims])) + assembly2 = assembly2.transpose(*(list(dims) + [dim for dim in assembly2.dims if dim not in dims])) + similarity = linear_CKA(assembly1, assembly2) + return Score(similarity) + +class 
CKACrossValidated: + """ + Computes a cross-validated similarity index for the similarity between two assemblies + with centered kernel alignment (CKA). + + Kornblith et al., 2019 http://proceedings.mlr.press/v97/kornblith19a/kornblith19a.pdf + """ + + def __init__(self, comparison_coord=Defaults.stimulus_coord, crossvalidation_kwargs=None): + self._metric = CKAMetric(comparison_coord=comparison_coord) + crossvalidation_defaults = dict(test_size=.9) # leave 10% out + crossvalidation_kwargs = {**crossvalidation_defaults, **(crossvalidation_kwargs or {})} + self._cross_validation = TestOnlyCrossValidation(**crossvalidation_kwargs) + + def __call__(self, assembly1, assembly2): + """ + :param brainio.assemblies.NeuroidAssembly assembly1: + :param brainio.assemblies.NeuroidAssembly assembly2: + :return: brainscore.metrics.Score + """ + + return self._cross_validation(assembly1, assembly2, apply=self._metric) \ No newline at end of file diff --git a/brainscore_language/metrics/rdm/__init__.py b/brainscore_language/metrics/rdm/__init__.py new file mode 100644 index 00000000..f53f5f8d --- /dev/null +++ b/brainscore_language/metrics/rdm/__init__.py @@ -0,0 +1,13 @@ +from brainscore_language import metric_registry +from .metric import RDMCrossValidated + +metric_registry['rdm'] = RDMCrossValidated + +BIBTEX = """@article{kriegeskorte2008representational, + title={Representational similarity analysis-connecting the branches of systems neuroscience}, + author={Kriegeskorte, Nikolaus and Mur, Marieke and Bandettini, Peter A}, + journal={Frontiers in systems neuroscience}, + pages={4}, + year={2008}, + publisher={Frontiers} +}""" \ No newline at end of file diff --git a/brainscore_language/metrics/rdm/metric.py b/brainscore_language/metrics/rdm/metric.py new file mode 100644 index 00000000..e5a46be4 --- /dev/null +++ b/brainscore_language/metrics/rdm/metric.py @@ -0,0 +1,106 @@ +import numpy as np +from scipy.stats import spearmanr + +from brainio.assemblies import DataAssembly, 
walk_coords, NeuroidAssembly +from brainscore_core.metrics import Metric, Score +from brainscore_language.utils.transformations import TestOnlyCrossValidation + +class XarrayDefaults: + expected_dims = ('presentation', 'neuroid') + stimulus_coord = 'stimulus_id' + neuroid_dim = 'neuroid' + neuroid_coord = 'neuroid_id' + +class RDMCrossValidated(Metric): + """ + Computes a coefficient for the similarity between two `RDM`s, using the upper triangular regions + + Kriegeskorte et al., 2008 https://doi.org/10.3389/neuro.06.004.2008 + """ + + def __init__(self, neuroid_dim=XarrayDefaults.neuroid_dim, comparison_coord=XarrayDefaults.stimulus_coord, + crossvalidation_kwargs=None): + self._metric = RDMMetric(neuroid_dim=neuroid_dim, comparison_coord=comparison_coord) + crossvalidation_defaults = dict(test_size=.9) # leave 10% out + # crossvalidation_defaults = dict(train_size=.9, test_size=None) + crossvalidation_kwargs = {**crossvalidation_defaults, **(crossvalidation_kwargs or {})} + self._cross_validation = TestOnlyCrossValidation(**crossvalidation_kwargs) + + def __call__(self, assembly1: NeuroidAssembly, assembly2: NeuroidAssembly) -> Score: + return self._cross_validation(assembly1, assembly2, apply=self._metric) + + +class RDMMetric(Metric): + """ + Computes a coefficient for the similarity between two `RDM`s, using the upper triangular regions + + Kriegeskorte et al., 2008 https://doi.org/10.3389/neuro.06.004.2008 + """ + + def __init__(self, neuroid_dim=XarrayDefaults.neuroid_dim, comparison_coord=XarrayDefaults.stimulus_coord): + self._neuroid_dim = neuroid_dim + self._rdm = RDM(neuroid_dim=neuroid_dim) + self._similarity = RDMSimilarity(comparison_coord=comparison_coord) + + def __call__(self, assembly1: NeuroidAssembly, assembly2: NeuroidAssembly) -> Score: + rdm1 = self._rdm(assembly1) + rdm2 = self._rdm(assembly2) + similarity = self._similarity(rdm1, rdm2) + return Score(similarity) + + +class RDM: + """ + Representational Dissimilarity Matrix. 
Converts an assembly of `presentation x neuroid` into a `presentation x presentation` RDM.
"multi-dimensional case not implemented" + indices = np.argsort(assembly[self._comparison_coord].values) + assembly = type(assembly)(assembly.values[np.ix_(indices, indices)], + coords={coord: (dims, values[indices] if dims == comparison_dims else values) + for coord, dims, values in walk_coords(assembly)}, + dims=assembly.dims) + return assembly \ No newline at end of file