-
Notifications
You must be signed in to change notification settings - Fork 0
/
sentiment.py
43 lines (30 loc) · 1.43 KB
/
sentiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import torch
from torch import nn
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import numpy as np
from config import HF_SENT_MODEL_NAME
class SentimentAnalyzer:
    """Sentiment classifier built on a Hugging Face sequence-classification model.

    The tokenizer and the model are both loaded from the checkpoint named by
    ``HF_SENT_MODEL_NAME``. Predictions are reported as one of the strings in
    ``label_list``.
    """

    def __init__(self):
        """Load the tokenizer and model for ``HF_SENT_MODEL_NAME`` from the HF hub."""
        self.MODEL_NAME = HF_SENT_MODEL_NAME
        self.tokenizer = AutoTokenizer.from_pretrained(self.MODEL_NAME)
        # Append .cuda() to this call to load the model on the GPU instead.
        self.model = AutoModelForSequenceClassification.from_pretrained(self.MODEL_NAME)
        # One label per logit position.
        # NOTE(review): assumes the checkpoint orders its classes as
        # negative / neutral / positive -- confirm against the model config.
        self.label_list = ["negativo", "neutro", "positivo"]

    def analyze(self, text):
        """Return the sentiment label predicted for ``text``.

        Args:
            text: The text to classify.

        Returns:
            The entry of ``label_list`` at the index of the most probable class.
        """
        # truncation=True asks the tokenizer to clip over-long inputs to its
        # configured maximum length instead of handing the model a sequence
        # that is longer than it can process.
        input_ids = self.tokenizer.encode(
            text,
            add_special_tokens=True,
            truncation=True,
        )

        # Add a fake batch dimension of size 1; use .cuda() here to move the
        # tensor to the GPU if the model lives there.
        batch = torch.tensor(input_ids).long().unsqueeze(0)

        # Pure inference: disable autograd so no graph is recorded for the
        # forward pass.
        with torch.no_grad():
            output = self.model(batch)

        # Drop the fake batch dimension again.
        logits = output.logits.squeeze(0)

        # Per the original author's note, the model was trained with a combined
        # log-likelihood + softmax loss, so probabilities are obtained by
        # applying a softmax on top of the logits.
        proba = nn.functional.softmax(logits, dim=0)

        # Take the index of the highest probability and map it to its label.
        best_index = int(torch.argmax(proba).item())
        return self.label_list[best_index]