Source code for pysentiment2.hiv4

import pandas as pd
from pysentiment2.base import STATIC_PATH, BaseDict


class HIV4(BaseDict):
    """
    Sentiment dictionary built from the Harvard IV-4 word list.

    Reference: http://www.wjh.harvard.edu/~inquirer/

    The terms for the dictionary are stemmed by the default tokenizer.
    """

    # CSV file holding the Harvard IV-4 entries, located under STATIC_PATH.
    PATH = '%s/HIV-4.csv' % STATIC_PATH

    def init_dict(self):
        """
        Fill ``_posset`` and ``_negset`` from the CSV file at ``PATH``.

        Rows whose ``Positiv`` column equals ``"Positiv"`` provide the
        positive terms, and rows whose ``Negativ`` column equals
        ``"Negativ"`` provide the negative terms. Each selected ``Entry``
        value is passed through ``self.tokenize_first``; missing results
        are dropped before the terms are collected into a set.
        """
        # Every column is read with the pandas 'category' dtype.
        lexicon = pd.read_csv(self.PATH, dtype='category')

        positive_rows = lexicon.query('Positiv == "Positiv"')
        positive_terms = positive_rows['Entry'].apply(self.tokenize_first)
        self._posset = set(positive_terms.dropna())

        negative_rows = lexicon.query('Negativ == "Negativ"')
        negative_terms = negative_rows['Entry'].apply(self.tokenize_first)
        self._negset = set(negative_terms.dropna())