Source code for projectreport.analyzer.parsers.multi.main

from typing import Final, List, Optional, Sequence, Type

from cached_property import cached_property
from typing_extensions import TypeGuard

from projectreport.analyzer.parsers.base import Parser
from projectreport.analyzer.parsers.data_types import ParserDataType
from projectreport.analyzer.parsers.folder import FolderParser
from projectreport.analyzer.parsers.github import GithubParser
from projectreport.analyzer.parsers.multi.file import MultiFileParser
from projectreport.analyzer.parsers.url import URLParser
from projectreport.license.model import License
from projectreport.logger import logger
from projectreport.parser_types import StandaloneParser
from projectreport.version import Version

# TODO: Improve the user's ability to customize parsers. Currently users need to mutate the
# PARSERS list and PARSER_DOC_FILES dict to customize parsers, as the matches_path classmethod
# does not take custom parsers passed to the constructor into account.
PARSERS: Final[List[Type[StandaloneParser]]] = [
    MultiFileParser,
    GithubParser,
]
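
# A minimal customization sketch, given the limitation noted in the TODO above: because
# matches_path only consults the module-level PARSERS list, a custom parser currently has
# to be registered by mutating that list before matching or constructing. The names below
# are hypothetical, e.g.:
#
#     from my_package.parsers import MyCustomParser  # hypothetical StandaloneParser subclass
#
#     PARSERS.append(MyCustomParser)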


class MainMultiParser(Parser):
    """
    The main parser that uses other parsers, both single and multi, together to
    determine information about a folder.
    """

    def __init__(
        self,
        path: str,
        file_names: Sequence[str],
        urls: Optional[Sequence[str]] = None,
        parsers: Optional[List[Type[StandaloneParser]]] = None,
    ):
        """
        :param path: This should be the path of a folder, rather than a path to a file
            that the singular parsers accept.
        :param file_names: Names of the files in the folder, used when matching
            folder-based parsers.
        :param urls: URLs to try with URL-based parsers such as GithubParser.
        :param parsers: Defaults to PARSERS.
        """
        self.parsers = parsers or PARSERS
        self.file_names = file_names
        self.urls = urls or []
        super().__init__(path)

    @classmethod
    def matches_path(
        cls, path: str, file_names: Sequence[str], urls: Optional[Sequence[str]] = None
    ) -> bool:
        urls = urls or []
        for parser in PARSERS:
            if _parser_matches_path(parser, path, file_names, urls):
                return True
        return False

    @cached_property
    def docstring(self) -> Optional[str]:
        return self._get_attr_from_first_parser_to_return_non_none("docstring")

    @cached_property
    def version(self) -> Optional[Version]:
        return self._get_attr_from_first_parser_to_return_non_none("version")

    @cached_property
    def topics(self) -> Optional[Sequence[str]]:
        return self._get_attr_from_first_parser_to_return_non_none("topics")

    @cached_property
    def license(self) -> Optional[License]:
        return self._get_attr_from_first_parser_to_return_non_none("license")

    def _get_attr_from_first_parser_to_return_non_none(self, attr: str):
        # Try each parser in order; return the first non-None value for attr.
        for parser in self.parsers:
            if _parser_matches_path(parser, self.path, self.file_names, self.urls):
                parser_obj = _construct_parser(
                    parser, self.path, self.file_names, self.urls
                )
                if parser_obj is None:
                    continue
                value = getattr(parser_obj, attr)
                if value is not None:
                    return value
        return None


def _parser_matches_path(
    parser: Type[StandaloneParser],
    folder: str,
    file_names: Sequence[str],
    urls: Sequence[str],
) -> bool:
    if _is_folder_parser(parser):
        return parser.matches_path(folder, file_names)
    elif _is_url_parser(parser):
        return any(parser.matches_path(url) for url in urls)
    raise NotImplementedError(
        f"No handling for {parser} with data type {parser.data_type}"
    )


def _construct_parser(
    parser: Type[StandaloneParser],
    folder: str,
    file_names: Sequence[str],
    urls: Sequence[str],
) -> Optional[StandaloneParser]:
    if _is_folder_parser(parser):
        return parser(folder, file_names)
    elif _is_url_parser(parser):
        for url in urls:
            if parser.matches_path(url):
                return parser(url)
        logger.warning(f"The URL parser {parser} did not match the given urls {urls}")
        return None
    raise NotImplementedError(
        f"No handling for {parser} with data type {parser.data_type}"
    )


def _is_folder_parser(parser: Type[StandaloneParser]) -> TypeGuard[Type[FolderParser]]:
    return parser.data_type == ParserDataType.FOLDER


def _is_url_parser(parser: Type[StandaloneParser]) -> TypeGuard[Type[URLParser]]:
    return parser.data_type == ParserDataType.URL
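

# A minimal usage sketch, assuming the package is installed and pointed at a real project
# folder; the folder path and URL below are placeholders, not values from this module.
if __name__ == "__main__":
    import os

    project_folder = "."  # placeholder: path to a project folder
    file_names = os.listdir(project_folder)
    urls = ["https://github.com/example/example-project"]  # placeholder URL

    if MainMultiParser.matches_path(project_folder, file_names, urls):
        parser = MainMultiParser(project_folder, file_names, urls)
        # Each attribute is resolved lazily from the first matching parser that returns a
        # non-None value (see _get_attr_from_first_parser_to_return_non_none).
        print("docstring:", parser.docstring)
        print("version:", parser.version)
        print("topics:", parser.topics)
        print("license:", parser.license)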