Source code for projectreport.analyzer.parsers.multi.main
from typing import Final, List, Optional, Sequence, Type
from cached_property import cached_property
from typing_extensions import TypeGuard
from projectreport.analyzer.parsers.base import Parser
from projectreport.analyzer.parsers.data_types import ParserDataType
from projectreport.analyzer.parsers.folder import FolderParser
from projectreport.analyzer.parsers.github import GithubParser
from projectreport.analyzer.parsers.multi.file import MultiFileParser
from projectreport.analyzer.parsers.url import URLParser
from projectreport.license.model import License
from projectreport.logger import logger
from projectreport.parser_types import StandaloneParser
from projectreport.version import Version
# TODO: Improve user ability to customize parsers. Currently they need to mutate the PARSERS
# array and PARSER_DOC_FILES dict to customize parsers as the matches_path classmethod does
# not take into account custom parsers passed by constructor.
# Default parser classes used by MainMultiParser when no custom ``parsers`` are
# given. Order matters: each attribute is taken from the first parser in this
# list that matches the inputs and returns a non-None value.
PARSERS: Final[List[Type[StandaloneParser]]] = [
    MultiFileParser,
    GithubParser,
]
class MainMultiParser(Parser):
    """
    The main parser that uses other parsers, both single- and multi- together to
    determine information about a folder.

    Every exposed attribute (``docstring``, ``version``, ``topics``, ``license``)
    is resolved by walking ``self.parsers`` in order and returning the value from
    the first parser that matches the inputs and yields something other than
    ``None``.
    """

    def __init__(
        self,
        path: str,
        file_names: Sequence[str],
        urls: Optional[Sequence[str]] = None,
        parsers: Optional[List[Type[StandaloneParser]]] = None,
    ):
        """
        :param path: This should be the path of a folder, rather than a path to a file
            that the singular parsers accept.
        :param file_names: File names handed to folder parsers together with ``path``.
        :param urls: URLs offered to URL parsers. Defaults to no URLs.
        :param parsers: Defaults to PARSERS.
        """
        # NOTE(review): these attributes are assigned before the base initializer
        # runs; presumably the base class may read them -- keep this order.
        self.parsers = parsers or PARSERS
        self.file_names = file_names
        self.urls = urls or []
        super().__init__(path)

    @classmethod
    def matches_path(
        cls,
        path: str,
        file_names: Sequence[str],
        urls: Optional[Sequence[str]] = None,
        parsers: Optional[List[Type[StandaloneParser]]] = None,
    ) -> bool:
        """
        Check whether at least one parser can handle the given inputs.

        :param path: Path of the folder to check.
        :param file_names: File names handed to folder parsers together with ``path``.
        :param urls: URLs offered to URL parsers. Defaults to no URLs.
        :param parsers: Parser classes to consult. Defaults to PARSERS. Pass the
            same list that is given to the constructor so that matching agrees
            with what an instance would actually use.
        :return: True if any of the parsers matches, otherwise False.
        """
        urls = urls or []
        candidates = parsers or PARSERS
        return any(
            _parser_matches_path(parser, path, file_names, urls)
            for parser in candidates
        )

    @cached_property
    def docstring(self) -> Optional[str]:
        """Docstring from the first matching parser that provides one."""
        return self._get_attr_from_first_parser_to_return_non_none("docstring")

    @cached_property
    def version(self) -> Optional[Version]:
        """Version from the first matching parser that provides one."""
        return self._get_attr_from_first_parser_to_return_non_none("version")

    @cached_property
    def topics(self) -> Optional[Sequence[str]]:
        """Topics from the first matching parser that provides them."""
        return self._get_attr_from_first_parser_to_return_non_none("topics")

    @cached_property
    def license(self) -> Optional[License]:
        """License from the first matching parser that provides one."""
        return self._get_attr_from_first_parser_to_return_non_none("license")

    def _get_attr_from_first_parser_to_return_non_none(self, attr: str):
        """
        Look up ``attr`` on each matching parser in order.

        :param attr: Name of the attribute to read from the constructed parsers.
        :return: The first value that is not None, or None when no matching
            parser supplies one.
        """
        for parser in self.parsers:
            if not _parser_matches_path(
                parser, self.path, self.file_names, self.urls
            ):
                continue
            parser_obj = _construct_parser(
                parser, self.path, self.file_names, self.urls
            )
            if parser_obj is None:
                continue
            value = getattr(parser_obj, attr)
            if value is not None:
                return value
        return None
def _parser_matches_path(
    parser: Type[StandaloneParser],
    folder: str,
    file_names: Sequence[str],
    urls: Sequence[str],
) -> bool:
    """
    Determine whether the given parser class can handle the supplied inputs.

    :param parser: Parser class to check; handling is chosen by its ``data_type``.
    :param folder: Folder path, passed to folder parsers.
    :param file_names: File names, passed to folder parsers along with ``folder``.
    :param urls: Candidate URLs; a URL parser matches if it matches any of them.
    :return: True if the parser matches, otherwise False.
    :raises NotImplementedError: If the parser is neither a folder parser nor a
        URL parser.
    """
    if _is_folder_parser(parser):
        return parser.matches_path(folder, file_names)
    if _is_url_parser(parser):
        # Generator expression (not a list) so any() stops at the first match
        # instead of checking every remaining URL.
        return any(parser.matches_path(url) for url in urls)
    raise NotImplementedError(
        f"No handling for {parser} with data type {parser.data_type}"
    )
def _construct_parser(
    parser: Type[StandaloneParser],
    folder: str,
    file_names: Sequence[str],
    urls: Sequence[str],
) -> Optional[StandaloneParser]:
    """
    Instantiate the given parser class with the inputs appropriate for its type.

    :param parser: Parser class to instantiate; handling is chosen by its
        ``data_type``.
    :param folder: Folder path, passed to folder parsers.
    :param file_names: File names, passed to folder parsers along with ``folder``.
    :param urls: Candidate URLs; a URL parser is built from the first one it matches.
    :return: The constructed parser, or None when a URL parser matches none of
        the URLs.
    :raises NotImplementedError: If the parser is neither a folder parser nor a
        URL parser.
    """
    if _is_folder_parser(parser):
        return parser(folder, file_names)
    if _is_url_parser(parser):
        for url in urls:
            if parser.matches_path(url):
                return parser(url)
        # ``warning`` rather than ``warn``: the latter is a deprecated alias on
        # standard-library loggers.
        logger.warning(f"The URL parser {parser} did not match the given urls {urls}")
        return None
    raise NotImplementedError(
        f"No handling for {parser} with data type {parser.data_type}"
    )
def _is_folder_parser(parser: Type[StandaloneParser]) -> TypeGuard[Type[FolderParser]]:
    """Tell whether the parser class declares the FOLDER data type."""
    declared_type = parser.data_type
    return declared_type == ParserDataType.FOLDER
def _is_url_parser(parser: Type[StandaloneParser]) -> TypeGuard[Type[URLParser]]:
    """Tell whether the parser class declares the URL data type."""
    declared_type = parser.data_type
    return declared_type == ParserDataType.URL