moevm · LapshinAE0 · Nov 21, 2025 · Nov 21, 2025 · Dec 1, 2025 · Dec 1, 2025
diff --git a/app/configs/config_abbreviations.json b/app/configs/config_abbreviations.json
@@ -0,0 +1,25 @@
+{
+  "common_abbr": [
+    "СССР", "РФ", "США", "ВКР", "ИТ", "ПО", "ООО", "ЗАО", "ОАО", "HTML", "CSS", 
+    "JS", "ЛЭТИ", "МОЕВМ", "ЭВМ", "ГОСТ", "DVD", "ИИ", "ОБЗОР", 
+    "ООП", "ЛР", "КР", "ОТЧЕТ", "ПЛАН", "СЛОВА", "ЦПУ", "МБ", "ОЗУ", "КБ",
+    "SSD", "PC", "HDD",
+    "AX", "BX", "CX", "DX", "SI", "DI", "BP", "SP",
+    "AH", "AL", "BH", "BL", "CH", "CL", "DH", "DL", 
+    "CS", "DS", "ES", "SS", "FS", "GS",
+    "IP", "EIP", "RIP", "URL",
+    "CF", "PF", "AF", "ZF", "SF", "TF", "IF", "DF", "OF",
+    "EAX", "EBX", "ECX", "EDX", "ESI", "EDI", "EBP", "ESP",
+    "RAX", "RBX", "RCX", "RDX", "RSI", "RDI", "RBP", "RSP",
+    "DOS", "OS", "BIOS", "UEFI", "MBR", "GPT",
+    "ASCII", "UTF", "UNICODE", "ANSI",
+    "МОЭВМ",
+    "CPU", "GPU", "APU", "RAM", "ROM", "PROM", "EPROM", "EEPROM",
+    "USB", "SATA", "PCI", "PCIe", "AGP", "ISA", "VGA", "HDMI", "DP",
+    "LAN", "WAN", "WLAN", "VPN", "ISP", "DNS", "DHCP", "TCP", "UDP",
+    "HTTP", "HTTPS", "FTP", "SSH", "SSL", "TLS", "XP", "ELF", "ACM", "IEEE", "UX",
+    "API", "GUI", "CLI", "IDE", "SDK", "SQL", "NoSQL", "XML", "JSON", "YAML",
+    "MAC", "IBM", "CERF", "LTR", "RTL", "FPS", "SHA", "AR", "EN", "RU", 
+    "CREAT", "FIFO", "RSS", "UML", "UI", "GB", "IJGBL"
+  ]
+}
diff --git a/app/main/check_packs/pack_config.py b/app/main/check_packs/pack_config.py
@@ -22,6 +22,7 @@
     ['pres_image_capture'],
     ['task_tracker'],
     ['overview_in_tasks'],
+    ['pres_abbreviations_check'],
     ['pres_aspect_ratio_check'],
     ['pres_was_were_check'],
 ]
@@ -53,6 +54,7 @@
     ["empty_task_page_check"],
     ["water_in_the_text_check"],
     ["report_task_tracker"],
+    ["report_abbreviations_check"],
     ["report_was_were_check"],
 ]
 

diff --git a/app/main/checks/check_abbreviations.py b/app/main/checks/check_abbreviations.py
@@ -0,0 +1,137 @@
+import json
+import re
+from pathlib import Path
+
+from pymorphy3 import MorphAnalyzer
+
+morph = MorphAnalyzer()  # module-wide analyzer instance, created once at import
+
+DEBUG_MODE = False  # when True, debug_print() writes its arguments via print()
+
+
+def load_abbreviations():
+    config_path = Path(__file__).parent.parent.parent / "configs" / "config_abbreviations.json"
+    with open(config_path, "r", encoding="utf-8") as f:
+        data = json.load(f)
+        return set(data.get("common_abbr"))
+
+
+COMMON_ABBR = load_abbreviations()  # loaded once at import; find_abbreviations() never reports these
+
+
+def debug_print(*args, **kwargs):  # print() wrapper that is active only in debug mode
+    if DEBUG_MODE:
+        print(*args, **kwargs)  # arguments are forwarded unchanged
+
+
+def get_first_letters(phrase):
+    if not phrase:
+        return ""
+    words = phrase.split()
+    return "".join(word[0].upper() for word in words if word)
+
+
+def is_abbreviation_explained(abbr: str, text: str) -> bool:
+    patterns = [
+        rf"{abbr}\s*\(([^)]+)\)",  # АААА (расшифровка)
+        rf"\(([^)]+)\)\s*{abbr}",  # (расшифровка) АААА
+        rf"{abbr}\s*[—\-]\s*([^.,;!?]+)",  # АААА — расшифровка
+        rf"{abbr}\s*-\s*([^.,;!?]+)",  # АААА - расшифровка
+        rf"([^.,;!?]+)\s*[—\-]\s*{abbr}",  # расшифровка — АААА
+        rf"([^.,;!?]+)\s*-\s*{abbr}",  # расшифровка - АААА
+    ]
+
+    debug_print(f"Проверка аббревиатуры: {abbr}")
+    debug_print(f"Текст (первые 200 символов): {text[:200]}...")
+
+    for pattern in patterns:
+        match = re.search(pattern, text, re.IGNORECASE)
+
+        if match:
+            explanation = match.group(1)
+            debug_print(f" Найден паттерн {pattern}")
+            debug_print(f"  Расшифровка: {explanation}")
+
+            if correctly_explained(abbr, explanation):
+                debug_print("  Расшифровка корректна")
+                return True
+            else:
+                debug_print("  Расшифровка НЕ соответствует первым буквам")
+                debug_print(f"     Ожидалось: {abbr.upper()}")
+                debug_print(f"     Получено: {get_first_letters(explanation)}")
+
+    debug_print(f" Расшифровка для {abbr} не найдена")
+    return False
+
+
+def get_unexplained_abbrev(text, unverifiable_text):
+    abbreviations = find_abbreviations(text, unverifiable_text)
+
+    if not abbreviations:
+        return False, []
+
+    unexplained_abbr = []
+    for abbr in abbreviations:
+        if not is_abbreviation_explained(abbr, text):
+            unexplained_abbr.append(abbr)
+
+    return True, unexplained_abbr
+
+
+def find_abbreviations(text: str, unverifiable_text: str):
+    pattern = r"\b[А-ЯA-Z]{2,5}\b"
+    abbreviations = re.findall(pattern, text)
+
+    filtered_abbr = {
+        abbr
+        for abbr in abbreviations
+        if abbr not in COMMON_ABBR and abbr not in unverifiable_text and morph.parse(abbr.lower())[0].score != 0
+    }
+
+    return list(filtered_abbr)
+
+
+def correctly_explained(abbr, explan):
+    words = explan.split()
+
+    first_letters = ""
+    for word in words:
+        if word:
+            first_letters += word[0].upper()
+
+    return first_letters == abbr.upper()
+
+
+def main_check(text: str, unverifiable_text: str):
+    try:
+        debug_print(f"unverifiable_text : {unverifiable_text}")
+        continue_check = True
+        res_str = ""
+        if not text:
+            continue_check, res_str = False, "Не удалось получить текст"
+
+        abbr_is_finding, unexplained_abbr = get_unexplained_abbrev(text=text, unverifiable_text=unverifiable_text)
+
+        if not abbr_is_finding:
+            continue_check, res_str = (
+                False,
+                "Аббревиатуры не найдены в представленном документе",
+            )
+
+        if not unexplained_abbr:
+            continue_check, res_str = False, "Все аббревиатуры правильно расшифрованы"
+
+        return continue_check, res_str, unexplained_abbr
+
+    except Exception as e:
+        return False, f"Ошибка при проверке аббревиатур: {str(e)}", {}
+
+
+def forming_response(unexplained_abbr_with_page, format_page_link):
+    result_str = "Найдены нерасшифрованные аббревиатуры при первом использовании:<br>"
+    page_links = format_page_link(list(unexplained_abbr_with_page.values()))
+    for index_links, abbr in enumerate(unexplained_abbr_with_page):
+        result_str += f"- {abbr} на {page_links[index_links]} странице<br>"
+    result_str += "Каждая аббревиатура должна быть расшифрована при первом использовании в тексте.<br>"
+    result_str += "Расшифровка должны быть по первыми буквам, например, МВД - Министерство внутренних дел.<br>"
+    return result_str
diff --git a/app/main/checks/presentation_checks/abbreviations_presentation.py b/app/main/checks/presentation_checks/abbreviations_presentation.py
@@ -0,0 +1,37 @@
+from ..base_check import BasePresCriterion, answer
+from ..check_abbreviations import forming_response, main_check
+
+
+class PresAbbreviationsCheck(BasePresCriterion):
+    _description = "Аббревиатуры в тексте должны быть расшифрованы при первом использовании."
+    id = "pres_abbreviations_check"
+    warning = True
+
+    def __init__(self, file_info):
+        super().__init__(file_info)
+
+    def check(self):
+        try:
+            slides_text = self.file.get_text_from_slides()
+            title_page = slides_text[0]
+            full_text = " ".join(slides_text)
+
+            continue_check, res_str, unexplained_abbr = main_check(text=full_text, unverifiable_text=title_page)
+            if not continue_check:
+                return answer(True, res_str)
+
+            unexplained_abbr_with_slides = {}
+
+            for slide_num, slide_text in enumerate(slides_text, 0):
+                for abbr in unexplained_abbr:
+                    if abbr in slide_text and abbr not in unexplained_abbr_with_slides:
+                        unexplained_abbr_with_slides[abbr] = slide_num
+
+            if not unexplained_abbr_with_slides:
+                return answer(True, "Все аббревиатуры правильно расшифрованы")
+
+            result_str = forming_response(unexplained_abbr_with_slides, lambda pages: self.format_page_link(pages))
+            return answer(False, result_str)
+
+        except Exception as e:
+            return answer(False, f"Ошибка при проверке аббревиатур: {str(e)}")
diff --git a/app/main/checks/report_checks/abbreviations_check.py b/app/main/checks/report_checks/abbreviations_check.py
@@ -0,0 +1,89 @@
+from ..base_check import BaseReportCriterion, answer
+from ..check_abbreviations import forming_response, main_check
+
+
+class ReportAbbreviationsCheck(BaseReportCriterion):
+    label = "Проверка расшифровки аббревиатур"
+    _description = "Аббревиатуры в тексте должны быть расшифрованы при первом использовании."
+    id = "report_abbreviations_check"
+    warning = True
+
+    def __init__(self, file_info):
+        super().__init__(file_info)
+
+    def check(self):
+        try:
+            text = self._get_document_text()
+
+            headings = [
+                "СПИСОК ИСПОЛЬЗОВАННЫХ ИСТОЧНИКОВ",
+                "ПРИЛОЖЕНИЕ",
+                "ОПРЕДЕЛЕНИЯ, ОБОЗНАЧЕНИЯ И СОКРАЩЕНИЯ",
+            ]
+            unverifiable_text = self._get_unverifiable_text(headings)
+
+            continue_check, res_str, unexplained_abbr = main_check(text=text, unverifiable_text=unverifiable_text)
+            if not continue_check:
+                return answer(True, res_str)
+
+            unexplained_abbr_with_page = {}
+
+            for page_num in range(1, self.file.page_counter() + 1):
+                text_on_page = self.file.pdf_file.text_on_page[page_num]
+
+                for abbr in unexplained_abbr:
+                    if abbr in text_on_page and abbr not in unexplained_abbr_with_page:
+                        unexplained_abbr_with_page[abbr] = page_num
+
+            if not unexplained_abbr_with_page:
+                return answer(True, "Все аббревиатуры правильно расшифрованы")
+            result_str = forming_response(unexplained_abbr_with_page, lambda pages: self.format_page_link(pages))
+            return answer(False, result_str)
+
+        except Exception as e:
+            return answer(False, f"Ошибка при проверке аббревиатур: {str(e)}")
+
+    def _get_document_text(self):
+
+        if hasattr(self.file, "pdf_file"):
+            page_texts = self.file.pdf_file.get_text_on_page()
+            return " ".join(page_texts.values())
+        elif hasattr(self.file, "paragraphs"):
+            text_parts = []
+            for paragraph in self.file.paragraphs:
+                text = paragraph.to_string()
+                if "\n" in text:
+                    text = text.split("\n")[1]
+                text_parts.append(text)
+            return "\n".join(text_parts)
+        return None
+
+    def _get_text_into_sections(self, headings):
+        chapters = self.file.make_chapters(self.file_type["report_type"])
+        text_parts = []
+
+        for chapter in chapters:
+            chapter_title = chapter.get("text", "").upper()
+
+            if any(stop.upper() in chapter_title for stop in headings):
+                text_parts.append(chapter["text"])
+
+                def add_child_text(child_elements):
+                    for child in child_elements:
+                        if child.get("text"):
+                            text_parts.append(child["text"])
+                        if child.get("child"):
+                            add_child_text(child["child"])
+
+                if chapter.get("child"):
+                    add_child_text(chapter["child"])
+
+        return " ".join(text_parts)
+
+    def _get_text_title_page(self):
+        title_page = self.file.pdf_file.text_on_page[1]
+        return title_page
+
+    def _get_unverifiable_text(self, unverifiable_headings):
+        unverifiable_text = self._get_text_title_page() + self._get_text_into_sections(unverifiable_headings)
+        return unverifiable_text
diff --git a/app/main/checks/report_checks/literature_references.py b/app/main/checks/report_checks/literature_references.py
@@ -156,7 +156,7 @@ def search_references(self, start_par):
             match = re.search(r'Таблица ([.\d]+)', paragraph_text)
             table_text = ''
             if match:
-                index_table += 1    # int(match.group(1)) - 1       # TODO: fix logic
+                index_table += 1  # int(match.group(1)) - 1       # TODO: fix logic
                 table_text = self.get_text_in_table(index_table)
 
             paragraph_text += table_text

diff --git a/app/main/checks/report_checks/main_page_settings.py b/app/main/checks/report_checks/main_page_settings.py
@@ -60,7 +60,9 @@ class ReportMainPageSetting:
             "found_value": 0,
             "found_key": 0,
             "find": 3,
-            "value": [r"(Руководитель).*([кд]\..+\.н\., (доцент|профессор))[|]*([А-Я](?:\.-?[А-Я])?\.[А-Я]\. [А-Я][а-я]+)"],  #
+            "value": [
+                r"(Руководитель).*([кд]\..+\.н\., (доцент|профессор))[|]*([А-Я](?:\.-?[А-Я])?\.[А-Я]\. [А-Я][а-я]+)"
+            ],  #
             "logs": "",
         },
         {

diff --git a/app/utils/converter.py b/app/utils/converter.py
@@ -9,11 +9,14 @@ def run_process(cmd: str):
 
 def convert_to(filepath, target_format='pdf'):
     new_filename, outdir = None, dirname(filepath)
-    convert_cmd = "timeout 3m " + {
-        'pdf': f"soffice --headless --convert-to pdf --outdir {outdir} {filepath}",
-        'docx': f"soffice --headless --convert-to docx --outdir {outdir} {filepath}",
-        'pptx': f"soffice --headless --convert-to pptx --outdir {outdir} {filepath}",
-    }[target_format]
+    convert_cmd = (
+        "timeout 3m "
+        + {
+            'pdf': f"soffice --headless --convert-to pdf --outdir {outdir} {filepath}",
+            'docx': f"soffice --headless --convert-to docx --outdir {outdir} {filepath}",
+            'pptx': f"soffice --headless --convert-to pptx --outdir {outdir} {filepath}",
+        }[target_format]
+    )
 
     if run_process(convert_cmd).returncode == 0:
         # success conversion