@@ -614,58 +614,140 @@ async def process_url_message(message_text, bot, update, model, language):
614614 # note_length = len(transcription_note)
615615 # max_message_length = 4000 - note_length # Adjust max length to account for transcription note
616616
617- # message sending and chunking logic; revised
617+ # message sending and chunking logic; revised for v.0.1710
618618 if transcription_settings ['send_as_messages' ] and 'txt' in transcription_paths :
619619 try :
620- logger .info (f "Preparing to send plain text message from raw content" )
620+ logger .info ("Preparing to send plain text message from raw content" )
621621 content = transcription_note + raw_content # Add transcription note to the raw content
622622
623623 # Just to be safe, reduce the chunk even more if needed
624- safe_max = 3000 # even safer limit
624+ safe_max = 3500 # even safer limit
625+ i = 0
625626
626- for i in range (0 , len (content ), safe_max ):
627- chunk = content [i :i + safe_max ]
627+ # Replacing the old `for i in range(0, len(content), safe_max):` approach
628+ # with a `while` loop that ensures leftover text isn’t lost if we trim.
629+ while i < len (content ):
630+ # Slice up to safe_max or the end of the string
631+ chunk = content [i :i + safe_max ]
628632
629- # Optional: Make sure chunk length is safely under 4096 (should already be )
633+ # Defensive cap: keep each chunk under Telegram's 4096-character message limit (with safe_max at 3500 this check never triggers)
630634 if len (chunk ) > 4000 :
631635 chunk = chunk [:4000 ]
632636
633- # # OPTIONAL: Check if we end in the middle of an HTML tag and adjust if needed.
634- # # For example, if chunk ends with '<', we might remove that character or find the previous space:
635- # if '<' in chunk[-5:]: # crude check for partial tag at end
636- # # Try to backtrack to a space before the '<'
637- # last_space = chunk.rfind(' ')
638- # if last_space != -1:
639- # chunk = chunk[:last_space]
637+ # If this chunk is shorter than safe_max, it is the final chunk of the content.
638+ # NOTE: the partial-tag and whitespace trims below still run before the send and break, so any text they cut off is never sent.
639+ if len (chunk ) < safe_max :
640+ # Check partial HTML near the end (optional).
641+ if '<' in chunk [- 5 :]: # crude check for partial tag at end
642+ last_space = chunk .rfind (' ' )
643+ if last_space != - 1 :
644+ chunk = chunk [:last_space ]
640645
641- # Check if we end on a partial HTML tag
646+ # Check if we’re splitting a word
647+ last_space = chunk .rfind (' ' )
648+ if last_space == - 1 and len (chunk ) == safe_max :
649+ logger .warning ("No whitespace found. Forcibly splitting mid-word near end." )
650+ elif last_space > 0 :
651+ chunk = chunk [:last_space ]
652+
653+ # Send the last chunk
654+ await bot .send_message (
655+ chat_id = update .effective_chat .id ,
656+ text = chunk ,
657+ parse_mode = 'HTML'
658+ )
659+ logger .info (f"Sent message chunk: { (i // safe_max ) + 1 } " )
660+ break
661+
662+ # If chunk is exactly safe_max in length, do partial tag / partial word checks
663+
664+ # 1) Attempt to avoid splitting an HTML tag
642665 if '<' in chunk [- 5 :]: # crude check for partial tag at end
643666 last_space = chunk .rfind (' ' )
644667 if last_space != - 1 :
645668 chunk = chunk [:last_space ]
646-
647- # Attempt to find last whitespace so we don’t split in the middle of a word
648- # BUT if there's NO whitespace at all, we forcibly break anyway:
669+
670+ # 2) Attempt to find last whitespace so we don’t split in the middle of a word
649671 last_space = chunk .rfind (' ' )
650672 if last_space == - 1 and len (chunk ) == safe_max :
651- # This means no space was found in the entire chunk,
652- # so we forcibly keep the chunk at safe_max (which likely breaks a word).
653- logger . warning ( "No whitespace found. Forcibly splitting mid-word at position %d." , i + safe_max )
654- # chunk remains chunk[:safe_max], i.e. as-is
655- elif last_space > - 1 and last_space > 0 :
656- # We found a space within the chunk
673+ # This means no space found => forcibly keep chunk as-is
674+ logger . warning (
675+ "No whitespace found. Forcibly splitting mid-word at position %d." ,
676+ i + safe_max
677+ )
678+ elif last_space > 0 :
657679 chunk = chunk [:last_space ]
658- # Note: If you do this, you might want to adjust `i` accordingly
659- # or treat the leftover text on the next iteration.
660- # But for a simple approach, this is enough to keep the code short.
661680
662- # Now send the message
663- await bot .send_message (chat_id = update .effective_chat .id , text = chunk , parse_mode = 'HTML' )
681+ # Now send the chunk
682+ await bot .send_message (
683+ chat_id = update .effective_chat .id ,
684+ text = chunk ,
685+ parse_mode = 'HTML'
686+ )
664687 logger .info (f"Sent message chunk: { (i // safe_max ) + 1 } " )
688+
689+ # Advance i by the length of the chunk we actually sent.
690+ # Any text trimmed off the end of `chunk` is still unsent, so the next loop iteration picks it up.
691+ i += len (chunk )
692+
665693 except Exception as e :
666694 logger .error (f"Error in sending plain text message: { e } " )
667695 else :
668696 logger .info ("Condition for sending plain text message not met." )
697+
698+ # # // old method; used up until v0.1709.2
699+ # # message sending and chunking logic; revised
700+ # if transcription_settings['send_as_messages'] and 'txt' in transcription_paths:
701+ # try:
702+ # logger.info(f"Preparing to send plain text message from raw content")
703+ # content = transcription_note + raw_content # Add transcription note to the raw content
704+
705+ # # Just to be safe, reduce the chunk even more if needed
706+ # safe_max = 3500 # even safer limit
707+
708+ # for i in range(0, len(content), safe_max):
709+ # chunk = content[i:i+safe_max]
710+
711+ # # Optional: Make sure chunk length is safely under 4096 (should already be)
712+ # if len(chunk) > 4000:
713+ # chunk = chunk[:4000]
714+
715+ # # # OPTIONAL: Check if we end in the middle of an HTML tag and adjust if needed.
716+ # # # For example, if chunk ends with '<', we might remove that character or find the previous space:
717+ # # if '<' in chunk[-5:]: # crude check for partial tag at end
718+ # # # Try to backtrack to a space before the '<'
719+ # # last_space = chunk.rfind(' ')
720+ # # if last_space != -1:
721+ # # chunk = chunk[:last_space]
722+
723+ # # Check if we end on a partial HTML tag
724+ # if '<' in chunk[-5:]: # crude check for partial tag at end
725+ # last_space = chunk.rfind(' ')
726+ # if last_space != -1:
727+ # chunk = chunk[:last_space]
728+
729+ # # Attempt to find last whitespace so we don’t split in the middle of a word
730+ # # BUT if there's NO whitespace at all, we forcibly break anyway:
731+ # last_space = chunk.rfind(' ')
732+ # if last_space == -1 and len(chunk) == safe_max:
733+ # # This means no space was found in the entire chunk,
734+ # # so we forcibly keep the chunk at safe_max (which likely breaks a word).
735+ # logger.warning("No whitespace found. Forcibly splitting mid-word at position %d.", i + safe_max)
736+ # # chunk remains chunk[:safe_max], i.e. as-is
737+ # elif last_space > -1 and last_space > 0:
738+ # # We found a space within the chunk
739+ # chunk = chunk[:last_space]
740+ # # Note: If you do this, you might want to adjust `i` accordingly
741+ # # or treat the leftover text on the next iteration.
742+ # # But for a simple approach, this is enough to keep the code short.
743+
744+ # # Now send the message
745+ # await bot.send_message(chat_id=update.effective_chat.id, text=chunk, parse_mode='HTML')
746+ # logger.info(f"Sent message chunk: {(i // safe_max) + 1}")
747+ # except Exception as e:
748+ # logger.error(f"Error in sending plain text message: {e}")
749+ # else:
750+ # logger.info("Condition for sending plain text message not met.")
669751
670752 # // old method
671753 # if transcription_settings['send_as_messages'] and 'txt' in transcription_paths:
0 commit comments