@@ -135,6 +135,36 @@ def workspace(self):
135135 project_workspace = current_app .ws_handler .get (self .workspace_id )
136136 return project_workspace
137137
def get_latest_files_cache(self) -> List[int]:
    """Return file history ids of the files present at the project's latest version.

    The ids are taken from the cached ``latest_project_files`` row when it is
    populated; otherwise they are calculated on the fly from the file history.

    :return: list of ``file_history.id`` values; empty when the project has no
        latest version or no matching file history records.
    """
    cache = self.latest_project_files
    # The cache row may be missing entirely or just not populated yet;
    # both cases fall back to the on-the-fly calculation.
    if cache is not None and cache.file_history_ids is not None:
        return cache.file_history_ids

    if self.latest_version is None:
        # Without a latest version the version filter below cannot match
        # any row, so skip the database round trip.
        return []

    # Deliberately a plain (non f-) string: values reach the statement only
    # through bind parameters, never through string interpolation.
    #
    # latest_changes: for every file path, the highest version (not newer than
    # the latest one) in which the file changed. The outer select then keeps
    # the history record of that change unless the change was a delete.
    query = """
        WITH latest_changes AS (
            SELECT
                fp.id,
                pv.project_id,
                max(pv.name) AS version
            FROM
                project_version pv
                LEFT OUTER JOIN file_history fh ON fh.version_id = pv.id
                LEFT OUTER JOIN project_file_path fp ON fp.id = fh.file_path_id
            WHERE
                pv.project_id = :project_id
                AND pv.name <= :latest_version
            GROUP BY
                fp.id, pv.project_id
        )
        SELECT
            fh.id
        FROM latest_changes ch
        LEFT OUTER JOIN file_history fh ON (fh.file_path_id = ch.id AND fh.project_version_name = ch.version AND fh.change != 'delete')
        WHERE fh.id IS NOT NULL;
    """
    params = {"project_id": self.id, "latest_version": self.latest_version}
    return [row.id for row in db.session.execute(text(query), params).fetchall()]
167+
138168 def cache_latest_files (self ) -> None :
139169 """Get project files from changes (FileHistory) and save them for later use."""
140170 if self .latest_version is None :
@@ -514,7 +544,11 @@ def generate_diff_name(self):
514544
515545
516546class LatestProjectFiles (db .Model ):
517- """Store project latest version files history ids"""
547+ """Store project latest version files history ids.
548+
549+ This is a caching table to store the latest relevant files history ids for further use in
550+ Project.files and ProjectVersion.files. It is updated when ProjectVersion itself is created.
551+ """
518552
519553 project_id = db .Column (
520554 UUID (as_uuid = True ),
@@ -743,22 +777,21 @@ def diffs_chain(
743777 return None , []
744778
745779 diffs = []
746- cached_items = Checkpoint .get_checkpoints (
747- basefile .project_version_name , version
748- )
780+ checkpoints = Checkpoint .get_checkpoints (basefile .project_version_name , version )
749781 expected_diffs = (
750782 FileDiff .query .filter_by (
751783 basefile_id = basefile .id ,
752784 )
753785 .filter (
754786 tuple_ (FileDiff .rank , FileDiff .version ).in_ (
755- [(item .rank , item .end ) for item in cached_items ]
787+ [(item .rank , item .end ) for item in checkpoints ]
756788 )
757789 )
758790 .all ()
759791 )
760792
761- for item in cached_items :
793+ for item in checkpoints :
794+ diff_needs_to_be_created = False
762795 diff = next (
763796 (
764797 d
@@ -767,25 +800,38 @@ def diffs_chain(
767800 ),
768801 None ,
769802 )
770- if diff and os .path .exists (diff .abs_path ):
771- diffs .append (diff )
772- elif item .rank > 0 :
773- # fallback if checkpoint does not exist: replace merged diff with individual diffs
774- individual_diffs = (
775- FileDiff .query .filter_by (
776- basefile_id = basefile .id ,
777- rank = 0 ,
803+ if diff :
804+ if os .path .exists (diff .abs_path ):
805+ diffs .append (diff )
806+ else :
807+ diff_needs_to_be_created = True
808+ else :
809+ # we do not have record in DB, create a checkpoint if it makes sense
810+ if item .rank > 0 and FileDiff .can_create_checkpoint (file_id , item ):
811+ diff = FileDiff (
812+ basefile = basefile ,
813+ version = item .end ,
814+ rank = item .rank ,
815+ path = basefile .file .generate_diff_name (),
816+ size = None ,
817+ checksum = None ,
778818 )
779- .filter (
780- FileDiff .version >= item .start , FileDiff .version <= item .end
819+ db .session .add (diff )
820+ db .session .commit ()
821+ diff_needs_to_be_created = True
822+ else :
823+ # we asked for checkpoint where there was no change
824+ continue
825+
826+ if diff_needs_to_be_created :
827+ diff_created = diff .construct_checkpoint ()
828+ if diff_created :
829+ diffs .append (diff )
830+ else :
831+ logging .error (
832+ f"Failed to create a diff for file { basefile .file .path } at version { basefile .project_version_name } of rank { item .rank } ."
781833 )
782- .order_by (FileDiff .version )
783- .all ()
784- )
785- diffs .extend (individual_diffs )
786- else :
787- # we asked for individual diff but there is no such diff as there was not change at that version
788- continue
834+ return None , []
789835
790836 return basefile , diffs
791837
@@ -924,9 +970,10 @@ def construct_checkpoint(self) -> bool:
924970 return True
925971
926972 if self .rank == 0 :
927- raise ValueError (
973+ logging . error (
928974 "Checkpoint of rank 0 should be created by user upload, cannot be constructed"
929975 )
976+ return False
930977
931978 # merged diffs can only be created for certain versions
932979 if self .version % LOG_BASE :
@@ -1434,7 +1481,7 @@ def __init__(
14341481 latest_files_map = {
14351482 fh .path : fh .id
14361483 for fh in FileHistory .query .filter (
1437- FileHistory .id .in_ (self .project .latest_project_files . file_history_ids )
1484+ FileHistory .id .in_ (self .project .get_latest_files_cache () )
14381485 ).all ()
14391486 }
14401487
@@ -1565,6 +1612,10 @@ def _files_from_end(self):
15651612 files that were delete after the version (and thus not necessarily present now). From these candidates
15661613 get the latest file change before or at the specific version. If that change was not 'delete', file is present.
15671614 """
1615+ # if we do not have cached file history ids use different strategy where it is not necessary
1616+ if self .project .latest_project_files .file_history_ids is None :
1617+ return self ._files_from_start ()
1618+
15681619 query = f"""
15691620 WITH files_changes_before_version AS (
15701621 WITH files_candidates AS (
0 commit comments