Fix the bug where identical images cause a hash collision

This commit is contained in:
Gustavo Henrique Santos Souza de Miranda 2025-07-19 00:16:29 -03:00
parent 44ff390695
commit 9e4b08234f
3 changed files with 22 additions and 5 deletions

View File

@ -4,6 +4,7 @@ from pathlib import Path
from sqlalchemy import Column, Integer, String, ForeignKey, DateTime from sqlalchemy import Column, Integer, String, ForeignKey, DateTime
from sqlalchemy.orm import relationship from sqlalchemy.orm import relationship
from sqlalchemy.sql.schema import Index
from pilgrim.models.photo_in_entry import photo_entry_association from pilgrim.models.photo_in_entry import photo_entry_association
from ..database import Base from ..database import Base
@ -24,6 +25,9 @@ class Photo(Base):
) )
fk_travel_diary_id = Column(Integer, ForeignKey("travel_diaries.id"),nullable=False) fk_travel_diary_id = Column(Integer, ForeignKey("travel_diaries.id"),nullable=False)
__table_args__ = (
Index('idx_photo_hash_diary', 'hash', 'fk_travel_diary_id'),
)
def __init__(self, filepath, name, photo_hash, addition_date=None, caption=None, entries=None, fk_travel_diary_id=None, **kw: Any): def __init__(self, filepath, name, photo_hash, addition_date=None, caption=None, entries=None, fk_travel_diary_id=None, **kw: Any):
super().__init__(**kw) super().__init__(**kw)

View File

@ -14,8 +14,9 @@ class PhotoService:
def __init__(self, session): def __init__(self, session):
self.session = session self.session = session
def _hash_file(self, filepath: Path) -> str: @staticmethod
"""Calculate hash of a file using SHA3-384.""" def hash_file(filepath: Path) -> str:
"""Calculate the hash of a file using SHA3-384."""
hash_func = hashlib.new('sha3_384') hash_func = hashlib.new('sha3_384')
with open(filepath, 'rb') as f: with open(filepath, 'rb') as f:
while chunk := f.read(8192): while chunk := f.read(8192):
@ -64,10 +65,18 @@ class PhotoService:
return dest_path return dest_path
def check_photo_by_hash(self, photohash:str, traveldiaryid:int):
    """Look up a photo by content hash within a single travel diary.

    Args:
        photohash: Hash string to compare against ``Photo.photo_hash``.
        traveldiaryid: Diary id to compare against ``Photo.fk_travel_diary_id``.

    Returns:
        The first ``Photo`` row matching both conditions, or ``None`` when
        the query yields no rows.
    """
    # Both conditions must hold: the same hash in another diary is not a match.
    same_hash = Photo.photo_hash == photohash
    same_diary = Photo.fk_travel_diary_id == traveldiaryid
    matches = self.session.query(Photo).filter(same_hash, same_diary)
    return matches.first()
def create(self, filepath: Path, name: str, travel_diary_id: int, caption=None, addition_date=None) -> Photo | None: def create(self, filepath: Path, name: str, travel_diary_id: int, caption=None, addition_date=None) -> Photo | None:
travel_diary = self.session.query(TravelDiary).filter(TravelDiary.id == travel_diary_id).first() travel_diary = self.session.query(TravelDiary).filter(TravelDiary.id == travel_diary_id).first()
if not travel_diary: if not travel_diary:
return None return None
photo_hash = self.hash_file(filepath)
if self.check_photo_by_hash(photo_hash, travel_diary_id):
return None
# Copy photo to diary's images directory # Copy photo to diary's images directory
copied_path = self._copy_photo_to_diary(filepath, travel_diary) copied_path = self._copy_photo_to_diary(filepath, travel_diary)
@ -79,8 +88,6 @@ class PhotoService:
except ValueError: except ValueError:
addition_date = None addition_date = None
# Calculate hash from the copied file
photo_hash = self._hash_file(copied_path)
new_photo = Photo( new_photo = Photo(
filepath=str(copied_path), # Store the path to the copied file filepath=str(copied_path), # Store the path to the copied file
@ -118,7 +125,7 @@ class PhotoService:
old_path.unlink() old_path.unlink()
original.filepath = str(new_path) original.filepath = str(new_path)
# Update hash based on the new copied file # Update hash based on the new copied file
original.photo_hash = self._hash_file(new_path) original.photo_hash = self.hash_file(new_path)
original.name = photo_dst.name original.name = photo_dst.name
original.addition_date = photo_dst.addition_date original.addition_date = photo_dst.addition_date

View File

@ -63,10 +63,16 @@ class AddPhotoModal(Screen):
async def _async_create_photo(self, photo_data: dict): async def _async_create_photo(self, photo_data: dict):
"""Creates a new photo asynchronously using PhotoService""" """Creates a new photo asynchronously using PhotoService"""
try: try:
service_manager = self.app.service_manager service_manager = self.app.service_manager
photo_service = service_manager.get_photo_service() photo_service = service_manager.get_photo_service()
if photo_service.check_photo_by_hash(photo_service.hash_file(photo_data["filepath"]),self.diary_id):
self.notify("Photo already exists in database", severity="error")
return
new_photo = photo_service.create( new_photo = photo_service.create(
filepath=Path(photo_data["filepath"]), filepath=Path(photo_data["filepath"]),
name=photo_data["name"], name=photo_data["name"],