Merge pull request #60 from gmbrax/fix/Importing-duplicate-images

Fix the bug where the hash collides when identical images are imported
This commit is contained in:
Gustavo Henrique Miranda 2025-07-19 03:23:54 -03:00 committed by GitHub
commit 8919e158bc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 22 additions and 5 deletions

View File

@ -4,6 +4,7 @@ from pathlib import Path
from sqlalchemy import Column, Integer, String, ForeignKey, DateTime from sqlalchemy import Column, Integer, String, ForeignKey, DateTime
from sqlalchemy.orm import relationship from sqlalchemy.orm import relationship
from sqlalchemy.sql.schema import Index
from pilgrim.models.photo_in_entry import photo_entry_association from pilgrim.models.photo_in_entry import photo_entry_association
from ..database import Base from ..database import Base
@ -24,6 +25,9 @@ class Photo(Base):
) )
fk_travel_diary_id = Column(Integer, ForeignKey("travel_diaries.id"),nullable=False) fk_travel_diary_id = Column(Integer, ForeignKey("travel_diaries.id"),nullable=False)
__table_args__ = (
Index('idx_photo_hash_diary', 'hash', 'fk_travel_diary_id'),
)
def __init__(self, filepath, name, photo_hash, addition_date=None, caption=None, entries=None, fk_travel_diary_id=None, **kw: Any): def __init__(self, filepath, name, photo_hash, addition_date=None, caption=None, entries=None, fk_travel_diary_id=None, **kw: Any):
super().__init__(**kw) super().__init__(**kw)

View File

@ -14,8 +14,9 @@ class PhotoService:
def __init__(self, session): def __init__(self, session):
self.session = session self.session = session
def _hash_file(self, filepath: Path) -> str: @staticmethod
"""Calculate hash of a file using SHA3-384.""" def hash_file(filepath: Path) -> str:
"""Calculate the hash of a file using SHA3-384."""
hash_func = hashlib.new('sha3_384') hash_func = hashlib.new('sha3_384')
with open(filepath, 'rb') as f: with open(filepath, 'rb') as f:
while chunk := f.read(8192): while chunk := f.read(8192):
@ -64,10 +65,18 @@ class PhotoService:
return dest_path return dest_path
def check_photo_by_hash(self, photohash:str, traveldiaryid:int):
photo = (self.session.query(Photo).filter(Photo.photo_hash == photohash,Photo.fk_travel_diary_id == traveldiaryid)
.first())
return photo
def create(self, filepath: Path, name: str, travel_diary_id: int, caption=None, addition_date=None) -> Photo | None: def create(self, filepath: Path, name: str, travel_diary_id: int, caption=None, addition_date=None) -> Photo | None:
travel_diary = self.session.query(TravelDiary).filter(TravelDiary.id == travel_diary_id).first() travel_diary = self.session.query(TravelDiary).filter(TravelDiary.id == travel_diary_id).first()
if not travel_diary: if not travel_diary:
return None return None
photo_hash = self.hash_file(filepath)
if self.check_photo_by_hash(photo_hash, travel_diary_id):
return None
# Copy photo to diary's images directory # Copy photo to diary's images directory
copied_path = self._copy_photo_to_diary(filepath, travel_diary) copied_path = self._copy_photo_to_diary(filepath, travel_diary)
@ -79,8 +88,6 @@ class PhotoService:
except ValueError: except ValueError:
addition_date = None addition_date = None
# Calculate hash from the copied file
photo_hash = self._hash_file(copied_path)
new_photo = Photo( new_photo = Photo(
filepath=str(copied_path), # Store the path to the copied file filepath=str(copied_path), # Store the path to the copied file
@ -118,7 +125,7 @@ class PhotoService:
old_path.unlink() old_path.unlink()
original.filepath = str(new_path) original.filepath = str(new_path)
# Update hash based on the new copied file # Update hash based on the new copied file
original.photo_hash = self._hash_file(new_path) original.photo_hash = self.hash_file(new_path)
original.name = photo_dst.name original.name = photo_dst.name
original.addition_date = photo_dst.addition_date original.addition_date = photo_dst.addition_date

View File

@ -63,10 +63,16 @@ class AddPhotoModal(Screen):
async def _async_create_photo(self, photo_data: dict): async def _async_create_photo(self, photo_data: dict):
"""Creates a new photo asynchronously using PhotoService""" """Creates a new photo asynchronously using PhotoService"""
try: try:
service_manager = self.app.service_manager service_manager = self.app.service_manager
photo_service = service_manager.get_photo_service() photo_service = service_manager.get_photo_service()
if photo_service.check_photo_by_hash(photo_service.hash_file(photo_data["filepath"]),self.diary_id):
self.notify("Photo already exists in database", severity="error")
return
new_photo = photo_service.create( new_photo = photo_service.create(
filepath=Path(photo_data["filepath"]), filepath=Path(photo_data["filepath"]),
name=photo_data["name"], name=photo_data["name"],